From 2c22db841ae77087905423060ca6e31f80016436 Mon Sep 17 00:00:00 2001 From: "Henrique B.R" Date: Thu, 2 Apr 2020 17:38:47 +0100 Subject: [PATCH 01/21] Added momentum scaling to scalar HMC theories in order to follow UKQCD/CPS conventions --- Grid/qcd/action/scalar/ScalarImpl.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index febb315e..203e1824 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -1,5 +1,13 @@ #pragma once +#define CPS_MD_TIME + +#ifdef CPS_MD_TIME +#define HMC_MOMENTUM_DENOMINATOR (2.0) +#else +#define HMC_MOMENTUM_DENOMINATOR (1.0) +#endif + NAMESPACE_BEGIN(Grid); template @@ -20,13 +28,17 @@ public: typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); + // CPS and UKQCD conventions not yet implemented for U(1) scalars. gaussian(pRNG, P); + P *= scale; } static inline Field projectForce(Field& P){return P;} static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; + std::cout << "Field updated. Epsilon = " << std::setprecision(10) << ep << std::endl; } static inline RealD FieldSquareNorm(Field& U) { @@ -66,7 +78,7 @@ public: } static void FreePropagator(const Field &in, Field &out, - const Field &momKernel) + const Field &momKernel) { FFT fft((GridCartesian *)in.Grid()); Field inFT(in.Grid()); @@ -139,14 +151,17 @@ public: static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // Being consistent with CPS and UKQCD conventions #ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); + #else Field Pgaussian(P.Grid()), Pp(P.Grid()); ComplexField p2(P.Grid()); p2 = zero; RealD M = FFT_MASS; - + + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian); FFT theFFT((GridCartesian*)P.Grid()); @@ -156,8 +171,8 @@ public: p2 = sqrt(p2); Pp *= p2; theFFT.FFT_all_dim(P, Pp, FFT::backward); - #endif //USE_FFT_ACCELERATION + P *= scale; } static inline Field projectForce(Field& P) {return P;} @@ -166,7 +181,8 @@ public: { #ifndef USE_FFT_ACCELERATION double t0=usecond(); - U += P*ep; + U += P*ep; + std::cout << "Field updated. 
Epsilon = " << std::setprecision(10) << ep << std::endl; double t1=usecond(); double total_time = (t1-t0)/1e6; std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; From 244c003a1bd5316e80cfdac38959d7ff7528bb0b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Fri, 12 Jun 2020 11:00:25 +0100 Subject: [PATCH 02/21] Updated Baryon code --- Grid/qcd/utils/BaryonUtils.h | 40 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 23267270..3384d273 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -7,6 +7,7 @@ Copyright (C) 2019 Author: Felix Erben + Author: Raoul Hodgson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -58,7 +59,7 @@ public: const Gamma GammaA_right, const Gamma GammaB_right, const int parity, - const int * wick_contractions, + const bool * wick_contractions, robj &result); public: static void ContractBaryons(const PropagatorField &q1_left, @@ -68,8 +69,7 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr); template @@ -80,9 +80,9 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const int wick_contraction, const int parity, + const int nt, robj &result); private: template @@ -173,7 +173,7 @@ void BaryonUtils::baryon_site(const mobj &D1, const Gamma GammaA_right, const Gamma GammaB_right, const int parity, - const int * wick_contraction, + const bool * wick_contraction, robj &result) { @@ -279,8 +279,7 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr) { @@ -288,7 +287,6 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; @@ -298,10 +296,6 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, GridBase *grid = q1_left.Grid(); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 
1 : 0; - auto vbaryon_corr= baryon_corr.View(); auto v1 = q1_left.View(); auto v2 = q2_left.View(); @@ -311,10 +305,10 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real)); for (int ie=0; ie < 6 ; ie++){ if(ie==0 or ie==3){ - bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contraction[ie]; + bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie]; } else{ - bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contraction[ie]; + bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie]; } } Real t=0.; @@ -325,7 +319,7 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, auto D2 = v2[ss]; auto D3 = v3[ss]; vobj result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites @@ -343,16 +337,15 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const int wick_contraction, const int parity, + const int nt, robj &result) { assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; @@ -360,12 +353,13 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - int wick_contraction[6]; + bool wick_contractions[6]; for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 
1 : 0; + wick_contractions[ie] = (ie == wick_contraction); - result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + for (int t=0; t Date: Fri, 12 Jun 2020 11:35:52 +0100 Subject: [PATCH 03/21] Added Baryon3pt code --- Grid/qcd/utils/BaryonUtils.h | 408 +++++++++++++++++++++++++++++++++++ 1 file changed, 408 insertions(+) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 23267270..a6f8b78d 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -84,6 +84,55 @@ public: const char * quarks_right, const int parity, robj &result); + private: + template + static void Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + public: + template + static void Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr); private: template static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop, @@ -371,6 +420,365 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, /*********************************************************************** * End of Baryon 2pt-function code. 
* * * + * The following code is for baryonGamma3pt function * + **********************************************************************/ + +/* Dq1_ti is a quark line from t_i to t_J + * Dq2_spec is a quark line from t_i to t_f + * Dq3_spec is a quark line from t_i to t_f + * Dq4_tf is a quark line from t_f to t_J */ +template +template +void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; + auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); + auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); + auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); + auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(0,0)(0,0); + auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(0,0)(0,0); + auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); + auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(0,0)(0,0); + auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); + auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; + auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; + auto Gf_D1 = GammaBf * Dq1_spec; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(0,0)(0,0); + auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); + auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(0,0)(0,0); + auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); + auto ee_Dq1_spec_ag_ac = ee* Dq1_spec ()(0,0)(0,0); + auto ee_Gf_D1_ag_bc = ee*Gf_D1 ()(0,0)(0,0); + auto ee_Dq1_spec_gg_cc = ee*Dq1_spec ()(0,0)(0,0); + auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); + auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; + auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; + auto Gf_D1 = GammaBf * Dq1_spec; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = 
GammaBf * D2_Gi; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Complex ee; + + auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); + auto Gf_adjD4_g_D3_ab_bb = Gf_adjD4_g_D3 ()(0,0)(0,0); + auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); + auto adjD4_g_D3_ab_ab = adjD4_g_D3 ()(0,0)(0,0); + auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(0,0)(0,0); + auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(0,0)(0,0); + auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(0,0)(0,0); + auto adjD4_g_D3_gb_cb = adjD4_g_D3 ()(0,0)(0,0); + auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr) +{ + GridBase *grid = q_tf.Grid(); + + auto vcorr= stn_corr.View(); + auto vq_ti = q_ti.View(); + auto vq_tf = q_tf.View(); + + if (group ==1) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 2) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 3) { + thread_for(ss,grid->oSites(),{ + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + + vcorr[ss] += result; + });//end loop over lattice sites + } + +} + + +/*********************************************************************** + * End of BaryonGamma3pt-function code. 
* + * * * The following code is for Sigma -> N rare hypeon decays * **********************************************************************/ From 0d2f913a1a802c09d80d2fad28a360821b78dfe3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 20 Jun 2020 09:37:31 -0400 Subject: [PATCH 04/21] String.h for linux --- Grid/threads/Accelerator.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 1cb6d637..5bf17072 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -28,6 +28,8 @@ Author: paboyle /* END LEGAL */ #pragma once +#include + #ifdef HAVE_MALLOC_MALLOC_H #include #endif From 6c5fa8dcd875bb9fb5e50b25adb8534711bdc4da Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 20 Jun 2020 14:34:29 -0400 Subject: [PATCH 05/21] Aligned allocate on CPU put through this interface --- Grid/threads/Accelerator.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 5bf17072..74a3ea22 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -336,12 +336,11 @@ inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemc ////////////////////////////////////////////// // CPU Target - No accelerator just thread instead ////////////////////////////////////////////// +#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned #if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) ) #undef GRID_SIMT -#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned - #define accelerator #define accelerator_inline strong_inline #define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ }); @@ -367,6 +366,14 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr);}; #endif // CPU target +#ifdef HAVE_MM_MALLOC_H +inline void *acceleratorAllocCpu(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);}; +inline void acceleratorFreeCpu (void *ptr){_mm_free(ptr);}; +#else +inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);}; +inline void acceleratorFreeCpu (void *ptr){free(ptr);}; +#endif + /////////////////////////////////////////////////// // Synchronise across local threads for divergence resynch /////////////////////////////////////////////////// From c48da35921035c45dee83d1cdb251a834b199831 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 22 Jun 2020 20:21:53 -0400 Subject: [PATCH 06/21] Memory Vector UVM and Lattice alignedAllocator separate --- ...GeneralisedConjugateResidualNonHermitian.h | 241 ++++++++++++++++++ Grid/allocator/AlignedAllocator.h | 62 ++++- Grid/allocator/MemoryManager.cc | 65 ++++- Grid/allocator/MemoryManager.h | 12 +- 4 files changed, 350 insertions(+), 30 deletions(-) create mode 100644 Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h diff --git a/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h new file mode 100644 index 00000000..22b7725e --- /dev/null +++ b/Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h @@ -0,0 +1,241 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_PREC_GCR_NON_HERM_H +#define GRID_PREC_GCR_NON_HERM_H + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +//VPGCR Abe and Zhang, 2005. +//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING +//Computing and Information Volume 2, Number 2, Pages 147-161 +//NB. Likely not original reference since they are focussing on a preconditioner variant. +// but VPGCR was nicely written up in their paper +/////////////////////////////////////////////////////////////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); + +#define GCRLogLevel std::cout << GridLogMessage < +class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction { +public: + + RealD Tolerance; + Integer MaxIterations; + int verbose; + int mmax; + int nstep; + int steps; + int level; + GridStopWatch PrecTimer; + GridStopWatch MatTimer; + GridStopWatch LinalgTimer; + + LinearFunction &Preconditioner; + LinearOperatorBase &Linop; + + void Level(int lv) { level=lv; }; + + PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase &_Linop,LinearFunction &Prec,int _mmax,int _nstep) : + Tolerance(tol), + MaxIterations(maxit), + Linop(_Linop), + Preconditioner(Prec), + mmax(_mmax), + nstep(_nstep) + { + level=1; + verbose=1; + }; + + void operator() (const Field &src, Field &psi){ + + psi=Zero(); + RealD cp, ssq,rsq; + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; + + Field r(src.Grid()); + + PrecTimer.Reset(); + MatTimer.Reset(); + LinalgTimer.Reset(); + + GridStopWatch SolverTimer; + SolverTimer.Start(); + + steps=0; + for(int k=0;k q(mmax,grid); + std::vector p(mmax,grid); + std::vector qq(mmax); + + GCRLogLevel<< "PGCR nStep("<(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history. 
+ for(int back=0;back=0); + + b=-real(innerProduct(q[peri_back],Az))/qq[peri_back]; + p[peri_kp]=p[peri_kp]+b*p[peri_back]; + q[peri_kp]=q[peri_kp]+b*q[peri_back]; + + } + qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm + LinalgTimer.Stop(); + } + assert(0); // never reached + return cp; + } +}; +NAMESPACE_END(Grid); +#endif diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index 6c6dd7d8..ebb3162b 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -52,41 +52,79 @@ public: pointer allocate(size_type __n, const void* _p= 0) { size_type bytes = __n*sizeof(_Tp); - profilerAllocate(bytes); - _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); - assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); - return ptr; } void deallocate(pointer __p, size_type __n) { size_type bytes = __n * sizeof(_Tp); - profilerFree(bytes); - MemoryManager::CpuFree((void *)__p,bytes); } + // FIXME: hack for the copy constructor, eventually it must be avoided + //void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); }; + void construct(pointer __p, const _Tp& __val) { assert(0);}; + void construct(pointer __p) { }; + void destroy(pointer __p) { }; +}; +template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } +template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } + +template +class uvmAllocator { +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef _Tp* pointer; + typedef const _Tp* const_pointer; + typedef _Tp& reference; + typedef const _Tp& const_reference; + typedef _Tp value_type; + + template struct rebind { typedef uvmAllocator<_Tp1> other; }; + uvmAllocator() throw() { } + uvmAllocator(const uvmAllocator&) throw() { } + template uvmAllocator(const uvmAllocator<_Tp1>&) throw() { } + ~uvmAllocator() throw() { } + pointer address(reference __x) const { return &__x; } + size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } + + pointer allocate(size_type __n, const void* _p= 0) + { + size_type bytes = __n*sizeof(_Tp); + profilerAllocate(bytes); + _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); + assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); + return ptr; + } + + void deallocate(pointer __p, size_type __n) + { + size_type bytes = __n * sizeof(_Tp); + profilerFree(bytes); + MemoryManager::SharedFree((void *)__p,bytes); + } + // FIXME: hack for the copy constructor, eventually it must be avoided void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); }; //void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; void destroy(pointer __p) { }; }; -template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } -template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } +template inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; } +template inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; } //////////////////////////////////////////////////////////////////////////////// // Template typedefs //////////////////////////////////////////////////////////////////////////////// -template using commAllocator = alignedAllocator; -template using Vector = std::vector >; -template using commVector = std::vector >; -template using Matrix = std::vector > >; +template 
using commAllocator = uvmAllocator; +template using Vector = std::vector >; +template using commVector = std::vector >; +//template using Matrix = std::vector > >; NAMESPACE_END(Grid); diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index 6d638b60..fa60e820 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -7,6 +7,17 @@ NAMESPACE_BEGIN(Grid); #define CpuSmall (1) #define Acc (2) #define AccSmall (3) +#define Shared (4) +#define SharedSmall (5) +uint64_t total_shared; +uint64_t total_device; +uint64_t total_host;; +void MemoryManager::PrintBytes(void) +{ + std::cout << " MemoryManager : "<=0) && (Nc < NallocCacheMax)) { Ncache[Cpu]=Nc; Ncache[Acc]=Nc; + Ncache[Shared]=Nc; } } @@ -84,6 +122,7 @@ void MemoryManager::Init(void) if ( (Nc>=0) && (Nc < NallocCacheMax)) { Ncache[CpuSmall]=Nc; Ncache[AccSmall]=Nc; + Ncache[SharedSmall]=Nc; } } std::cout << GridLogMessage<< "MemoryManager::Init() setting up"< Date: Tue, 23 Jun 2020 11:10:26 +0100 Subject: [PATCH 07/21] Baryon bug fixes --- Grid/qcd/utils/BaryonUtils.h | 168 +++++++++++++++++------------------ 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 3384d273..297f6a2e 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -168,107 +168,107 @@ template void BaryonUtils::baryon_site(const mobj &D1, const mobj &D2, const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, + const Gamma GammaA_i, + const Gamma GammaB_i, + const Gamma GammaA_f, + const Gamma GammaB_f, const int parity, const bool * wick_contraction, robj &result) { Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) - auto gD1a = GammaA_left * GammaA_right * D1; - auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (Real)parity * gD1b); - auto gD3 = GammaB_right * D3; - auto D2g = D2 * GammaB_left; - auto pD1g = pD1 * GammaB_left; - auto gD3g = gD3 * GammaB_left; + + auto D1_GAi = D1 * GammaA_i; + auto D1_GAi_g4 = D1_GAi * g4; + auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4); + auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; + auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; - for (int ie_left=0; ie_left < 6 ; ie_left++){ - int a_left = epsilon[ie_left][0]; //a - int b_left = epsilon[ie_left][1]; //b - int c_left = epsilon[ie_left][2]; //c - for (int ie_right=0; ie_right < 6 ; ie_right++){ - int a_right = epsilon[ie_right][0]; //a' - int b_right = epsilon[ie_right][1]; //b' - int c_right = epsilon[ie_right][2]; //c' - Real ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; + auto D2_GBi = D2 * GammaB_i; + auto GBf_D2_GBi = GammaB_f * D2_GBi; + auto GAf_D2_GBi = GammaA_f * D2_GBi; + + auto GBf_D3 = GammaB_f * D3; + auto GAf_D3 = GammaA_f * D3; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + int a_f = epsilon[ie_f][0]; //a + int b_f = epsilon[ie_f][1]; //b + int c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + int a_i = epsilon[ie_i][0]; //a' + int b_i = epsilon[ie_i][1]; //b' + int c_i = epsilon[ie_i][2]; //c' + + Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - for (int gamma_left=0; gamma_left From 3e97a26f90b66876fa44fd72fd444efb8ea61cb9 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 23 Jun 2020 11:35:32 +0100 Subject: [PATCH 08/21] BaryonGamm3pt threads -> accelerator --- Grid/qcd/utils/BaryonUtils.h | 
96 +++++++++++++----------------------- 1 file changed, 33 insertions(+), 63 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index a6f8b78d..d36c3cb8 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -451,17 +451,7 @@ void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); - auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); - auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); - auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(0,0)(0,0); - auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(0,0)(0,0); - auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); - auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(0,0)(0,0); - auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); - auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = epsilon[ie_f][0]; //a @@ -476,18 +466,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group2_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(0,0)(0,0); - auto Gf_D3_ab_bb = Gf_D3 ()(0,0)(0,0); - auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(0,0)(0,0); - auto Dq3_spec_ab_ab = Dq3_spec ()(0,0)(0,0); - auto ee_Dq1_spec_ag_ac = ee* Dq1_spec ()(0,0)(0,0); - auto ee_Gf_D1_ag_bc = ee*Gf_D1 ()(0,0)(0,0); - auto ee_Dq1_spec_gg_cc = ee*Dq1_spec ()(0,0)(0,0); - auto Dq3_spec_gb_cb = Dq3_spec ()(0,0)(0,0); - auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = epsilon[ie_f][0]; //a @@ -577,18 +557,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group3_Site( int a_f, b_f, c_f; int a_i, b_i, c_i; - Complex ee; - - auto D2_Gi_ab_aa = D2_Gi ()(0,0)(0,0); - auto Gf_adjD4_g_D3_ab_bb = Gf_adjD4_g_D3 ()(0,0)(0,0); - auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(0,0)(0,0); - auto adjD4_g_D3_ab_ab = adjD4_g_D3 ()(0,0)(0,0); - auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(0,0)(0,0); - auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(0,0)(0,0); - auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(0,0)(0,0); - auto adjD4_g_D3_gb_cb = adjD4_g_D3 ()(0,0)(0,0); - auto D2_Gi_gb_ca = D2_Gi ()(0,0)(0,0); + Real ee; for (int ie_f=0; ie_f < 6 ; ie_f++){ a_f = epsilon[ie_f][0]; //a @@ -678,18 +648,18 @@ void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt( auto vq_tf = q_tf.View(); if (group ==1) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); @@ -755,7 +725,7 @@ void BaryonUtils::Baryon_Gamma_3pt( vcorr[ss] += result; });//end loop over lattice sites } else if (group == 2) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); @@ -763,7 +733,7 @@ void BaryonUtils::Baryon_Gamma_3pt( vcorr[ss] += result; });//end loop over lattice sites } else if (group == 3) { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_ti = vq_ti[ss]; auto Dq_tf = vq_tf[ss]; sobj result=Zero(); From 4ef50ba31f2b39f39492c35b57ee9e4c10457f3b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 23 Jun 2020 11:44:20 +0100 Subject: [PATCH 09/21] Baryon speedup --- Grid/qcd/utils/BaryonUtils.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git 
a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 297f6a2e..9e9a6957 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -205,11 +205,12 @@ void BaryonUtils::baryon_site(const mobj &D1, //This is the \delta_{456}^{123} part if (wick_contraction[0]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[1]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[2]){ for (int rho=0; rho::baryon_site(const mobj &D1, //This is the \delta_{456}^{132} part if (wick_contraction[3]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[4]){ for (int rho=0; rho::baryon_site(const mobj &D1, if (wick_contraction[5]){ for (int rho=0; rho Date: Tue, 23 Jun 2020 10:24:21 -0400 Subject: [PATCH 10/21] Adding code under development --- tests/solver/Test_dwf_multigrid.cc | 594 +++++++++++++++++++++++++++++ tests/solver/Test_hw_multigrid.cc | 356 +++++++++++++++++ 2 files changed, 950 insertions(+) create mode 100644 tests/solver/Test_dwf_multigrid.cc create mode 100644 tests/solver/Test_hw_multigrid.cc diff --git a/tests/solver/Test_dwf_multigrid.cc b/tests/solver/Test_dwf_multigrid.cc new file mode 100644 index 00000000..9e11c160 --- /dev/null +++ b/tests/solver/Test_dwf_multigrid.cc @@ -0,0 +1,594 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_hdcr.cc + + Copyright (C) 2015 + +Author: Antonin Portelli +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; + 
+#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < +class MultiGridPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + CoarseOperator & _CoarseOperator; + FineOperator & _FineOperator; + Guesser & _Guess; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, + FineOperator &Fine, + FineSmoother &Smoother, + Guesser &Guess_, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _CoarseOperator(Coarse), + _FineOperator(Fine), + _Smoother(Smoother), + _Guess(Guess_), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + CoarseVector Csrc(_CoarseOperator.Grid()); + CoarseVector Csol(_CoarseOperator.Grid()); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + std::vector blockc ({2,2,2,2}); + const int nbasis= 32; + const int nbasisc= 32; + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); + LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src; + LatticeFermion result(FGrid); + LatticeGaugeField Umu(UGrid); + + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + std::cout< HermDefOp(Ddwf); + + Subspace Aggregates(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + { + int nb=nbasis/2; + 
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0); + for(int n=0;n Level1Op; + typedef CoarsenedMatrix,nbasisc> Level2Op; + + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOpPV(Dpv); + + std::cout< CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + // GeneralisedMinimalResidual CoarseGMRES(tol,MaxIt,20); + + BiCGSTAB FineBiCGSTAB(tol,MaxIt); + ConjugateGradient FineCG(tol,MaxIt); + // GeneralisedMinimalResidual FineGMRES(tol,MaxIt,20); + + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + PVdagMLinearOperator FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M + SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe + SchurDiagOneOperator FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e + + MdagMLinearOperator CoarseMdagM(LDOp); + PVdagMLinearOperator CoarsePVdagM(LDOp,LDOpPV); + + std::cout< IRLCheby(0.03,12.0,71); // 1 iter + FunctionHermOp IRLOpCheby(IRLCheby,CoarseMdagM); + PlainHermOp IRLOp (CoarseMdagM); + int Nk=64; + int Nm=128; + int Nstop=Nk; + ImplicitlyRestartedLanczos IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20); + + int Nconv; + std::vector eval(Nm); + std::vector evec(Nm,Coarse5d); + IRL.calc(eval,evec,c_src,Nconv); + + std::cout< DeflCoarseGuesser(evec,eval); + NormalEquations DeflCoarseCGNE (LDOp,CoarseCG,DeflCoarseGuesser); + c_res=Zero(); + DeflCoarseCGNE(c_src,c_res); + + + std::cout< CoarseMgridCG(0.001,1000); + ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); + + typedef HDCRPreconditioner > TwoLevelHDCR; + TwoLevelHDCR TwoLevelPrecon(Aggregates, + HermIndefOp, + FineSmoother, + DeflCoarseCGNE); + TwoLevelPrecon.Level(1); + // PrecGeneralisedConjugateResidual l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + PrecGeneralisedConjugateResidualNonHermitian l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + l1PGCR.Level(1); + + f_res=Zero(); + + CoarseCG.Tolerance=0.02; + l1PGCR(f_src,f_res); + + std::cout< CoarseMgridBiCGSTAB(0.01,1000); + BiCGSTAB FineMgridBiCGSTAB(0.0,24); + ZeroGuesser CoarseZeroGuesser; + ZeroGuesser FineZeroGuesser; + + SolverWrapper FineBiCGSmoother( FinePVdagM, FineMgridBiCGSTAB, FineZeroGuesser); + SolverWrapper CoarsePVdagMSolver(CoarsePVdagM,CoarseMgridBiCGSTAB,CoarseZeroGuesser); + typedef HDCRPreconditioner > TwoLevelMG; + + TwoLevelMG _TwoLevelMG(Aggregates, + FinePVdagM, + FineBiCGSmoother, + CoarsePVdagMSolver); + _TwoLevelMG.Level(1); + + PrecGeneralisedConjugateResidualNonHermitian pvPGCR(1.0e-8,100,FinePVdagM,_TwoLevelMG,16,16); + pvPGCR.Level(1); + + f_res=Zero(); + pvPGCR(f_src,f_res); + + std::cout< +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +//#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; 
+ +#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + const int nbasis= 32; + + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds({1,2,3,4}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField Umu(UGrid); + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + + std::cout< SubspaceOp(Dw); + + Subspace Aggregates4D(Coarse4d,UGrid,0); + Subspace Aggregates5D(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + std::cout< Level1Op; + + + NonHermitianLinearOperator LinOpDwf(Ddwf); + + Level1Op LDOp (*Coarse5d,1); + LDOp.CoarsenOperator(FGrid,LinOpDwf,Aggregates5D); + + std::cout< Date: Tue, 23 Jun 2020 22:14:56 -0400 Subject: [PATCH 11/21] UVM used shared for CPU alloccations andd ddont migrate --- Grid/allocator/MemoryManager.cc | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index fa60e820..c92cd18a 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -35,8 +35,6 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes) if ( ptr == (void *) NULL ) { ptr = (void *) acceleratorAllocDevice(bytes); total_device+=bytes; - // std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "< Date: Wed, 24 Jun 2020 08:24:38 -0400 Subject: [PATCH 12/21] Memory manager initialise earlier --- Grid/util/Init.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index e93f3046..656e29a9 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -318,6 +318,11 @@ void Grid_init(int *argc,char ***argv) Grid_debug_handler_init(); } + ////////////////////////////////////////////////////////// + // Memory manager + ////////////////////////////////////////////////////////// + MemoryManager::Init(); + ////////////////////////////////////////////////////////// // MPI initialisation 
////////////////////////////////////////////////////////// @@ -357,11 +362,6 @@ void Grid_init(int *argc,char ***argv) std::cout << GridLogMessage << "================================================ "< Date: Wed, 24 Jun 2020 08:54:49 -0400 Subject: [PATCH 13/21] Force initial values --- Grid/allocator/MemoryManager.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc index c92cd18a..e11ce948 100644 --- a/Grid/allocator/MemoryManager.cc +++ b/Grid/allocator/MemoryManager.cc @@ -24,7 +24,7 @@ void MemoryManager::PrintBytes(void) ////////////////////////////////////////////////////////////////////// MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax]; int MemoryManager::Victim[MemoryManager::NallocType]; -int MemoryManager::Ncache[MemoryManager::NallocType]; +int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 }; ////////////////////////////////////////////////////////////////////// // Actual allocation and deallocation utils @@ -112,12 +112,6 @@ void MemoryManager::CpuFree (void *_ptr,size_t bytes) ////////////////////////////////////////// void MemoryManager::Init(void) { - Ncache[Cpu] = 8; - Ncache[Acc] = 8; - Ncache[Shared] = 8; - Ncache[CpuSmall] = 32; - Ncache[AccSmall] = 32; - Ncache[SharedSmall] = 32; char * str; int Nc; From 22cfbdbbb386e4787290522e1a24010fd964e821 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 24 Jun 2020 12:52:31 -0400 Subject: [PATCH 14/21] Boost precision in inner products in single --- Grid/lattice/Lattice_reduction.h | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 67709c94..07968024 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -93,7 +93,9 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites) ssum = ssum+sumarray[i]; } - return ssum; + typedef typename vobj::scalar_object ssobj; + ssobj ret = ssum; + return ret; } @@ -154,7 +156,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. - typedef decltype(innerProduct(vobj(),vobj())) inner_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -163,16 +165,15 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... 
- accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v(ss); - auto y_l = right_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); - }) + accelerator_for( ss, sites, 1,{ + auto x_l = left_v[ss]; + auto y_l = right_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l)); + }); } // This is in single precision and fails some tests - // Need a sumD that sums in double - nrm = TensorRemove(sumD(inner_tmp_v,sites)); + nrm = TensorRemove(sum(inner_tmp_v,sites)); return nrm; } @@ -218,16 +219,16 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, nsimd,{ - auto tmp = a*x_v(ss)+b*y_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); - coalescedWrite(z_v[ss],tmp); + accelerator_for( ss, sites, 1,{ + auto tmp = a*x_v[ss]+b*y_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp)); + z_v[ss]=tmp; }); - nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); + nrm = real(TensorRemove(sum(inner_tmp_v,sites))); grid->GlobalSum(nrm); return nrm; } @@ -243,29 +244,28 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti GridBase *grid = left.Grid(); - const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); // GPU - typedef decltype(innerProduct(vobj(),vobj())) inner_t; - typedef decltype(innerProduct(vobj(),vobj())) norm_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; + typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t; Vector inner_tmp(sites); - Vector norm_tmp(sites); + Vector norm_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; auto norm_tmp_v = &norm_tmp[0]; { autoView(left_v,left, AcceleratorRead); autoView(right_v,right,AcceleratorRead); - accelerator_for( ss, sites, nsimd,{ - auto left_tmp = left_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss))); - coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp)); + accelerator_for( ss, sites, 1,{ + auto left_tmp = left_v[ss]; + inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss])); + norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp)); }); } - tmp[0] = TensorRemove(sumD(inner_tmp_v,sites)); - tmp[1] = TensorRemove(sumD(norm_tmp_v,sites)); + tmp[0] = TensorRemove(sum(inner_tmp_v,sites)); + tmp[1] = TensorRemove(sum(norm_tmp_v,sites)); grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector ip = tmp[0]; From 936c5ecf69d240c783ce9fee8eb66e510eba71b7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 24 Jun 2020 17:28:31 -0400 Subject: [PATCH 15/21] Reduction GPU no compile fix --- Grid/lattice/Lattice_reduction.h | 20 ++++++++++---------- Grid/tensors/Tensor_class.h | 31 ++++++++++++++++--------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 07968024..c2955485 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -62,7 +62,6 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) for(int i=0;i @@ -156,7 +155,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. 
- typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; + typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -168,12 +167,13 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & accelerator_for( ss, sites, 1,{ auto x_l = left_v[ss]; auto y_l = right_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(x_l,y_l)); + inner_tmp_v[ss]=innerProductD(x_l,y_l); }); } // This is in single precision and fails some tests - nrm = TensorRemove(sum(inner_tmp_v,sites)); + auto anrm = sum(inner_tmp_v,sites); + nrm = anrm; return nrm; } @@ -219,13 +219,13 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - typedef decltype(Reduce(innerProductD(x_v[0],y_v[0]))) inner_t; + typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; accelerator_for( ss, sites, 1,{ auto tmp = a*x_v[ss]+b*y_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(tmp,tmp)); + inner_tmp_v[ss]=innerProductD(tmp,tmp); z_v[ss]=tmp; }); nrm = real(TensorRemove(sum(inner_tmp_v,sites))); @@ -248,8 +248,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti const uint64_t sites = grid->oSites(); // GPU - typedef decltype(Reduce(innerProductD(vobj(),vobj()))) inner_t; - typedef decltype(Reduce(innerProductD(vobj(),vobj()))) norm_t; + typedef decltype(innerProductD(vobj(),vobj())) inner_t; + typedef decltype(innerProductD(vobj(),vobj())) norm_t; Vector inner_tmp(sites); Vector norm_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -259,8 +259,8 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti autoView(right_v,right,AcceleratorRead); accelerator_for( ss, sites, 1,{ auto left_tmp = left_v[ss]; - inner_tmp_v[ss]=Reduce(innerProductD(left_tmp,right_v[ss])); - norm_tmp_v [ss]=Reduce(innerProductD(left_tmp,left_tmp)); + inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]); + norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp); }); } diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index dbcbae8d..36becc49 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -59,6 +59,20 @@ class GridTensorBase {}; using DoublePrecision2= typename Traits::DoublePrecision2; \ static constexpr int TensorLevel = Traits::TensorLevel +/////////////////////////////////////////////////////////// +// Allows to turn scalar>>> back to double. +/////////////////////////////////////////////////////////// +template +accelerator_inline typename std::enable_if::value, T>::type +TensorRemove(T arg) { + return arg; +} +template +accelerator_inline auto TensorRemove(iScalar arg) + -> decltype(TensorRemove(arg._internal)) { + return TensorRemove(arg._internal); +} + template class iScalar { public: @@ -135,9 +149,10 @@ public: operator ComplexD() const { return (TensorRemove(_internal)); } + // instantiation of "Grid::iScalar::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, =0, =0U]" template = 0,IfNotSimd = 0> accelerator_inline operator RealD() const { - return TensorRemove(_internal); + return (RealD) TensorRemove(_internal); } template = 0, IfNotSimd = 0> accelerator_inline operator Integer() const { @@ -169,20 +184,6 @@ public: strong_inline scalar_type * end() { return begin() + Traits::count; } }; -/////////////////////////////////////////////////////////// -// Allows to turn scalar>>> back to double. 
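// Illustrative sketch (not the Grid implementation): the TensorRemove block
// moved above peels nested scalar wrappers back down to a plain number, and has
// to be declared before the iScalar conversion operators that use it.  A
// cut-down analogue with a hypothetical Wrap<> type:
#include <cstdio>
#include <type_traits>

namespace demo {

template <class T> struct Wrap { T internal; };

// Base case: arithmetic types pass straight through.
template <class T>
typename std::enable_if<std::is_arithmetic<T>::value, T>::type
tensor_remove(T arg) { return arg; }

// Recursive case: strip one Wrap<> layer at a time.
template <class T>
auto tensor_remove(Wrap<T> arg) -> decltype(tensor_remove(arg.internal)) {
  return tensor_remove(arg.internal);
}

} // namespace demo

int main() {
  demo::Wrap<demo::Wrap<double>> x{{3.5}};
  std::printf("%f\n", demo::tensor_remove(x));   // 3.500000
  return 0;
}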
-/////////////////////////////////////////////////////////// -template -accelerator_inline typename std::enable_if::value, T>::type -TensorRemove(T arg) { - return arg; -} -template -accelerator_inline auto TensorRemove(iScalar arg) - -> decltype(TensorRemove(arg._internal)) { - return TensorRemove(arg._internal); -} - template class iVector { public: From 102089798c4e048b3b47bf22e224720bf23d0278 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Thu, 25 Jun 2020 16:41:58 +0100 Subject: [PATCH 16/21] BaryonUtils: update to autoView --- Grid/qcd/utils/BaryonUtils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index c7a72812..11744d16 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -712,9 +712,9 @@ void BaryonUtils::Baryon_Gamma_3pt( { GridBase *grid = q_tf.Grid(); - auto vcorr= stn_corr.View(); - auto vq_ti = q_ti.View(); - auto vq_tf = q_tf.View(); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_ti , q_ti, CpuRead); + autoView( vq_tf , q_tf, CpuRead); if (group ==1) { accelerator_for(ss, grid->oSites(), grid->Nsimd(), { From 77af9a3ddca85a34097a5282c290601b95c1a345 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Fri, 26 Jun 2020 10:08:42 +0100 Subject: [PATCH 17/21] Baryon revert sign --- Grid/qcd/utils/BaryonUtils.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 11744d16..df16db45 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -257,7 +257,7 @@ void BaryonUtils::baryon_site(const mobj &D1, auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i); for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i); for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f::baryon_site(const mobj &D1, for (int alpha_f=0; alpha_f Date: Mon, 29 Jun 2020 09:43:01 +0100 Subject: [PATCH 18/21] Update to baryon and added comments/fix whitespace --- Grid/qcd/utils/BaryonUtils.h | 827 ++++++++++++++++++----------------- 1 file changed, 428 insertions(+), 399 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index df16db45..b268b684 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -62,6 +62,9 @@ public: const bool * wick_contractions, robj &result); public: + static void Wick_Contractions(std::string qi, + std::string qf, + bool* wick_contractions); static void ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, const PropagatorField &q3_left, @@ -80,59 +83,59 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const int wick_contraction, + const bool* wick_contractions, const int parity, const int nt, robj &result); - private: - template - static void Baryon_Gamma_3pt_Group1_Site( - const mobj &Dq1_ti, - const mobj2 &Dq2_spec, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); + private: + template + static void Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, 
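// Illustrative sketch (not the Grid implementation): patch 16 above replaces
// bare .View() calls with autoView(name, field, CpuRead/CpuWrite).  The guessed
// motivation: a scoped view object records the access mode, so the container
// can stage data into the right memory space when the view opens and mark it
// modified when the view closes.  A hypothetical RAII analogue:
#include <cstddef>
#include <vector>

enum ViewMode { CpuRead, CpuWrite };

template <class T>
class ScopedView {
  std::vector<T> &data_;
  ViewMode        mode_;
public:
  ScopedView(std::vector<T> &d, ViewMode m) : data_(d), mode_(m) {
    // A real implementation would make the data resident on the host here.
  }
  ~ScopedView() {
    // ...and here it would mark the host copy as modified if mode_ == CpuWrite.
    (void)mode_;
  }
  T       &operator[](std::size_t i)       { return data_[i]; }
  const T &operator[](std::size_t i) const { return data_[i]; }
};

// Usage: { ScopedView<double> v(field, CpuWrite); v[0] = 1.0; }  // closes at scope exit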
+ robj &result); - template - static void Baryon_Gamma_3pt_Group2_Site( - const mobj2 &Dq1_spec, - const mobj &Dq2_ti, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); + template + static void Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); - template - static void Baryon_Gamma_3pt_Group3_Site( - const mobj2 &Dq1_spec, - const mobj2 &Dq2_spec, - const mobj &Dq3_ti, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result); - public: - template - static void Baryon_Gamma_3pt( - const PropagatorField &q_ti, - const mobj &Dq_spec1, - const mobj &Dq_spec2, - const PropagatorField &q_tf, - int group, - int wick_contraction, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - SpinMatrixField &stn_corr); + template + static void Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + public: + template + static void Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr); private: template static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop, @@ -215,15 +218,15 @@ const Real BaryonUtils::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.}; template template void BaryonUtils::baryon_site(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_i, - const Gamma GammaB_i, - const Gamma GammaA_f, - const Gamma GammaB_f, - const int parity, - const bool * wick_contraction, - robj &result) + const mobj &D2, + const mobj &D3, + const Gamma GammaA_i, + const Gamma GammaB_i, + const Gamma GammaA_f, + const Gamma GammaB_f, + const int parity, + const bool * wick_contraction, + robj &result) { Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) @@ -231,101 +234,121 @@ void BaryonUtils::baryon_site(const mobj &D1, auto D1_GAi = D1 * GammaA_i; auto D1_GAi_g4 = D1_GAi * g4; auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4); - auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; - auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; + auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; + auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; - auto D2_GBi = D2 * GammaB_i; - auto GBf_D2_GBi = GammaB_f * D2_GBi; - auto GAf_D2_GBi = GammaA_f * D2_GBi; + auto D2_GBi = D2 * GammaB_i; + auto GBf_D2_GBi = GammaB_f * D2_GBi; + auto GAf_D2_GBi = GammaA_f * D2_GBi; - auto GBf_D3 = GammaB_f * D3; - auto GAf_D3 = GammaA_f * D3; + auto GBf_D3 = GammaB_f * D3; + auto GAf_D3 = GammaA_f * D3; for (int ie_f=0; ie_f < 6 ; ie_f++){ - int a_f = epsilon[ie_f][0]; //a - int b_f = epsilon[ie_f][1]; //b - int c_f = epsilon[ie_f][2]; //c + int a_f = epsilon[ie_f][0]; //a + int b_f = epsilon[ie_f][1]; //b + int c_f = epsilon[ie_f][2]; //c for (int ie_i=0; ie_i < 6 ; ie_i++){ - int a_i = epsilon[ie_i][0]; //a' - int b_i = epsilon[ie_i][1]; //b' - int c_i = epsilon[ie_i][2]; //c' + int a_i = epsilon[ie_i][0]; //a' + int b_i = epsilon[ie_i][1]; //b' + int c_i = epsilon[ie_i][2]; //c' - 
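// Illustrative aside (not Grid code): the 0.5*(D1_GAi + parity * D1_GAi_g4)
// combination in baryon_site above is the standard parity projector
// P_\pm = (1 \pm \gamma_4)/2 applied to the propagator.  A toy version acting
// on a single spinor, assuming a basis in which \gamma_4 is diagonal with
// entries (+1, +1, -1, -1):
#include <array>
#include <cstdio>

using Vec4 = std::array<double, 4>;

Vec4 parity_project(const Vec4 &v, int parity /* +1 or -1 */) {
  const double g4[4] = {+1., +1., -1., -1.};        // assumed diagonal gamma_4
  Vec4 out{};
  for (int i = 0; i < 4; ++i)
    out[i] = 0.5 * (v[i] + parity * g4[i] * v[i]);  // (1 \pm \gamma_4)/2
  return out;
}

int main() {
  Vec4 p = parity_project({1., 2., 3., 4.}, +1);    // keeps the "upper" components
  std::printf("%g %g %g %g\n", p[0], p[1], p[2], p[3]);   // 1 2 0 0
  return 0;
}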
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - for (int rho=0; rho +void BaryonUtils::Wick_Contractions(std::string qi, std::string qf, bool* wick_contractions) { + const int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; + for (int ie=0; ie < 6 ; ie++) { + wick_contractions[ie] = (qi.size() == 3 && qf.size() == 3 + && qi[0] == qf[epsilon[ie][0]] + && qi[1] == qf[epsilon[ie][1]] + && qi[2] == qf[epsilon[ie][2]]); + } +} + +/* The array wick_contractions must be of length 6. The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can be computed from the quark flavours using the * + * Wick_Contractions function above */ template void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -383,6 +406,12 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl; } + +/* The array wick_contractions must be of length 6. The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can also be computed from the quark flavours using the * + * Wick_Contractions function above */ template template void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, @@ -392,7 +421,7 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const int wick_contraction, + const bool* wick_contractions, const int parity, const int nt, robj &result) @@ -408,10 +437,6 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - bool wick_contractions[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contractions[ie] = (ie == wick_contraction); - for (int t=0; t::ContractBaryons_Sliced(const mobj &D1, template template void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( - const mobj &Dq1_ti, - const mobj2 &Dq2_spec, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; - auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; - auto D2_Gi = Dq2_spec * GammaBi; - auto Gf_D2_Gi = GammaBf * D2_Gi; - auto Gf_D3 = GammaBf * Dq3_spec; + auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; + auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + auto Gf_D3 = GammaBf * Dq3_spec; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = 
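// Standalone sketch of the Wick_Contractions logic added above (illustrative,
// not the Grid source): given initial- and final-state quark flavour strings,
// contraction ie is switched on when the final-state flavours, permuted by
// epsilon[ie], match the initial-state flavours index by index.
#include <cstdio>
#include <string>

static const int eps[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};

void wick_contractions(const std::string &qi, const std::string &qf, bool out[6]) {
  for (int ie = 0; ie < 6; ++ie)
    out[ie] = (qi.size() == 3 && qf.size() == 3
               && qi[0] == qf[eps[ie][0]]
               && qi[1] == qf[eps[ie][1]]
               && qi[2] == qf[eps[ie][2]]);
}

int main() {
  bool w[6];
  wick_contractions("udu", "udu", w);
  // Two contractions survive because the first and third quarks share a flavour.
  for (int ie = 0; ie < 6; ++ie) std::printf("%d", (int)w[ie]);
  std::printf("\n");
  return 0;
}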
epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group1_Site( template template void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( - const mobj2 &Dq1_spec, - const mobj &Dq2_ti, - const mobj2 &Dq3_spec, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; - auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; - auto Gf_D1 = GammaBf * Dq1_spec; - auto Gf_D3 = GammaBf * Dq3_spec; + auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; + auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; + auto Gf_D1 = GammaBf * Dq1_spec; + auto Gf_D3 = GammaBf * Dq3_spec; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f::Baryon_Gamma_3pt_Group2_Site( template template void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( - const mobj2 &Dq1_spec, - const mobj2 &Dq2_spec, - const mobj &Dq3_ti, - const mobj &Dq4_tf, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - int wick_contraction, - robj &result) + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) { - Gamma g5(Gamma::Algebra::Gamma5); + Gamma g5(Gamma::Algebra::Gamma5); - auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; - auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; - auto Gf_D1 = GammaBf * Dq1_spec; - auto D2_Gi = Dq2_spec * GammaBi; - auto Gf_D2_Gi = GammaBf * D2_Gi; + auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; + auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; + auto Gf_D1 = GammaBf * Dq1_spec; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; - int a_f, b_f, c_f; - int a_i, b_i, c_i; + int a_f, b_f, c_f; + int a_i, b_i, c_i; - Real ee; + Real ee; - for (int ie_f=0; ie_f < 6 ; ie_f++){ - a_f = epsilon[ie_f][0]; //a - b_f = epsilon[ie_f][1]; //b - c_f = epsilon[ie_f][2]; //c - for (int ie_i=0; ie_i < 6 ; ie_i++){ - a_i = epsilon[ie_i][0]; //a' - b_i = epsilon[ie_i][1]; //b' - c_i = epsilon[ie_i][2]; //c' + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' 
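// Illustrative aside (not Grid code): the Group*_Site routines above all follow
// the same pattern -- the g5 * adj(Dq4_tf) * g5 factor is the usual
// gamma_5-hermiticity trick for building the backward-going propagator from the
// adjoint of the forward one, and every gamma/propagator product that does not
// depend on the colour indices is formed once, before the double loop over the
// six epsilon permutations.  A generic sketch of that hoisting with toy 2x2
// matrices (all names here are hypothetical):
#include <array>

using Mat2 = std::array<std::array<double, 2>, 2>;

static Mat2 mul(const Mat2 &a, const Mat2 &b) {
  Mat2 c{};
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 2; ++j)
      for (int k = 0; k < 2; ++k)
        c[i][j] += a[i][k] * b[k][j];
  return c;
}

double contract(const Mat2 &A, const Mat2 &B, const Mat2 &C) {
  Mat2 AB = mul(A, B);                       // index-independent product, hoisted
  static const int    perm[2][2] = {{0, 1}, {1, 0}};
  static const double sgn[2]     = {+1., -1.};
  double sum = 0.0;
  for (int p = 0; p < 2; ++p)                // only cheap element accesses remain
    sum += sgn[p] * AB[perm[p][0]][perm[p][1]] * C[perm[p][1]][perm[p][0]];
  return sum;
}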
+ b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' - ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; - for (int alpha_f=0; alpha_f template void BaryonUtils::Baryon_Gamma_3pt( - const PropagatorField &q_ti, - const mobj &Dq_spec1, - const mobj &Dq_spec2, - const PropagatorField &q_tf, - int group, - int wick_contraction, - const Gamma GammaJ, - const Gamma GammaBi, - const Gamma GammaBf, - SpinMatrixField &stn_corr) + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr) { - GridBase *grid = q_tf.Grid(); + GridBase *grid = q_tf.Grid(); - autoView( vcorr, stn_corr, CpuWrite); - autoView( vq_ti , q_ti, CpuRead); - autoView( vq_tf , q_tf, CpuRead); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_ti , q_ti, CpuRead); + autoView( vq_tf , q_tf, CpuRead); - if (group ==1) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - vcorr[ss] += result; - });//end loop over lattice sites - } else if (group == 2) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - vcorr[ss] += result; - });//end loop over lattice sites - } else if (group == 3) { - accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - auto Dq_ti = vq_ti[ss]; - auto Dq_tf = vq_tf[ss]; - sobj result=Zero(); - Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); - - vcorr[ss] += result; - });//end loop over lattice sites - } + if (group == 1) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 2) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 3) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } } From ee9889821d28996ed52cae1868c90300e4febe26 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 29 Jun 2020 12:59:52 -0400 Subject: [PATCH 19/21] Runs through to coarse space solve --- tests/solver/Test_hw_multigrid.cc | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/solver/Test_hw_multigrid.cc b/tests/solver/Test_hw_multigrid.cc index ccca9971..b728faa7 100644 --- a/tests/solver/Test_hw_multigrid.cc +++ b/tests/solver/Test_hw_multigrid.cc @@ -6,9 +6,7 @@ Copyright (C) 2015 -Author: Antonin Portelli Author: Peter Boyle -Author: paboyle This 
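// Illustrative sketch (not Grid code): Baryon_Gamma_3pt above decides on
// `group` once, on the host, and launches a separate accelerator_for per case,
// rather than branching on `group` for every lattice site inside the kernel.
// A host-side analogue of that dispatch, with a plain loop standing in for the
// accelerator launch:
#include <cstddef>
#include <vector>

template <class Kernel>
void for_each_site(std::size_t nsites, Kernel kernel) {
  for (std::size_t ss = 0; ss < nsites; ++ss) kernel(ss);   // stand-in for accelerator_for
}

void three_point(std::vector<double> &corr, int group) {
  if (group == 1) {
    for_each_site(corr.size(), [&](std::size_t ss) { corr[ss] += 1.0; });  // group-1 site body
  } else if (group == 2) {
    for_each_site(corr.size(), [&](std::size_t ss) { corr[ss] += 2.0; });  // group-2 site body
  } else if (group == 3) {
    for_each_site(corr.size(), [&](std::size_t ss) { corr[ss] += 3.0; });  // group-3 site body
  }
}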
program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -267,7 +265,7 @@ int main (int argc, char ** argv) // Construct a coarsened grid; utility for this? /////////////////////////////////////////////////// std::vector block ({2,2,2,2}); - const int nbasis= 32; + const int nbasis= 8; auto clatt = GridDefaultLatt(); for(int d=0;d Level1Op; - NonHermitianLinearOperator LinOpDwf(Ddwf); - Level1Op LDOp (*Coarse5d,1); + Level1Op LDOp (*Coarse5d,0); + + std::cout< CoarseMdagM(LDOp); + BiCGSTAB CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + + c_res=Zero(); + CoarseCG(CoarseMdagM,c_src,c_res); + std::cout< Date: Sat, 4 Jul 2020 03:53:06 +0100 Subject: [PATCH 20/21] Fixed HMC SU(N) integrator which was causing fields to leave Lie Algebra manifold for N>2 --- Grid/qcd/action/scalar/ScalarImpl.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index 203e1824..d33cdc79 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -38,7 +38,6 @@ public: static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; - std::cout << "Field updated. Epsilon = " << std::setprecision(10) << ep << std::endl; } static inline RealD FieldSquareNorm(Field& U) { @@ -175,14 +174,13 @@ public: P *= scale; } - static inline Field projectForce(Field& P) {return P;} + static inline Field projectForce(Field& P) {return Ta(P);} static inline void update_field(Field &P, Field &U, double ep) { #ifndef USE_FFT_ACCELERATION double t0=usecond(); U += P*ep; - std::cout << "Field updated. Epsilon = " << std::setprecision(10) << ep << std::endl; double t1=usecond(); double total_time = (t1-t0)/1e6; std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; From 43334e88c36b40d565ef906f545a368dfcc80db4 Mon Sep 17 00:00:00 2001 From: "Henrique B.R" Date: Sat, 4 Jul 2020 16:11:16 +0100 Subject: [PATCH 21/21] Tiny change in a comment for clarity --- Grid/qcd/action/scalar/ScalarImpl.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index d33cdc79..14675b11 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -28,10 +28,9 @@ public: typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ - RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); - // CPS and UKQCD conventions not yet implemented for U(1) scalars. + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling gaussian(pRNG, P); - P *= scale; + P *= scale; } static inline Field projectForce(Field& P){return P;} @@ -150,7 +149,7 @@ public: static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { - RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // Being consistent with CPS and UKQCD conventions + RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling #ifndef USE_FFT_ACCELERATION Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
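// Illustrative sketch (not the Grid implementation): patch 20 above changes
// projectForce to return Ta(P), the traceless anti-hermitian part of the
// momentum/force, which is what keeps the HMC update inside the su(N) Lie
// algebra for N > 2.  A minimal N = 2 analogue of that projection,
// Ta(M) = (M - M^dagger)/2 - (trace/N) * 1:
#include <array>
#include <complex>
#include <cstdio>

using Cplx = std::complex<double>;
using Mat  = std::array<std::array<Cplx, 2>, 2>;   // toy N = 2 matrix

Mat Ta(const Mat &m) {
  const int N = 2;
  Mat out{};
  // Anti-hermitian part: (M - M^dagger)/2
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < N; ++j)
      out[i][j] = 0.5 * (m[i][j] - std::conj(m[j][i]));
  // Remove the trace so the result lies in su(N)
  Cplx tr = out[0][0] + out[1][1];
  for (int i = 0; i < N; ++i) out[i][i] -= tr / double(N);
  return out;
}

int main() {
  Mat m{};
  m[0][0] = Cplx(1, 2); m[0][1] = Cplx(3, 4);
  m[1][0] = Cplx(5, 6); m[1][1] = Cplx(7, 8);
  Mat a = Ta(m);
  Cplx tr = a[0][0] + a[1][1];
  std::printf("trace of Ta(M) = (%g, %g)\n", tr.real(), tr.imag());   // ~ (0, 0)
  return 0;
}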