From 0bd83cdbda5defdde82f48f0c06baef40b3dee29 Mon Sep 17 00:00:00 2001 From: RJHudspith Date: Sun, 28 Nov 2021 21:51:03 +0100 Subject: [PATCH 01/86] Fixes for Nc!=3 Nersc IO, Gauge and Gauge_NCxNC compatible with GLU. Trace normalisation changed in places removing explicit threes. Guards against non-su3 tests and tests failing when LIME is not compiled. --- Grid/Makefile.am | 2 +- Grid/parallelIO/IldgIO.h | 9 +- Grid/parallelIO/MetaData.h | 33 +- Grid/parallelIO/NerscIO.h | 46 ++- Grid/qcd/utils/WilsonLoops.h | 3 +- configure.ac | 2 +- tests/IO/Test_nersc_io.cc | 8 +- tests/core/Test_lie_generators.cc | 376 +++++++++--------- tests/core/Test_reunitarise.cc | 5 +- tests/hmc/Test_hmc_ScalarActionNxN.cc | 4 +- tests/hmc/Test_hmc_WG_Production.cc | 6 +- tests/lanczos/Test_compressed_lanczos.cc | 10 + .../Test_dwf_compressed_lanczos_reorg.cc | 11 +- tests/solver/Test_coarse_even_odd.cc | 12 +- tests/solver/Test_dwf_mrhs_cg.cc | 2 + 15 files changed, 282 insertions(+), 247 deletions(-) diff --git a/Grid/Makefile.am b/Grid/Makefile.am index 7c3c151b..aff5b9d0 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -70,7 +70,7 @@ endif lib_LIBRARIES = libGrid.a CCFILES += $(extra_sources) -HFILES += $(extra_headers) Config.h Version.h +HFILES += $(extra_headers) libGrid_a_SOURCES = $(CCFILES) libGrid_adir = $(includedir)/Grid diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h index 576f08bd..80d135d2 100644 --- a/Grid/parallelIO/IldgIO.h +++ b/Grid/parallelIO/IldgIO.h @@ -31,6 +31,7 @@ directory #include #include #include +#include #include #include @@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter { // Fill ILDG header data struct ////////////////////////////////////////////////////// ildgFormat ildgfmt ; - ildgfmt.field = std::string("su3gauge"); + const std::string stNC = std::to_string( Nc ) ; + ildgfmt.field = std::string("su"+stNC+"gauge"); if ( format == std::string("IEEE32BIG") ) { ildgfmt.precision = 32; @@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader { } else { assert(found_ildgFormat); - assert ( ildgFormat_.field == std::string("su3gauge") ); + const std::string stNC = std::to_string( Nc ) ; + assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") ); /////////////////////////////////////////////////////////////////////////////////////// // Populate our Grid metadata as best we can @@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader { std::ostringstream vers; vers << ildgFormat_.version; FieldMetaData_.hdr_version = vers.str(); - FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); + FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC); FieldMetaData_.nd=4; FieldMetaData_.dimension.resize(4); diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index af8b3f76..6b9d8708 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -6,8 +6,8 @@ Copyright (C) 2015 - Author: Peter Boyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -182,8 +182,8 @@ class GaugeStatistics public: void operator()(Lattice & data,FieldMetaData &header) { - header.link_trace=WilsonLoops::linkTrace(data); - header.plaquette =WilsonLoops::avgPlaquette(data); + header.link_trace = WilsonLoops::linkTrace(data); + header.plaquette = WilsonLoops::avgPlaquette(data); } }; typedef GaugeStatistics PeriodicGaugeStatistics; @@ -203,20 +203,24 @@ template<> inline void PrepareMetaData(Lattice 1 ) ; for(int mu=0;mu using iLorentzColour2x3 = iVector, 2>, Nd >; +template using iLorentzColour2x3 = iVector, Nc-1>, Nd >; typedef iLorentzColour2x3 LorentzColour2x3; typedef iLorentzColour2x3 LorentzColour2x3F; @@ -278,7 +282,6 @@ struct GaugeSimpleMunger{ template struct GaugeSimpleUnmunger { - void operator()(sobj &in, fobj &out) { for (int mu = 0; mu < Nd; mu++) { for (int i = 0; i < Nc; i++) { @@ -317,8 +320,8 @@ template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ for(int mu=0;mu struct Gauge3x2unmunger{ void operator() (sobj &in,fobj &out){ for(int mu=0;mu Author: Peter Boyle Author: paboyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +31,8 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H +#include + NAMESPACE_BEGIN(Grid); using namespace Grid; @@ -145,15 +148,17 @@ public: std::string format(header.floating_point); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); + const int ieee32big = (format == std::string("IEEE32BIG")); + const int ieee32 = (format == std::string("IEEE32")); + const int ieee64big = (format == std::string("IEEE64BIG")); + const int ieee64 = (format == std::string("IEEE64") || \ + format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; // munger is a function of - if ( header.data_type == std::string("4D_SU3_GAUGE") ) { + const std::string stNC = std::to_string( Nc ) ; + if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,Gauge3x2munger(), offset,format, @@ -164,7 +169,7 @@ public: (Umu,file,Gauge3x2munger(),offset,format, nersc_csum,scidac_csuma,scidac_csumb); } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + } else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,GaugeSimpleMunger(),offset,format, @@ -230,6 +235,7 @@ public: header.sequence_number = 1; header.ensemble_id = std::string("UKQCD"); header.ensemble_label = ens_label; + header.hdr_version = "1.0" ; typedef LorentzColourMatrixD fobj3D; typedef LorentzColour2x3D fobj2D; @@ -243,10 +249,14 @@ public: uint64_t offset; - // Sod it -- always write 3x3 double - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - GaugeSimpleUnmunger munge; + // Sod it -- always write NcxNc double + header.floating_point = std::string("IEEE64BIG"); + const std::string stNC = std::to_string( Nc ) ; + if( two_row ) { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE" ); + } else { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC ); + } if ( grid->IsBoss() ) { truncate(file); offset = writeHeader(header,file); @@ -254,8 +264,15 @@ public: grid->Broadcast(0,(void *)&offset,sizeof(offset)); uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, - nersc_csum,scidac_csuma,scidac_csumb); + if( two_row ) { + Gauge3x2unmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } else { + GaugeSimpleUnmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } header.checksum = nersc_csum; if ( grid->IsBoss() ) { writeHeader(header,file); @@ -287,8 +304,7 @@ public: header.plaquette=0.0; MachineCharacteristics(header); - uint64_t offset; - + uint64_t offset; #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); header.data_type = std::string("RANLUX48"); @@ -328,7 +344,7 @@ public: GridBase *grid = parallel.Grid(); - uint64_t offset = readHeader(file,grid,header); + uint64_t offset = readHeader(file,grid,header); FieldMetaData clone(header); diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 0367c9fa..e002e3d5 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -125,7 +125,6 @@ public: return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME } - ////////////////////////////////////////////////// // average over all x,y,z the temporal loop ////////////////////////////////////////////////// @@ -165,7 +164,7 @@ public: double vol = Umu.Grid()->gSites(); - return p.real() / vol / 4.0 / 3.0; + return p.real() / vol / (4.0 * Nc ) ; }; ////////////////////////////////////////////////// diff --git a/configure.ac b/configure.ac index 406b0b74..9ab0595a 100644 --- a/configure.ac +++ b/configure.ac @@ -159,7 +159,7 @@ case ${ac_ZMOBIUS} in esac ############### Nc AC_ARG_ENABLE([Nc], - [AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])], + [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])], [ac_Nc=${enable_Nc}], [ac_Nc=3]) case ${ac_Nc} in diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index c15c320e..ae5b9c0d 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -147,7 +147,7 @@ int main (int argc, char ** argv) Complex p = TensorRemove(Tp); std::cout< Author: Peter Boyle Author: Guido Cossu +Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,14 +43,14 @@ directory using namespace std; using namespace Grid; - ; +; int main(int argc, char** argv) { Grid_init(&argc, &argv); std::vector latt({4, 4, 4, 8}); GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid( - latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); + latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); @@ -60,15 +61,19 @@ int main(int argc, char** argv) { << std::endl; SU2::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; + + // guard as this code fails to compile for Nc != 3 +#if (Nc == 3) + SU2Adjoint::printGenerators(); SU2::testGenerators(); SU2Adjoint::testGenerators(); - + std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; SU3::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; SU3Adjoint::printGenerators(); @@ -111,12 +116,10 @@ int main(int argc, char** argv) { // AdjointRepresentation has the predefined number of colours Nc // Representations RepresentationTypes(grid); - - LatticeGaugeField U(grid), V(grid); SU3::HotConfiguration(gridRNG, U); SU3::HotConfiguration(gridRNG, V); - + // Adjoint representation // Test group structure // (U_f * V_f)_r = U_r * V_r @@ -127,17 +130,17 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu = peekLorentz(V,mu); pokeLorentz(UV,Umu*Vmu, mu); } - + AdjRep.update_representation(UV); typename AdjointRep::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r - - + + AdjRep.update_representation(U); typename AdjointRep::LatticeField Ur = AdjRep.U; // U_r - + AdjRep.update_representation(V); typename AdjointRep::LatticeField Vr = AdjRep.U; // V_r - + typename AdjointRep::LatticeField UrVr(grid); UrVr = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -145,10 +148,10 @@ int main(int argc, char** argv) { typename AdjointRep::LatticeMatrix Vrmu = peekLorentz(Vr,mu); pokeLorentz(UrVr,Urmu*Vrmu, mu); } - + typename AdjointRep::LatticeField Diff_check = UVr - UrVr; std::cout << GridLogMessage << "Group structure SU("<::AdjointLieAlgebraMatrix(h_adj,Ar); - + // Re-extract h_adj SU3::LatticeAlgebraVector h_adj2(grid); SU_Adjoint::projectOnAlgebra(h_adj2, Ar); SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2; std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl; - + // Exponentiate typename AdjointRep::LatticeMatrix Uadj(grid); Uadj = expMat(Ar, 1.0, 16); - + typename AdjointRep::LatticeMatrix uno(grid); uno = 1.0; // Check matrix Uadj, must be real orthogonal typename AdjointRep::LatticeMatrix Ucheck = Uadj - conjugate(Uadj); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) - << std::endl; - + << std::endl; + Ucheck = Uadj * adj(Uadj) - uno; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck) - << std::endl; + << std::endl; Ucheck = adj(Uadj) * Uadj - uno; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck) - << std::endl; - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af(grid); SU3::FundamentalLieAlgebraMatrix(h_adj,Af); @@ -193,72 +195,65 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck(grid); UnitCheck = Ufund * adj(Ufund) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck) - << std::endl; + << std::endl; UnitCheck = adj(Ufund) * Ufund - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck) - << std::endl; - + << std::endl; // Tranform to the adjoint representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields - + AdjRep.update_representation(U); Ur = AdjRep.U; // U_r typename AdjointRep::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj - + typename AdjointRep::LatticeMatrix Diff_check_mat = Ur0 - Uadj; std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl; - - - // TwoIndexRep tests - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printGenerators(); - std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::testGenerators(); std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printGenerators(); std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::testGenerators(); - - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); random(gridRNG,Gauss2); @@ -276,13 +271,13 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2 = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); random(gridRNG,Gauss2a); @@ -300,11 +295,11 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2a = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure" - << std::endl; + << std::endl; // Testing HMC representation classes TwoIndexRep< Nc, Symmetric > TIndexRep(grid); @@ -313,7 +308,7 @@ int main(int argc, char** argv) { LatticeGaugeField U2(grid), V2(grid); SU3::HotConfiguration(gridRNG, U2); SU3::HotConfiguration(gridRNG, V2); - + LatticeGaugeField UV2(grid); UV2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -321,16 +316,16 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu); pokeLorentz(UV2,Umu2*Vmu2, mu); } - + TIndexRep.update_representation(UV2); typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r - + TIndexRep.update_representation(U2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r - + TIndexRep.update_representation(V2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid); Ur2Vr2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -338,11 +333,11 @@ int main(int argc, char** argv) { typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu); pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu); } - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2; std::cout << GridLogMessage << "Group structure SU("<::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym); - + // Re-extract h_sym SU3::LatticeAlgebraVector h_sym2(grid); SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym); SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; - - + // Exponentiate typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid); U2iS = expMat(Ar_sym, 1.0, 16); - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid); uno2iS = 1.0; // Check matrix U2iS, must be real orthogonal typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS) - << std::endl; - + << std::endl; + Ucheck2iS = U2iS * adj(U2iS) - uno2iS; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS) - << std::endl; + << std::endl; Ucheck2iS = adj(U2iS) * U2iS - uno2iS; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS) - << std::endl; - - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af_sym(grid); SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); @@ -386,147 +378,137 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck2(grid); UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2) - << std::endl; + << std::endl; UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2) - << std::endl; - - + << std::endl; + + // Tranform to the 2Index Sym representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields - + TIndexRep.update_representation(U); Ur2 = TIndexRep.U; // U_r typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS; std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl; - - - - + if (TwoIndexRep::Dimension != 1){ - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; - std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" - << std::endl; - // Testing HMC representation classes - TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); + std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" + << std::endl; + // Testing HMC representation classes + TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); - // Test group structure - // (U_f * V_f)_r = U_r * V_r - LatticeGaugeField U2A(grid), V2A(grid); - SU3::HotConfiguration(gridRNG, U2A); - SU3::HotConfiguration(gridRNG, V2A); + // Test group structure + // (U_f * V_f)_r = U_r * V_r + LatticeGaugeField U2A(grid), V2A(grid); + SU3::HotConfiguration(gridRNG, U2A); + SU3::HotConfiguration(gridRNG, V2A); - LatticeGaugeField UV2A(grid); - UV2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); - SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); - pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + LatticeGaugeField UV2A(grid); + UV2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); + SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); + pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + } + + TIndexRep.update_representation(UV2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r + + TIndexRep.update_representation(U2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r + + TIndexRep.update_representation(V2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); + Ur2Vr2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); + pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); + } + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; + std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); + random(gridRNG,h_Asym); + h_Asym = real(h_Asym); + SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); + + // Re-extract h_sym + SU3::LatticeAlgebraVector h_Asym2(grid); + SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); + SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; + std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; + + + // Exponentiate + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); + U2iAS = expMat(Ar_Asym, 1.0, 16); + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); + uno2iAS = 1.0; + // Check matrix U2iS, must be real orthogonal + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); + std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) + << std::endl; + + Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) + << std::endl; + Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) + << std::endl; + + + + // Construct the fundamental matrix in the group + SU3::LatticeMatrix Af_Asym(grid); + SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); + SU3::LatticeMatrix Ufund2A(grid); + Ufund2A = expMat(Af_Asym, 1.0, 16); + SU3::LatticeMatrix UnitCheck2A(grid); + UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; + std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) + << std::endl; + UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; + std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) + << std::endl; + + + // Tranform to the 2Index Sym representation + U = Zero(); // fill this with only one direction + pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields + + TIndexRepA.update_representation(U); + Ur2A = TIndexRepA.U; // U_r + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; + std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; + + } else { + std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " + "because representation is trivial (dim = 1)" + << std::endl; } - - TIndexRep.update_representation(UV2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r - - TIndexRep.update_representation(U2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r - - TIndexRep.update_representation(V2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); - Ur2Vr2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); - pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); - } - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; - std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); - random(gridRNG,h_Asym); - h_Asym = real(h_Asym); - SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); - - // Re-extract h_sym - SU3::LatticeAlgebraVector h_Asym2(grid); - SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); - SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; - std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; - - - // Exponentiate - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); - U2iAS = expMat(Ar_Asym, 1.0, 16); - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); - uno2iAS = 1.0; - // Check matrix U2iS, must be real orthogonal - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); - std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) - << std::endl; - - Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) - << std::endl; - Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) - << std::endl; - - - - // Construct the fundamental matrix in the group - SU3::LatticeMatrix Af_Asym(grid); - SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); - SU3::LatticeMatrix Ufund2A(grid); - Ufund2A = expMat(Af_Asym, 1.0, 16); - SU3::LatticeMatrix UnitCheck2A(grid); - UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; - std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) - << std::endl; - UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; - std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) - << std::endl; - - - // Tranform to the 2Index Sym representation - U = Zero(); // fill this with only one direction - pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields - - TIndexRepA.update_representation(U); - Ur2A = TIndexRepA.U; // U_r - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; - std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; - -} else { - std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " - "because representation is trivial (dim = 1)" - << std::endl; -} - - - - - - - - +#endif Grid_finalize(); } diff --git a/tests/core/Test_reunitarise.cc b/tests/core/Test_reunitarise.cc index 6644be1a..10423159 100644 --- a/tests/core/Test_reunitarise.cc +++ b/tests/core/Test_reunitarise.cc @@ -122,14 +122,15 @@ int main (int argc, char ** argv) std::cout << "Determinant defect before projection " < using namespace std; using namespace Grid; +#ifdef HAVE_LIME + template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos { @@ -249,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif // HAVE_LIME_H diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index ecd0e629..669a7b6d 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -36,7 +36,8 @@ Author: Peter Boyle using namespace std; using namespace Grid; - ; + +#ifdef HAVE_LIME template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos @@ -250,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif diff --git a/tests/solver/Test_coarse_even_odd.cc b/tests/solver/Test_coarse_even_odd.cc index dfbab747..c7127121 100644 --- a/tests/solver/Test_coarse_even_odd.cc +++ b/tests/solver/Test_coarse_even_odd.cc @@ -93,8 +93,16 @@ int main(int argc, char** argv) { // Setup of Dirac Matrix and Operator // ///////////////////////////////////////////////////////////////////////////// - LatticeGaugeField Umu(Grid_f); SU3::HotConfiguration(pRNG_f, Umu); - + LatticeGaugeField Umu(Grid_f); +#if (Nc==2) + SU2::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==3) + SU3::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==4) + SU4::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==5) + SU5::HotConfiguration(pRNG_f, Umu); +#endif RealD checkTolerance = (getPrecision::value == 1) ? 1e-7 : 1e-15; RealD mass = -0.30; diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index b912ba4f..4242f64b 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -34,6 +34,7 @@ using namespace Grid; int main (int argc, char ** argv) { +#ifdef HAVE_LIME typedef typename DomainWallFermionR::FermionField FermionField; typedef typename DomainWallFermionR::ComplexField ComplexField; typename DomainWallFermionR::ImplParams params; @@ -237,4 +238,5 @@ int main (int argc, char ** argv) } Grid_finalize(); +#endif // HAVE_LIME } From 2851870d70ae40ad540f790433f798d6160aefdc Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 00:05:43 +0100 Subject: [PATCH 02/86] expClover support via helpers template class --- Grid/qcd/action/fermion/CloverHelpers.h | 371 ++++++++++++++++++ .../fermion/CompactWilsonCloverFermion.h | 3 +- Grid/qcd/action/fermion/Fermion.h | 72 ++-- Grid/qcd/action/fermion/WilsonCloverFermion.h | 3 +- Grid/qcd/action/fermion/WilsonCloverHelpers.h | 2 + ...CompactWilsonCloverFermionImplementation.h | 114 +++--- .../WilsonCloverFermionImplementation.h | 138 +++---- ...WilsonCloverFermionInstantiation.cc.master | 5 +- ...WilsonCloverFermionInstantiation.cc.master | 7 +- 9 files changed, 562 insertions(+), 153 deletions(-) create mode 100644 Grid/qcd/action/fermion/CloverHelpers.h diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h new file mode 100644 index 00000000..432bdf3c --- /dev/null +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -0,0 +1,371 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h + + Copyright (C) 2017 - 2022 + + Author: paboyle + Author: Daniel Richtmann + Author: Mattia Bruno + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#pragma once + +#include +#include +#include + +//////////////////////////////////////////// +// Standard Clover +// (4+m0) + csw * clover_term +// Exp Clover +// (4+m0) * exp(csw/(4+m0) clover_term) +// = (4+m0) + csw * clover_term + ... +//////////////////////////////////////////// + +NAMESPACE_BEGIN(Grid); + + +////////////////////////////////// +// Generic Standard Clover +////////////////////////////////// + +template +class CloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + + static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { + GridBase *grid = CloverTerm.Grid(); + CloverTerm += diag_mass; + + int lvol = grid->lSites(); + int DimRep = Impl::Dimension; + { + autoView(CTv,CloverTerm,CpuRead); + autoView(CTIv,CloverTermInv,CpuWrite); + thread_for(site, lvol, { + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); + peekLocalSite(Qx, CTv, lcoor); + //if (csw!=0){ + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + + EigenInvCloverOp = EigenCloverOp.inverse(); + //std::cout << EigenInvCloverOp << std::endl; + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; + // } + pokeLocalSite(Qxinv, CTIv, lcoor); + }); + } + } + + + static void CloverTermDerivative(GaugeField& clover_force, + const FermionField& X, + const FermionField& Y, + const std::vector& U, + RealD csw_t, RealD csw_r) { + GaugeLinkField force_mu(clover_force.Grid()), lambda(clover_force.Grid()); + PropagatorField Lambda(clover_force.Grid()); + + /////////////////////////////////////////////////////////// + // Clover term derivative + /////////////////////////////////////////////////////////// + Impl::outerProductImpl(Lambda, X, Y); + //std::cout << "Lambda:" << Lambda << std::endl; + + Gamma::Algebra sigma[] = { + Gamma::Algebra::SigmaXY, + Gamma::Algebra::SigmaXZ, + Gamma::Algebra::SigmaXT, + Gamma::Algebra::MinusSigmaXY, + Gamma::Algebra::SigmaYZ, + Gamma::Algebra::SigmaYT, + Gamma::Algebra::MinusSigmaXZ, + Gamma::Algebra::MinusSigmaYZ, + Gamma::Algebra::SigmaZT, + Gamma::Algebra::MinusSigmaXT, + Gamma::Algebra::MinusSigmaYT, + Gamma::Algebra::MinusSigmaZT}; + + /* + sigma_{\mu \nu}= + | 0 sigma[0] sigma[1] sigma[2] | + | sigma[3] 0 sigma[4] sigma[5] | + | sigma[6] sigma[7] 0 sigma[8] | + | sigma[9] sigma[10] sigma[11] 0 | + */ + + int count = 0; + clover_force = Zero(); + for (int mu = 0; mu < 4; mu++) + { + force_mu = Zero(); + for (int nu = 0; nu < 4; nu++) + { + if (mu == nu) + continue; + + RealD factor; + if (nu == 4 || mu == 4) + { + factor = 2.0 * csw_t; + } + else + { + factor = 2.0 * csw_r; + } + PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked + Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok + force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + count++; + } + + pokeLorentz(clover_force, U[mu] * force_mu, mu); + } + } +}; + + +////////////////////////////////// +// Generic Exp Clover +////////////////////////////////// + +template +class ExpCloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef WilsonCloverHelpers Helpers; + + static void plusIdentity(const CloverField& in) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; saprec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Clover.Grid(); + CloverField ExpClover(grid); + + int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); + + // csw/(diag_mass) * clover + Clover *= (1.0/diag_mass); + + ExpClover = Clover; + plusIdentity(ExpClover); // 1 + Clover + + // Taylor expansion, slow but generic + RealD pref = 1.0; + for (int i=2; i<=NMAX; i++) { + Clover = Clover * Clover; + pref /= (RealD)(i); + ExpClover += pref * Clover; + } + + // Convert the data layout of the clover terms + Clover = ExpClover * diag_mass; + CloverInv = adj(ExpClover * (1.0/diag_mass)); + } + + static void CloverTermDerivative(GaugeField& clover_force, const FermionField& X, const FermionField& Y, + const std::vector& U, RealD csw_t, RealD csw_r) { + assert(0); + } +}; + + +////////////////////////////////// +// Compact Standard Clover +////////////////////////////////// + + +template +class CompactCloverHelpers: public CompactWilsonCloverHelpers, + public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + Clover += diag_mass; + } + + static void Instantiate(CloverDiagonalField& Diagonal, + CloverTriangleField& Triangle, + CloverDiagonalField& DiagonalInv, + CloverTriangleField& TriangleInv, + RealD csw_t, RealD diag_mass) { + // Invert the clover term in the improved layout + CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + } + + // TODO: implement Cmunu for better performances with compact layout, but don't do it + // here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); + } +}; + +////////////////////////////////// +// Compact Exp Clover +////////////////////////////////// + + +template +class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void plusIdentity(const CloverField& in) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; saprec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + // csw/(diag_mass) * clover + Clover = Clover * (1.0/diag_mass); + } + + static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, + RealD csw_t, RealD diag_mass) { + GridBase* grid = Diagonal.Grid(); + int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); + // To be optimized: too much memory traffic; implement exp in improved layout + // Felix + Fabian: replace code below with + // + // ConvertLayout Clover -> Diagonal, Triangle + // ModifyBoundaries + // EvaluateExp + + // code to be replaced + CloverField Clover(grid), ExpClover(grid); + + CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); + + ExpClover = Clover; + plusIdentity(ExpClover); // 1 + Clover + + RealD pref = 1.0; + for (int i=2; i<=NMAX; i++) { + Clover = Clover * Clover; + pref /= (RealD)(i); + ExpClover += pref * Clover; + } + + // Convert the data layout of the clover terms + CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); + Diagonal = Diagonal * diag_mass; + Triangle = Triangle * diag_mass; + CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); + DiagonalInv = DiagonalInv*(1.0/diag_mass); + TriangleInv = TriangleInv*(1.0/diag_mass); + } + + + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + assert(0); + } + +}; + + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h index 3a166134..249b20bd 100644 --- a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h @@ -31,6 +31,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid); // + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site // = 84 complex words per site -template +template class CompactWilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers, public CompactWilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 78cf7851..bfbc16b8 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -138,38 +138,62 @@ typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; // Clover fermions -typedef WilsonCloverFermion WilsonCloverFermionR; -typedef WilsonCloverFermion WilsonCloverFermionF; -typedef WilsonCloverFermion WilsonCloverFermionD; +typedef CloverHelpers CloverR; +typedef CloverHelpers CloverF; +typedef CloverHelpers CloverD; -typedef WilsonCloverFermion WilsonCloverAdjFermionR; -typedef WilsonCloverFermion WilsonCloverAdjFermionF; -typedef WilsonCloverFermion WilsonCloverAdjFermionD; +typedef ExpCloverHelpers ExpCloverR; +typedef ExpCloverHelpers ExpCloverF; +typedef ExpCloverHelpers ExpCloverD; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; +typedef WilsonCloverFermion WilsonCloverFermionR; +typedef WilsonCloverFermion WilsonCloverFermionF; +typedef WilsonCloverFermion WilsonCloverFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; +typedef WilsonCloverFermion WilsonExpCloverFermionR; +typedef WilsonCloverFermion WilsonExpCloverFermionF; +typedef WilsonCloverFermion WilsonExpCloverFermionD; + +typedef WilsonCloverFermion WilsonCloverAdjFermionR; +typedef WilsonCloverFermion WilsonCloverAdjFermionF; +typedef WilsonCloverFermion WilsonCloverAdjFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; // Compact Clover fermions -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; +typedef CompactCloverHelpers CompactCloverR; +typedef CompactCloverHelpers CompactCloverF; +typedef CompactCloverHelpers CompactCloverD; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; +typedef CompactExpCloverHelpers CompactExpCloverR; +typedef CompactExpCloverHelpers CompactExpCloverF; +typedef CompactExpCloverHelpers CompactExpCloverD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionR; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionF; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index cd19fc10..562f08f4 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -32,6 +32,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid); // csw_r = csw_t to recover the isotropic version ////////////////////////////////////////////////////////////////// -template +template class WilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/WilsonCloverHelpers.h b/Grid/qcd/action/fermion/WilsonCloverHelpers.h index 60f19317..c221d5f0 100644 --- a/Grid/qcd/action/fermion/WilsonCloverHelpers.h +++ b/Grid/qcd/action/fermion/WilsonCloverHelpers.h @@ -209,6 +209,8 @@ public: }; +//////////////////////////////////////////////////////// + template class CompactWilsonCloverHelpers { public: diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 3dfcb133..b42957ac 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -32,17 +32,18 @@ #include #include + NAMESPACE_BEGIN(Grid); -template -CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, - GridCartesian& Fgrid, - GridRedBlackCartesian& Hgrid, - const RealD _mass, - const RealD _csw_r, - const RealD _csw_t, - const RealD _cF, - const WilsonAnisotropyCoefficients& clover_anisotropy, - const ImplParams& impl_p) +template +CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, + GridCartesian& Fgrid, + GridRedBlackCartesian& Hgrid, + const RealD _mass, + const RealD _csw_r, + const RealD _csw_t, + const RealD _cF, + const WilsonAnisotropyCoefficients& clover_anisotropy, + const ImplParams& impl_p) : WilsonBase(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy) , csw_r(_csw_r) , csw_t(_csw_t) @@ -68,40 +69,40 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); } -template -void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { WilsonBase::Dhop(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopOE(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopEO(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { WilsonBase::DhopDir(in, out, dir, disp); if(this->open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { WilsonBase::DhopDirAll(in, out); if(this->open_boundaries) { for(auto& o : out) ApplyBoundaryMask(o); } } -template -void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc Mooee(in, Tmp); @@ -109,8 +110,8 @@ void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& o if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc MooeeDag(in, Tmp); @@ -118,20 +119,20 @@ void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { WilsonBase::Meooe(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { WilsonBase::MeooeDag(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalOdd, TriangleOdd); @@ -144,13 +145,13 @@ void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionFiel if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { Mooee(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd); @@ -163,23 +164,23 @@ void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionF if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { MooeeInv(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { DhopDir(in, out, dir, disp); } -template -void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { DhopDirAll(in, out); } -template -void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { +template +void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { assert(!open_boundaries); // TODO check for changes required for open bc // NOTE: code copied from original clover term @@ -251,7 +252,7 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -261,18 +262,18 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi force += clover_force; } -template -void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, +template +void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, FermionField& out, const CloverDiagonalField& diagonal, const CloverTriangleField& triangle) { @@ -285,8 +286,8 @@ void CompactWilsonCloverFermion::MooeeInternal(const FermionField& CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle); } -template -void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { +template +void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { // NOTE: parts copied from original implementation // Import gauge into base class @@ -318,8 +319,9 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; - TmpOriginal += this->diag_mass; - + // Handle mass term based on clover policy + CloverHelpers::MassTerm(TmpOriginal, this->diag_mass); + // Convert the data layout of the clover term double t4 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); @@ -328,9 +330,9 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { double t5 = usecond(); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Invert the clover term in the improved layout + // Instantiate based on clover policy double t6 = usecond(); - CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + CloverHelpers::Instantiate(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); // Fill the remaining clover fields double t7 = usecond(); @@ -353,7 +355,7 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { << ", fill clover = " << (t4 - t3) / 1e6 << ", convert = " << (t5 - t4) / 1e6 << ", boundaries = " << (t6 - t5) / 1e6 - << ", inversions = " << (t7 - t6) / 1e6 + << ", instantiate = " << (t7 - t6) / 1e6 << ", pick cbs = " << (t8 - t7) / 1e6 << ", total = " << (t8 - t0) / 1e6 << std::endl; diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index e4d2e736..2ebf45f8 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -34,8 +34,8 @@ NAMESPACE_BEGIN(Grid); -template -WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, +template +WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, GridCartesian& Fgrid, GridRedBlackCartesian& Hgrid, const RealD _mass, @@ -74,8 +74,8 @@ WilsonCloverFermion::WilsonCloverFermion(GaugeField& } // *NOT* EO -template -void WilsonCloverFermion::M(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::M(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -89,8 +89,8 @@ void WilsonCloverFermion::M(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -104,8 +104,8 @@ void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) +template +void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { double t0 = usecond(); WilsonFermion::ImportGauge(_Umu); @@ -131,47 +131,50 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += Helpers::fillCloverXT(Ex) * csw_t; CloverTerm += Helpers::fillCloverYT(Ey) * csw_t; CloverTerm += Helpers::fillCloverZT(Ez) * csw_t; - CloverTerm += diag_mass; - + double t4 = usecond(); - int lvol = _Umu.Grid()->lSites(); - int DimRep = Impl::Dimension; + CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass); +// CloverTerm += diag_mass; +// +// double t4 = usecond(); +// int lvol = _Umu.Grid()->lSites(); +// int DimRep = Impl::Dimension; +// +// double t5 = usecond(); +// { +// autoView(CTv,CloverTerm,CpuRead); +// autoView(CTIv,CloverTermInv,CpuWrite); +// thread_for(site, lvol, { +// Coordinate lcoor; +// grid->LocalIndexToLocalCoor(site, lcoor); +// Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); +// Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); +// typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); +// peekLocalSite(Qx, CTv, lcoor); +// //if (csw!=0){ +// for (int j = 0; j < Ns; j++) +// for (int k = 0; k < Ns; k++) +// for (int a = 0; a < DimRep; a++) +// for (int b = 0; b < DimRep; b++){ +// auto zz = Qx()(j, k)(a, b); +// EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); +// } +// // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; +// +// EigenInvCloverOp = EigenCloverOp.inverse(); +// //std::cout << EigenInvCloverOp << std::endl; +// for (int j = 0; j < Ns; j++) +// for (int k = 0; k < Ns; k++) +// for (int a = 0; a < DimRep; a++) +// for (int b = 0; b < DimRep; b++) +// Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); +// // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; +// // } +// pokeLocalSite(Qxinv, CTIv, lcoor); +// }); +// } double t5 = usecond(); - { - autoView(CTv,CloverTerm,CpuRead); - autoView(CTIv,CloverTermInv,CpuWrite); - thread_for(site, lvol, { - Coordinate lcoor; - grid->LocalIndexToLocalCoor(site, lcoor); - Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); - peekLocalSite(Qx, CTv, lcoor); - //if (csw!=0){ - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++){ - auto zz = Qx()(j, k)(a, b); - EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); - } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } - pokeLocalSite(Qxinv, CTIv, lcoor); - }); - } - - double t6 = usecond(); // Separate the even and odd parts pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); @@ -184,7 +187,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); - double t7 = usecond(); + double t6 = usecond(); #if 0 std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" @@ -192,40 +195,39 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) << ", allocations = " << (t2 - t1) / 1e6 << ", field strength = " << (t3 - t2) / 1e6 << ", fill clover = " << (t4 - t3) / 1e6 - << ", misc = " << (t5 - t4) / 1e6 - << ", inversions = " << (t6 - t5) / 1e6 - << ", pick cbs = " << (t7 - t6) / 1e6 - << ", total = " << (t7 - t0) / 1e6 + << ", instantiation = " << (t5 - t4) / 1e6 + << ", pick cbs = " << (t6 - t5) / 1e6 + << ", total = " << (t6 - t0) / 1e6 << std::endl; #endif } -template -void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseNo); } -template -void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseNo); } -template -void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseYes); } -template -void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseYes); } -template -void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) +template +void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) { out.Checkerboard() = in.Checkerboard(); CloverField *Clover; @@ -278,8 +280,8 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } // MooeeInternal // Derivative parts unpreconditioned pseudofermions -template -void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) { conformable(X.Grid(), Y.Grid()); conformable(X.Grid(), force.Grid()); @@ -349,7 +351,7 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -360,15 +362,15 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } // Derivative parts -template -void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { assert(0); } // Derivative parts -template -void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +template +void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { assert(0); // not implemented yet } diff --git a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master index 7c91c252..1ea1549c 100644 --- a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master @@ -9,6 +9,7 @@ Author: paboyle Author: Guido Cossu Author: Daniel Richtmann + Author: Mattia Bruno This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,10 +33,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class CompactWilsonCloverFermion; +template class CompactWilsonCloverFermion>; +template class CompactWilsonCloverFermion>; NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master index af99dfb6..2cea3656 100644 --- a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master @@ -8,7 +8,8 @@ Author: paboyle Author: Guido Cossu - + Author: Mattia Bruno + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -31,10 +32,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class WilsonCloverFermion; +template class WilsonCloverFermion>; +template class WilsonCloverFermion>; NAMESPACE_END(Grid); From 3d44aa9cb9ed629fc227822a252d5037d518a5ef Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 01:10:19 +0100 Subject: [PATCH 03/86] cleaned up cloverhelpers; fixed test compact_clover which runs --- Grid/qcd/action/fermion/CloverHelpers.h | 85 +++---------------- .../WilsonCloverFermionImplementation.h | 39 --------- .../Test_compact_wilson_clover_speedup.cc | 4 +- 3 files changed, 12 insertions(+), 116 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 432bdf3c..af712852 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -98,71 +98,10 @@ public: } } - - static void CloverTermDerivative(GaugeField& clover_force, - const FermionField& X, - const FermionField& Y, - const std::vector& U, - RealD csw_t, RealD csw_r) { - GaugeLinkField force_mu(clover_force.Grid()), lambda(clover_force.Grid()); - PropagatorField Lambda(clover_force.Grid()); - - /////////////////////////////////////////////////////////// - // Clover term derivative - /////////////////////////////////////////////////////////// - Impl::outerProductImpl(Lambda, X, Y); - //std::cout << "Lambda:" << Lambda << std::endl; - - Gamma::Algebra sigma[] = { - Gamma::Algebra::SigmaXY, - Gamma::Algebra::SigmaXZ, - Gamma::Algebra::SigmaXT, - Gamma::Algebra::MinusSigmaXY, - Gamma::Algebra::SigmaYZ, - Gamma::Algebra::SigmaYT, - Gamma::Algebra::MinusSigmaXZ, - Gamma::Algebra::MinusSigmaYZ, - Gamma::Algebra::SigmaZT, - Gamma::Algebra::MinusSigmaXT, - Gamma::Algebra::MinusSigmaYT, - Gamma::Algebra::MinusSigmaZT}; - - /* - sigma_{\mu \nu}= - | 0 sigma[0] sigma[1] sigma[2] | - | sigma[3] 0 sigma[4] sigma[5] | - | sigma[6] sigma[7] 0 sigma[8] | - | sigma[9] sigma[10] sigma[11] 0 | - */ - - int count = 0; - clover_force = Zero(); - for (int mu = 0; mu < 4; mu++) - { - force_mu = Zero(); - for (int nu = 0; nu < 4; nu++) - { - if (mu == nu) - continue; - - RealD factor; - if (nu == 4 || mu == 4) - { - factor = 2.0 * csw_t; - } - else - { - factor = 2.0 * csw_r; - } - PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked - Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked - count++; - } - - pokeLorentz(clover_force, U[mu] * force_mu, mu); - } + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); } + }; @@ -227,15 +166,14 @@ public: ExpClover += pref * Clover; } - // Convert the data layout of the clover terms Clover = ExpClover * diag_mass; CloverInv = adj(ExpClover * (1.0/diag_mass)); } - static void CloverTermDerivative(GaugeField& clover_force, const FermionField& X, const FermionField& Y, - const std::vector& U, RealD csw_t, RealD csw_r) { + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); } + }; @@ -329,14 +267,9 @@ public: RealD csw_t, RealD diag_mass) { GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - // To be optimized: too much memory traffic; implement exp in improved layout - // Felix + Fabian: replace code below with - // - // ConvertLayout Clover -> Diagonal, Triangle - // ModifyBoundaries - // EvaluateExp - // code to be replaced + // To be optimized: implement exp in improved layout + // START: code to be replaced CloverField Clover(grid), ExpClover(grid); CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); @@ -353,9 +286,11 @@ public: // Convert the data layout of the clover terms CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); + CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); + // END: code to be replaced + Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; - CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); DiagonalInv = DiagonalInv*(1.0/diag_mass); TriangleInv = TriangleInv*(1.0/diag_mass); } diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 2ebf45f8..0991e274 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -134,45 +134,6 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Um double t4 = usecond(); CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass); -// CloverTerm += diag_mass; -// -// double t4 = usecond(); -// int lvol = _Umu.Grid()->lSites(); -// int DimRep = Impl::Dimension; -// -// double t5 = usecond(); -// { -// autoView(CTv,CloverTerm,CpuRead); -// autoView(CTIv,CloverTermInv,CpuWrite); -// thread_for(site, lvol, { -// Coordinate lcoor; -// grid->LocalIndexToLocalCoor(site, lcoor); -// Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); -// Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); -// typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); -// peekLocalSite(Qx, CTv, lcoor); -// //if (csw!=0){ -// for (int j = 0; j < Ns; j++) -// for (int k = 0; k < Ns; k++) -// for (int a = 0; a < DimRep; a++) -// for (int b = 0; b < DimRep; b++){ -// auto zz = Qx()(j, k)(a, b); -// EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); -// } -// // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; -// -// EigenInvCloverOp = EigenCloverOp.inverse(); -// //std::cout << EigenInvCloverOp << std::endl; -// for (int j = 0; j < Ns; j++) -// for (int k = 0; k < Ns; k++) -// for (int a = 0; a < DimRep; a++) -// for (int b = 0; b < DimRep; b++) -// Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); -// // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; -// // } -// pokeLocalSite(Qxinv, CTIv, lcoor); -// }); -// } double t5 = usecond(); // Separate the even and odd parts diff --git a/tests/core/Test_compact_wilson_clover_speedup.cc b/tests/core/Test_compact_wilson_clover_speedup.cc index 83e3da1f..7a74ab19 100644 --- a/tests/core/Test_compact_wilson_clover_speedup.cc +++ b/tests/core/Test_compact_wilson_clover_speedup.cc @@ -117,8 +117,8 @@ void runBenchmark(int* argc, char*** argv) { // type definitions typedef WilsonImpl WImpl; - typedef WilsonCloverFermion WilsonCloverOperator; - typedef CompactWilsonCloverFermion CompactWilsonCloverOperator; + typedef WilsonCloverFermion> WilsonCloverOperator; + typedef CompactWilsonCloverFermion> CompactWilsonCloverOperator; typedef typename WilsonCloverOperator::FermionField Fermion; typedef typename WilsonCloverOperator::GaugeField Gauge; From 11437930c56f86be3ee80d893a2d789d9c2f7c4f Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 10:45:16 +0100 Subject: [PATCH 04/86] cleaned up definitions of wilsonclover fermions --- Grid/qcd/action/fermion/Fermion.h | 78 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index bfbc16b8..223ff9dd 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -138,62 +138,52 @@ typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; // Clover fermions -typedef CloverHelpers CloverR; -typedef CloverHelpers CloverF; -typedef CloverHelpers CloverD; +template using WilsonClover = WilsonCloverFermion>; +template using WilsonExpClover = WilsonCloverFermion>; -typedef ExpCloverHelpers ExpCloverR; -typedef ExpCloverHelpers ExpCloverF; -typedef ExpCloverHelpers ExpCloverD; +typedef WilsonClover WilsonCloverFermionR; +typedef WilsonClover WilsonCloverFermionF; +typedef WilsonClover WilsonCloverFermionD; -typedef WilsonCloverFermion WilsonCloverFermionR; -typedef WilsonCloverFermion WilsonCloverFermionF; -typedef WilsonCloverFermion WilsonCloverFermionD; +typedef WilsonExpClover WilsonExpCloverFermionR; +typedef WilsonExpClover WilsonExpCloverFermionF; +typedef WilsonExpClover WilsonExpCloverFermionD; -typedef WilsonCloverFermion WilsonExpCloverFermionR; -typedef WilsonCloverFermion WilsonExpCloverFermionF; -typedef WilsonCloverFermion WilsonExpCloverFermionD; +typedef WilsonClover WilsonCloverAdjFermionR; +typedef WilsonClover WilsonCloverAdjFermionF; +typedef WilsonClover WilsonCloverAdjFermionD; -typedef WilsonCloverFermion WilsonCloverAdjFermionR; -typedef WilsonCloverFermion WilsonCloverAdjFermionF; -typedef WilsonCloverFermion WilsonCloverAdjFermionD; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; - -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionD; // Compact Clover fermions -typedef CompactCloverHelpers CompactCloverR; -typedef CompactCloverHelpers CompactCloverF; -typedef CompactCloverHelpers CompactCloverD; +template using CompactWilsonClover = CompactWilsonCloverFermion>; +template using CompactWilsonExpClover = CompactWilsonCloverFermion>; -typedef CompactExpCloverHelpers CompactExpCloverR; -typedef CompactExpCloverHelpers CompactExpCloverF; -typedef CompactExpCloverHelpers CompactExpCloverD; +typedef CompactWilsonClover CompactWilsonCloverFermionR; +typedef CompactWilsonClover CompactWilsonCloverFermionF; +typedef CompactWilsonClover CompactWilsonCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionR; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionF; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionD; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionR; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionF; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; - -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; From 71034f828e76e066d51f67269ce7916c8f6c6cba Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Wed, 23 Feb 2022 01:02:27 +0100 Subject: [PATCH 05/86] attempt to fix broken WilsonExpClover; Compact version still broken will be replaced by F.Joswig --- Grid/qcd/action/fermion/CloverHelpers.h | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index af712852..7f7ad2b7 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -119,7 +119,8 @@ public: template using iImplClover = iScalar, Ns>>; typedef WilsonCloverHelpers Helpers; - static void plusIdentity(const CloverField& in) { + // Can this be avoided? + static void IdentityTimesC(const CloverField& in, RealD c) { int DimRep = Impl::Dimension; autoView(in_v, in, AcceleratorWrite); @@ -127,7 +128,7 @@ public: accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++) + cn[i] = cn[i-1] / RealD(i); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * Clover + cn[i]; Clover = ExpClover * diag_mass; - CloverInv = adj(ExpClover * (1.0/diag_mass)); + CloverInv = adj(ExpClover) * (1.0/diag_mass); } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { From 8a3002c03be467740deff36ab43152b4481b876d Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Thu, 24 Feb 2022 22:02:56 +0100 Subject: [PATCH 06/86] separate left and right masses for CayleyFermion5D --- Grid/qcd/action/fermion/CayleyFermion5D.h | 13 ++++-- .../CayleyFermion5DImplementation.h | 42 +++++++++++-------- benchmarks/Benchmark_mooee.cc | 4 +- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index c7d68d73..3e06aa26 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -68,9 +68,16 @@ public: /////////////////////////////////////////////////////////////// // Support for MADWF tricks /////////////////////////////////////////////////////////////// - RealD Mass(void) { return mass; }; + RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; }; + RealD MassPlus(void) { return mass_plus; }; + RealD MassMinus(void) { return mass_minus; }; void SetMass(RealD _mass) { - mass=_mass; + mass_plus=mass_minus=_mass; + SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs + } ; + void SetMass(RealD _mass_plus, RealD _mass_minus) { + mass_plus=_mass_plus; + mass_minus=_mass_minus; SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs } ; void P(const FermionField &psi, FermionField &chi); @@ -108,7 +115,7 @@ public: void MeooeDag5D (const FermionField &in, FermionField &out); // protected: - RealD mass; + RealD mass_plus, mass_minus; // Save arguments to SetCoefficientsInternal Vector _gamma; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 1ed66bda..ad3a925f 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -47,7 +47,7 @@ CayleyFermion5D::CayleyFermion5D(GaugeField &_Umu, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,_M5,p), - mass(_mass) + mass_plus(_mass), mass_minus(_mass) { } @@ -209,8 +209,8 @@ void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; Vector diag (Ls,1.0); - Vector upper(Ls,-1.0); upper[Ls-1]=mass; - Vector lower(Ls,-1.0); lower[0] =mass; + Vector upper(Ls,-1.0); upper[Ls-1]=mass_minus; + Vector lower(Ls,-1.0); lower[0] =mass_plus; M5D(psi,chi,chi,lower,diag,upper); } template @@ -220,8 +220,8 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D Vector diag = bs; Vector upper= cs; Vector lower= cs; - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,Din,lower,diag,upper); } // FIXME Redunant with the above routine; check this and eliminate @@ -235,8 +235,8 @@ template void CayleyFermion5D::Meo5D (const FermionField & upper[i]=-ceo[i]; lower[i]=-ceo[i]; } - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,chi,lower,diag,upper); } template @@ -250,8 +250,8 @@ void CayleyFermion5D::Mooee (const FermionField &psi, FermionField & upper[i]=-cee[i]; lower[i]=-cee[i]; } - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,chi,lower,diag,upper); } template @@ -266,9 +266,9 @@ void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField & // Assemble the 5d matrix if ( s==0 ) { upper[s] = -cee[s+1] ; - lower[s] = mass*cee[Ls-1]; + lower[s] = mass_minus*cee[Ls-1]; } else if ( s==(Ls-1)) { - upper[s] = mass*cee[0]; + upper[s] = mass_plus*cee[0]; lower[s] = -cee[s-1]; } else { upper[s]=-cee[s+1]; @@ -291,8 +291,8 @@ void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) Vector diag(Ls,1.0); Vector upper(Ls,-1.0); Vector lower(Ls,-1.0); - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_plus*upper[Ls-1]; + lower[0] =-mass_minus*lower[0]; M5Ddag(psi,chi,chi,lower,diag,upper); } @@ -307,9 +307,9 @@ void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField for (int s=0;s::SetCoefficientsInternal(RealD zolo_hi,Vector::SetCoefficientsInternal(RealD zolo_hi,Vector::SetCoefficientsInternal(RealD zolo_hi,Vector::ContractConservedCurrent( PropagatorField &q_in_1, Current curr_type, unsigned int mu) { + + assert(mass_plus == mass_minus); + RealD mass = mass_plus; + #if (!defined(GRID_HIP)) Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -777,6 +781,8 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, assert(mu>=0); assert(mu diag = Dw.bs; Vector upper= Dw.cs; Vector lower= Dw.cs; - upper[Ls-1]=-Dw.mass*upper[Ls-1]; - lower[0] =-Dw.mass*lower[0]; + upper[Ls-1]=-Dw.mass_minus*upper[Ls-1]; + lower[0] =-Dw.mass_plus*lower[0]; LatticeFermion r_eo(FGrid); LatticeFermion src_e (FrbGrid); From 013dc2ef338bfb60d01947df65b6037e5d87412e Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:13:47 +0000 Subject: [PATCH 07/86] tests: core tests for wilson clover and wilson exp clover including compact version extended/added --- tests/core/Test_wilson_clover.cc | 214 ++++++++++- tests/core/Test_wilson_exp_clover.cc | 530 +++++++++++++++++++++++++++ 2 files changed, 724 insertions(+), 20 deletions(-) create mode 100644 tests/core/Test_wilson_exp_clover.cc diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 642c30a8..8f143070 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -2,11 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./benchmarks/Benchmark_wilson.cc + Source file: ./tests/core/Test_wilson_clover.cc Copyright (C) 2015 Author: Guido Cossu + Fabian Joswig This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,8 +68,6 @@ int main(int argc, char **argv) tmp = Zero(); FermionField err(&Grid); err = Zero(); - FermionField err2(&Grid); - err2 = Zero(); FermionField phi(&Grid); random(pRNG, phi); FermionField chi(&Grid); @@ -77,6 +76,8 @@ int main(int argc, char **argv) SU::HotConfiguration(pRNG, Umu); std::vector U(4, &Grid); + double tolerance = 1e-4; + double volume = 1; for (int mu = 0; mu < Nd; mu++) { @@ -88,7 +89,7 @@ int main(int argc, char **argv) RealD csw_t = 1.0; WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - //Dwc.ImportGauge(Umu); // not necessary, included in the constructor + CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; @@ -112,7 +113,24 @@ int main(int argc, char **argv) setCheckerboard(r_eo, r_e); err = ref - r_eo; - std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; @@ -152,6 +170,22 @@ int main(int argc, char **argv) std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl; @@ -169,7 +203,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; @@ -188,7 +236,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; @@ -207,7 +269,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; @@ -249,7 +325,7 @@ int main(int argc, char **argv) ///////////////// WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - Dwc_prime.ImportGauge(U_prime); + CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); tmp = Omega * src; pickCheckerboard(Even, src_e, tmp); @@ -262,7 +338,37 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = chi - adj(Omega) * phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "=================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; @@ -272,7 +378,6 @@ int main(int argc, char **argv) phi = Zero(); WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); - Dw.ImportGauge(Umu); Dw.M(src, result); Dwc.M(src, chi); @@ -280,13 +385,24 @@ int main(int argc, char **argv) Dwc_prime.M(Omega * src, phi); WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); - Dw_prime.ImportGauge(U_prime); Dw_prime.M(Omega * src, result2); + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); err = chi - adj(Omega) * phi; - err2 = result - adj(Omega) * result2; - std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; - std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err2) << std::endl; + std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; @@ -296,7 +412,6 @@ int main(int argc, char **argv) phi = Zero(); err = Zero(); WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 - Dwc_csw0.ImportGauge(Umu); pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); @@ -316,7 +431,34 @@ int main(int argc, char **argv) setCheckerboard(src, src_o); err = chi - phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; @@ -348,9 +490,41 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = ref - phi; - std::cout << GridLogMessage << "ref (unpreconditioned operator) diff :" << norm2(ref) << std::endl; - std::cout << GridLogMessage << "phi (EO decomposition) diff :" << norm2(phi) << std::endl; - std::cout << GridLogMessage << "norm diff :" << norm2(err) << std::endl; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); Grid_finalize(); } diff --git a/tests/core/Test_wilson_exp_clover.cc b/tests/core/Test_wilson_exp_clover.cc new file mode 100644 index 00000000..8516d0dc --- /dev/null +++ b/tests/core/Test_wilson_exp_clover.cc @@ -0,0 +1,530 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/core/Test_wilson_exp_clover.cc + + Copyright (C) 2022 + + Author: Guido Cossu + Fabian Joswig + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main(int argc, char **argv) +{ + Grid_init(&argc, &argv); + + auto latt_size = GridDefaultLatt(); + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALF" << sizeof(RealF) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALD" << sizeof(RealD) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REAL" << sizeof(Real) << std::endl; + + std::vector seeds({1, 2, 3, 4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename WilsonExpCloverFermionR::FermionField FermionField; + typename WilsonExpCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + + FermionField src(&Grid); + random(pRNG, src); + FermionField result(&Grid); + result = Zero(); + FermionField result2(&Grid); + result2 = Zero(); + FermionField ref(&Grid); + ref = Zero(); + FermionField tmp(&Grid); + tmp = Zero(); + FermionField err(&Grid); + err = Zero(); + FermionField phi(&Grid); + random(pRNG, phi); + FermionField chi(&Grid); + random(pRNG, chi); + LatticeGaugeField Umu(&Grid); + SU::HotConfiguration(pRNG, Umu); + std::vector U(4, &Grid); + + double tolerance = 1e-4; + + double volume = 1; + for (int mu = 0; mu < Nd; mu++) + { + volume = volume * latt_size[mu]; + } + + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + + WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + FermionField src_e(&RBGrid); + FermionField src_o(&RBGrid); + FermionField r_e(&RBGrid); + FermionField r_o(&RBGrid); + FermionField r_eo(&Grid); + pickCheckerboard(Even, src_e, src); + pickCheckerboard(Odd, src_o, src); + + Dwc.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; + std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + FermionField chi_e(&RBGrid); + FermionField chi_o(&RBGrid); + + FermionField dchi_e(&RBGrid); + FermionField dchi_o(&RBGrid); + + FermionField phi_e(&RBGrid); + FermionField phi_o(&RBGrid); + + FermionField dphi_e(&RBGrid); + FermionField dphi_o(&RBGrid); + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Meooe(chi_e, dchi_o); + Dwc.Meooe(chi_o, dchi_e); + Dwc.MeooeDag(phi_e, dphi_o); + Dwc.MeooeDag(phi_o, dphi_e); + + ComplexD pDce = innerProduct(phi_e, dchi_e); + ComplexD pDco = innerProduct(phi_o, dchi_o); + ComplexD cDpe = innerProduct(chi_e, dphi_e); + ComplexD cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.Mooee(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.Mooee(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInvDag(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + ////////////////////// Gauge Transformation + std::vector seeds2({5, 6, 7, 8}); + GridParallelRNG pRNG2(&Grid); + pRNG2.SeedFixedIntegers(seeds2); + LatticeColourMatrix Omega(&Grid); + LatticeColourMatrix ShiftedOmega(&Grid); + LatticeGaugeField U_prime(&Grid); + U_prime = Zero(); + LatticeColourMatrix U_prime_mu(&Grid); + U_prime_mu = Zero(); + SU::LieRandomize(pRNG2, Omega, 1.0); + for (int mu = 0; mu < Nd; mu++) + { + U[mu] = peekLorentz(Umu, mu); + ShiftedOmega = Cshift(Omega, mu, 1); + U_prime_mu = Omega * U[mu] * adj(ShiftedOmega); + pokeLorentz(U_prime, U_prime_mu, mu); + } + ///////////////// + + WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_prime.Mooee(src_e, phi_e); + Dwc_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "=================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + + WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); + + Dw.M(src, result); + Dwc.M(src, chi); + + Dwc_prime.M(Omega * src, phi); + + WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); + Dw_prime.M(Omega * src, result2); + + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff WilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_csw0.Mooee(src_e, phi_e); + Dwc_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + Dwc.Meooe(src_o, phi_e); + Dwc.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Grid_finalize(); +} From 239e2c1ee6a031be03c67c4b8de16e9538323982 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:26:34 +0000 Subject: [PATCH 08/86] tests: wilson clover cg tests now include compact variant as well as exponential wilson clover operators --- tests/solver/Test_wilsonclover_cg_prec.cc | 27 +++++++++++++++-- tests/solver/Test_wilsonclover_cg_schur.cc | 32 ++++++++++++++++----- tests/solver/Test_wilsonclover_cg_unprec.cc | 27 +++++++++++++++-- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/tests/solver/Test_wilsonclover_cg_prec.cc b/tests/solver/Test_wilsonclover_cg_prec.cc index 5ef2dc09..abf64a1f 100644 --- a/tests/solver/Test_wilsonclover_cg_prec.cc +++ b/tests/solver/Test_wilsonclover_cg_prec.cc @@ -71,7 +71,12 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + // HermitianOperator HermOp(Dw); // ConjugateGradient CG(1.0e-8,10000); @@ -80,12 +85,28 @@ int main (int argc, char ** argv) LatticeFermion src_o(&RBGrid); LatticeFermion result_o(&RBGrid); pickCheckerboard(Odd,src_o,src); - result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO(Dw); + result_o=Zero(); CG(HermOpEO,src_o,result_o); - + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_compact(Dw_compact); + result_o=Zero(); + CG(HermOpEO_compact,src_o,result_o); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp(Dwe); + result_o=Zero(); + CG(HermOpEO_exp,src_o,result_o); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp_compact(Dwe_compact); + result_o=Zero(); + CG(HermOpEO_exp_compact,src_o,result_o); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_schur.cc b/tests/solver/Test_wilsonclover_cg_schur.cc index 567a8283..50d06af7 100644 --- a/tests/solver/Test_wilsonclover_cg_schur.cc +++ b/tests/solver/Test_wilsonclover_cg_schur.cc @@ -60,18 +60,36 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); LatticeFermion src(&Grid); random(pRNG,src); - LatticeFermion result(&Grid); result=Zero(); - LatticeFermion resid(&Grid); - - RealD mass = -0.1; - RealD csw_r = 1.0; - RealD csw_t = 1.0; - WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + LatticeFermion result(&Grid); + LatticeFermion resid(&Grid); ConjugateGradient CG(1.0e-8,10000); SchurRedBlackDiagMooeeSolve SchurSolver(CG); + RealD mass = -0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + RealD cF = 1.0; + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); SchurSolver(Dw,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dw_compact,src,result); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); + SchurSolver(Dwe,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dwe_compact,src,result); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_unprec.cc b/tests/solver/Test_wilsonclover_cg_unprec.cc index 755d80e1..2a859f11 100644 --- a/tests/solver/Test_wilsonclover_cg_unprec.cc +++ b/tests/solver/Test_wilsonclover_cg_unprec.cc @@ -59,7 +59,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); - LatticeFermion result(&Grid); result=Zero(); + LatticeFermion result(&Grid); LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; @@ -70,11 +70,34 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); - MdagMLinearOperator HermOp(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + MdagMLinearOperator HermOp(Dw); + result=Zero(); CG(HermOp,src,result); + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + MdagMLinearOperator HermOp_compact(Dw_compact); + result=Zero(); + CG(HermOp_compact,src,result); + + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp(Dwe); + result=Zero(); + CG(HermOp_exp,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp_compact(Dwe_compact); + result=Zero(); + CG(HermOp_exp_compact,src,result); + Grid_finalize(); } From 438caab25f66918dc5f29bd4a873f52baef2aeeb Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:27:18 +0000 Subject: [PATCH 09/86] generate_instantiations.sh now correctly produces instantiations for CompactClover variant, redundant instantiations removed. --- ...ilsonKernelsInstantiationWilsonAdjImplD.cc | 52 +------------------ ...ilsonKernelsInstantiationWilsonAdjImplF.cc | 52 +------------------ .../WilsonKernelsInstantiationWilsonImplD.cc | 52 +------------------ .../WilsonKernelsInstantiationWilsonImplF.cc | 52 +------------------ ...tiationWilsonTwoIndexAntiSymmetricImplD.cc | 52 +------------------ ...tiationWilsonTwoIndexAntiSymmetricImplF.cc | 52 +------------------ ...stantiationWilsonTwoIndexSymmetricImplD.cc | 52 +------------------ ...stantiationWilsonTwoIndexSymmetricImplF.cc | 52 +------------------ .../WilsonKernelsInstantiationZWilsonImplD.cc | 52 +------------------ .../WilsonKernelsInstantiationZWilsonImplF.cc | 52 +------------------ .../instantiation/generate_instantiations.sh | 24 +++++++-- 11 files changed, 29 insertions(+), 515 deletions(-) mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index a09fd420..d6845e75 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \ GparityWilsonImplF \ GparityWilsonImplD " +COMPACT_WILSON_IMPL_LIST=" \ + WilsonImplF \ + WilsonImplD " + DWF_IMPL_LIST=" \ WilsonImplF \ WilsonImplD \ @@ -40,13 +44,23 @@ EOF done -CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" +CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" for impl in $WILSON_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc +done +done + +CC_LIST="CompactWilsonCloverFermionInstantiation" + +for impl in $COMPACT_WILSON_IMPL_LIST +do +for f in $CC_LIST +do + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done @@ -63,14 +77,14 @@ for impl in $DWF_IMPL_LIST $GDWF_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done # overwrite the .cc file in Gparity directories for impl in $GDWF_IMPL_LIST do - ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc + ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc done @@ -84,7 +98,7 @@ for impl in $STAG_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done From d5b2323a579ca3b8e95fb6686a0fdf7d151a4503 Mon Sep 17 00:00:00 2001 From: Felix Ziegler Date: Mon, 7 Mar 2022 14:44:24 +0000 Subject: [PATCH 10/86] included Cayley-Hamilton exponentiation for the compact Wilson exp clover, bug fix for inverse of exp clover --- Grid/qcd/action/fermion/CloverHelpers.h | 296 +++++++++++++++++++----- 1 file changed, 240 insertions(+), 56 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 7f7ad2b7..134ee161 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -35,7 +35,7 @@ #include //////////////////////////////////////////// -// Standard Clover +// Standard Clover // (4+m0) + csw * clover_term // Exp Clover // (4+m0) * exp(csw/(4+m0) clover_term) @@ -46,10 +46,10 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////// -// Generic Standard Clover +// Generic Standard Clover ////////////////////////////////// -template +template class CloverHelpers: public WilsonCloverHelpers { public: @@ -61,7 +61,7 @@ public: static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { GridBase *grid = CloverTerm.Grid(); CloverTerm += diag_mass; - + int lvol = grid->lSites(); int DimRep = Impl::Dimension; { @@ -83,7 +83,7 @@ public: EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); } // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - + EigenInvCloverOp = EigenCloverOp.inverse(); //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) @@ -106,10 +106,10 @@ public: ////////////////////////////////// -// Generic Exp Clover +// Generic Exp Clover ////////////////////////////////// -template +template class ExpCloverHelpers: public WilsonCloverHelpers { public: @@ -122,9 +122,9 @@ public: // Can this be avoided? static void IdentityTimesC(const CloverField& in, RealD c) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa=0; i--) ExpClover = ExpClover * Clover + cn[i]; - + + // prepare inverse + CloverInv = (-1.0)*Clover; + Clover = ExpClover * diag_mass; - CloverInv = adj(ExpClover) * (1.0/diag_mass); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * CloverInv + cn[i]; + + CloverInv = ExpClover * (1.0/diag_mass); + } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { @@ -182,11 +192,11 @@ public: ////////////////////////////////// -// Compact Standard Clover +// Compact Standard Clover ////////////////////////////////// -template +template class CompactCloverHelpers: public CompactWilsonCloverHelpers, public WilsonCloverHelpers { public: @@ -197,7 +207,7 @@ public: typedef WilsonCloverHelpers Helpers; typedef CompactWilsonCloverHelpers CompactHelpers; - + static void MassTerm(CloverField& Clover, RealD diag_mass) { Clover += diag_mass; } @@ -219,10 +229,9 @@ public: }; ////////////////////////////////// -// Compact Exp Clover +// Compact Exp Clover ////////////////////////////////// - template class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { public: @@ -230,22 +239,28 @@ public: INHERIT_IMPL_TYPES(Impl); INHERIT_CLOVER_TYPES(Impl); INHERIT_COMPACT_CLOVER_TYPES(Impl); - - template using iImplClover = iScalar, Ns>>; + + template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; - + +/* static void plusIdentity(const CloverField& in) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa> &t, RealD R) {return getNMAX(1e-12,R);} static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} - - static void MassTerm(CloverField& Clover, RealD diag_mass) { - // csw/(diag_mass) * clover - Clover = Clover * (1.0/diag_mass); - } - - static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + + static void ExponentiateHermitean6by6(const iMatrix &arg, const RealD& alpha, const std::vector& cN, const int Niter, iMatrix& dest){ + + typedef iMatrix mat; + + RealD qn[6]; + RealD qnold[6]; + RealD p[5]; + RealD trA2, trA3, trA4; + + mat A2, A3, A4, A5; + A2 = alpha * alpha * arg * arg; + A3 = alpha * arg * A2; + A4 = A2 * A2; + A5 = A2 * A3; + + trA2 = toReal( trace(A2) ); + trA3 = toReal( trace(A3) ); + trA4 = toReal( trace(A4)); + + p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0; + p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0; + p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2; + p[3] = trA3 / 3.0; + p[4] = 0.5 * trA2; + + qnold[0] = cN[Niter]; + qnold[1] = 0.0; + qnold[2] = 0.0; + qnold[3] = 0.0; + qnold[4] = 0.0; + qnold[5] = 0.0; + + for(int i = Niter-1; i >= 0; i--) + { + qn[0] = p[0] * qnold[5] + cN[i]; + qn[1] = p[1] * qnold[5] + qnold[0]; + qn[2] = p[2] * qnold[5] + qnold[1]; + qn[3] = p[3] * qnold[5] + qnold[2]; + qn[4] = p[4] * qnold[5] + qnold[3]; + qn[5] = qnold[4]; + + qnold[0] = qn[0]; + qnold[1] = qn[1]; + qnold[2] = qn[2]; + qnold[3] = qn[3]; + qnold[4] = qn[4]; + qnold[5] = qn[5]; + } + + mat unit(1.0); + + dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5); + + } + + static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - - // To be optimized: implement exp in improved layout - // START: code to be replaced - CloverField Clover(grid), ExpClover(grid); - - CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); - - ExpClover = Clover; - plusIdentity(ExpClover); // 1 + Clover - - RealD pref = 1.0; - for (int i=2; i<=NMAX; i++) { - Clover = Clover * Clover; - pref /= (RealD)(i); - ExpClover += pref * Clover; + + // + // Implementation completely in Daniel's layout + // + + // Taylor expansion with Cayley-Hamilton recursion + // underlying Horner scheme as above + std::vector cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++){ + cn[i] = cn[i-1] / RealD(i); } - - // Convert the data layout of the clover terms - CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); - CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); - // END: code to be replaced - + + // Taken over from Daniel's implementation + conformable(Diagonal, Triangle); + + long lsites = grid->lSites(); + + typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; + typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; + typedef iMatrix mat; + + autoView(diagonal_v, Diagonal, CpuRead); + autoView(triangle_v, Triangle, CpuRead); + autoView(diagonalExp_v, Diagonal, CpuWrite); + autoView(triangleExp_v, Triangle, CpuWrite); + autoView(diagonalExpInv_v, DiagonalInv, CpuWrite); + autoView(triangleExpInv_v, TriangleInv, CpuWrite); + + thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite + + mat srcCloverOpUL(0.0); // upper left block + mat srcCloverOpLR(0.0); // lower right block + mat ExpCloverOp; + + scalar_object_diagonal diagonal_tmp = Zero(); + scalar_object_diagonal diagonal_exp_tmp = Zero(); + scalar_object_triangle triangle_tmp = Zero(); + scalar_object_triangle triangle_exp_tmp = Zero(); + + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + + peekLocalSite(diagonal_tmp, diagonal_v, lcoor); + peekLocalSite(triangle_tmp, triangle_v, lcoor); + + // block = 0 + int block; + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpUL(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpUL(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + // block = 1 + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpLR(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpLR(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + + // exp(Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); + + // inverse exp(-Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExpInv_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExpInv_v, lcoor); + }); + Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; + DiagonalInv = DiagonalInv*(1.0/diag_mass); TriangleInv = TriangleInv*(1.0/diag_mass); } - + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); } - + }; From 56c089d347aa7f40cb17b05f12dec7021a356066 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 7 Mar 2022 16:29:19 +0000 Subject: [PATCH 11/86] Removed leftover comments --- Grid/qcd/action/fermion/CloverHelpers.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 134ee161..f7f81421 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -74,7 +74,7 @@ public: Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); peekLocalSite(Qx, CTv, lcoor); - //if (csw!=0){ + for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) @@ -82,17 +82,13 @@ public: auto zz = Qx()(j, k)(a, b); EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } pokeLocalSite(Qxinv, CTIv, lcoor); }); } @@ -153,7 +149,6 @@ public: int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); - // csw/(diag_mass) * clover Clover *= (1.0/diag_mass); // Taylor expansion, slow but generic @@ -243,19 +238,6 @@ public: template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; -/* - static void plusIdentity(const CloverField& in) { - int DimRep = Impl::Dimension; - - autoView(in_v, in, AcceleratorWrite); - - accelerator_for(ss, in.Grid()->oSites(), 1, { - for (int sa=0; sa Date: Mon, 7 Mar 2022 17:43:33 +0000 Subject: [PATCH 12/86] Reintroduced explicit inversion of the Clover term in case of the CompactExpClover because of the open boundary O(a) improvement. Changed the timing output to GridLogDebug --- Grid/qcd/action/fermion/CloverHelpers.h | 46 ++----------------- ...CompactWilsonCloverFermionImplementation.h | 41 +++++++++-------- .../WilsonCloverFermionImplementation.h | 19 ++++---- 3 files changed, 34 insertions(+), 72 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index f7f81421..ad456bde 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -207,13 +207,13 @@ public: Clover += diag_mass; } - static void Instantiate(CloverDiagonalField& Diagonal, + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { - // Invert the clover term in the improved layout - CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + + // Do nothing } // TODO: implement Cmunu for better performances with compact layout, but don't do it @@ -313,7 +313,7 @@ public: } - static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { @@ -345,8 +345,6 @@ public: autoView(triangle_v, Triangle, CpuRead); autoView(diagonalExp_v, Diagonal, CpuWrite); autoView(triangleExp_v, Triangle, CpuWrite); - autoView(diagonalExpInv_v, DiagonalInv, CpuWrite); - autoView(triangleExpInv_v, TriangleInv, CpuWrite); thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite @@ -421,46 +419,10 @@ public: pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); - - // inverse exp(-Clover) - - ExponentiateHermitean6by6(srcCloverOpUL,-1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 0; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - ExponentiateHermitean6by6(srcCloverOpLR,-1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 1; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - pokeLocalSite(diagonal_exp_tmp, diagonalExpInv_v, lcoor); - pokeLocalSite(triangle_exp_tmp, triangleExpInv_v, lcoor); }); Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; - - DiagonalInv = DiagonalInv*(1.0/diag_mass); - TriangleInv = TriangleInv*(1.0/diag_mass); } diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index b42957ac..5120680b 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -326,16 +326,20 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie double t4 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); - // Possible modify the boundary values + // Exponentiate the clover (nothing happens in case of the standard clover) double t5 = usecond(); + CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + + // Possible modify the boundary values + double t6 = usecond(); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Instantiate based on clover policy - double t6 = usecond(); - CloverHelpers::Instantiate(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + // Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions) + double t7 = usecond(); + CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); // Fill the remaining clover fields - double t7 = usecond(); + double t8 = usecond(); pickCheckerboard(Even, DiagonalEven, Diagonal); pickCheckerboard(Even, TriangleEven, Triangle); pickCheckerboard(Odd, DiagonalOdd, Diagonal); @@ -346,20 +350,19 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie pickCheckerboard(Odd, TriangleInvOdd, TriangleInv); // Report timings - double t8 = usecond(); -#if 0 - std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", convert = " << (t5 - t4) / 1e6 - << ", boundaries = " << (t6 - t5) / 1e6 - << ", instantiate = " << (t7 - t6) / 1e6 - << ", pick cbs = " << (t8 - t7) / 1e6 - << ", total = " << (t8 - t0) / 1e6 - << std::endl; -#endif + double t9 = usecond(); + + std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl; + std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl; } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 0991e274..2a7e7535 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -150,17 +150,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Um pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); double t6 = usecond(); -#if 0 - std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", instantiation = " << (t5 - t4) / 1e6 - << ", pick cbs = " << (t6 - t5) / 1e6 - << ", total = " << (t6 - t0) / 1e6 - << std::endl; -#endif + std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "instantiation = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t6 - t0) / 1e6 << std::endl; } template From 0c0c2b1e20d3290ca2d54fe3a8a9339aabc6d9dd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 8 Mar 2022 09:44:51 +0000 Subject: [PATCH 13/86] Unnecessary arguments of CloverHelpers::Exponentiate_Clover removed. --- Grid/qcd/action/fermion/CloverHelpers.h | 6 +----- .../CompactWilsonCloverFermionImplementation.h | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index ad456bde..cd62f515 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -209,8 +209,6 @@ public: static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, - CloverDiagonalField& DiagonalInv, - CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { // Do nothing @@ -313,9 +311,7 @@ public: } - static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, - CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, - RealD csw_t, RealD diag_mass) { + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) { GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 5120680b..2a53a254 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -328,7 +328,7 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie // Exponentiate the clover (nothing happens in case of the standard clover) double t5 = usecond(); - CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass); // Possible modify the boundary values double t6 = usecond(); From 76c294a7bae149e9e52b980396a4265c77d3d2d1 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Tue, 8 Mar 2022 13:55:16 +0100 Subject: [PATCH 14/86] open bc fix --- .../CompactWilsonCloverFermionImplementation.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 3dfcb133..4e534f6e 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -64,8 +64,11 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, csw_r /= clover_anisotropy.xi_0; ImportGauge(_Umu); - if (open_boundaries) + if (open_boundaries) { + this->BoundaryMaskEven.Checkerboard() = Even; + this->BoundaryMaskOdd.Checkerboard() = Odd; CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); + } } template From 317bdcf158165d30ae1fd009026b253384357e08 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Thu, 24 Mar 2022 13:10:47 +0100 Subject: [PATCH 15/86] nerscio parametrization --- Grid/parallelIO/NerscIO.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h index 99011e25..dabef9e5 100644 --- a/Grid/parallelIO/NerscIO.h +++ b/Grid/parallelIO/NerscIO.h @@ -209,26 +209,27 @@ public: template static inline void writeConfiguration(Lattice &Umu, std::string file, - std::string ens_label = std::string("DWF")) + std::string ens_label = std::string("DWF"), + std::string ens_id = std::string("UKQCD"), + unsigned int sequence_number = 1) { - writeConfiguration(Umu,file,0,1,ens_label); + writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number); } template static inline void writeConfiguration(Lattice &Umu, std::string file, int two_row, int bits32, - std::string ens_label = std::string("DWF")) + std::string ens_label = std::string("DWF"), + std::string ens_id = std::string("UKQCD"), + unsigned int sequence_number = 1) { typedef vLorentzColourMatrixD vobj; typedef typename vobj::scalar_object sobj; FieldMetaData header; - /////////////////////////////////////////// - // Following should become arguments - /////////////////////////////////////////// - header.sequence_number = 1; - header.ensemble_id = std::string("UKQCD"); + header.sequence_number = sequence_number; + header.ensemble_id = ens_id; header.ensemble_label = ens_label; typedef LorentzColourMatrixD fobj3D; From 0542eaf1da28daad4f6e346b78c2c8317e9566fe Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:02:09 +0100 Subject: [PATCH 16/86] First version of conserved current contraction for Wilson type quarks --- .../WilsonFermionImplementation.h | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 84ac25c1..12e65d24 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -603,7 +603,37 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + + PropagatorField tmp_shifted(UGrid); + PropagatorField g5Lg5(UGrid); + PropagatorField R(UGrid); + PropagatorField gmuR(UGrid); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + tmp_shifted=Cshift(q_in_1,mu,1); + g5Lg5=g5*tmp_shifted*g5; + R=q_in_2; + gmuR=gmu*R; + + qout=adj(g5Lg5)*R; + qout+=adj(g5Lg5)*gmuR; + + g5Lg5=g5*q_in_1*g5; + tmp_shifted=Cshift(q_in_2,mu,1); + Impl::multLinkField(R,this->Umu,tmp_shifted,mu); + gmuR=gmu*R; + + qout-=adj(g5Lg5)*R; + qout+=adj(g5Lg5)*gmuR; + + qout/=2; } From cdf31d52c15109102b31f88b135bbaff8f216d30 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:04:35 +0100 Subject: [PATCH 17/86] GaugeGrid and typo fixed --- .../implementation/WilsonFermionImplementation.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 12e65d24..60ccdc2e 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -603,6 +603,7 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); + auto UGrid= this->GaugeGrid(); PropagatorField tmp_shifted(UGrid); PropagatorField g5Lg5(UGrid); @@ -622,18 +623,18 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, R=q_in_2; gmuR=gmu*R; - qout=adj(g5Lg5)*R; - qout+=adj(g5Lg5)*gmuR; + q_out=adj(g5Lg5)*R; + q_out+=adj(g5Lg5)*gmuR; g5Lg5=g5*q_in_1*g5; tmp_shifted=Cshift(q_in_2,mu,1); Impl::multLinkField(R,this->Umu,tmp_shifted,mu); gmuR=gmu*R; - qout-=adj(g5Lg5)*R; - qout+=adj(g5Lg5)*gmuR; + q_out-=adj(g5Lg5)*R; + q_out+=adj(g5Lg5)*gmuR; - qout/=2; + q_out/=2; } From fe993c0836fe6c234e5e34112beecf0a5d98cadd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:08:17 +0100 Subject: [PATCH 18/86] /=2 replaced by *=0.5 --- .../action/fermion/implementation/WilsonFermionImplementation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 60ccdc2e..3e7bb291 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -634,7 +634,7 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, q_out-=adj(g5Lg5)*R; q_out+=adj(g5Lg5)*gmuR; - q_out/=2; + q_out*=0.5; } From 603fd967477fd2b5b8ec06dfbcb948c13b46b151 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 10:58:56 +0100 Subject: [PATCH 19/86] Missing link multiplication added. --- .../fermion/implementation/WilsonFermionImplementation.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 3e7bb291..c82dd172 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -619,7 +619,8 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Gamma gmu=Gamma(Gmu[mu]); tmp_shifted=Cshift(q_in_1,mu,1); - g5Lg5=g5*tmp_shifted*g5; + Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu); + g5Lg5=g5*g5Lg5*g5; R=q_in_2; gmuR=gmu*R; From 9e82c468ab8ad6d929e982b1759c0049da7f6254 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 15:54:43 +0100 Subject: [PATCH 20/86] Multiplication of diagonal mass in exponentiate fixed for gpus --- Grid/qcd/action/fermion/CloverHelpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index cd62f515..72727279 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -417,8 +417,8 @@ public: pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); - Diagonal = Diagonal * diag_mass; - Triangle = Triangle * diag_mass; + Diagonal *= diag_mass; + Triangle *= diag_mass; } From 427c8695feaba4de96e865a3754e07f669e6e121 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 16:20:21 +0100 Subject: [PATCH 21/86] Change signs and prefactors for conserved current to mimic the 5d version. --- .../WilsonFermionImplementation.h | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index c82dd172..40e774b3 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -618,24 +618,22 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, }; Gamma gmu=Gamma(Gmu[mu]); + g5Lg5=g5*q_in_1*g5; + tmp_shifted=Cshift(q_in_2,mu,1); + Impl::multLinkField(R,this->Umu,tmp_shifted,mu); + gmuR=gmu*R; + + q_out=adj(g5Lg5)*R; + q_out-=adj(g5Lg5)*gmuR; + tmp_shifted=Cshift(q_in_1,mu,1); Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu); g5Lg5=g5*g5Lg5*g5; R=q_in_2; gmuR=gmu*R; - q_out=adj(g5Lg5)*R; - q_out+=adj(g5Lg5)*gmuR; - - g5Lg5=g5*q_in_1*g5; - tmp_shifted=Cshift(q_in_2,mu,1); - Impl::multLinkField(R,this->Umu,tmp_shifted,mu); - gmuR=gmu*R; - q_out-=adj(g5Lg5)*R; - q_out+=adj(g5Lg5)*gmuR; - - q_out*=0.5; + q_out-=adj(g5Lg5)*gmuR; } From 6577a03d1641df1f94cc53f1accd0d5485e325d0 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 18:39:12 +0100 Subject: [PATCH 22/86] Explcitly closed views in Exponentiate_Clover --- Grid/qcd/action/fermion/CloverHelpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 72727279..67417fcd 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -417,6 +417,11 @@ public: pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); + diagonal_v.ViewClose(); + triangle_v.ViewClose(); + diagonalExp_v.ViewClose(); + triangleExp_v.ViewClose(); + Diagonal *= diag_mass; Triangle *= diag_mass; } From f23626a6b80a653c68d2a9d2860dc65c5be98f09 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sat, 2 Apr 2022 11:32:15 +0100 Subject: [PATCH 23/86] End scope by additional block in CloverHelpers.h --- Grid/qcd/action/fermion/CloverHelpers.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 67417fcd..c72a79e8 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -332,7 +332,7 @@ public: conformable(Diagonal, Triangle); long lsites = grid->lSites(); - + { typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; typedef iMatrix mat; @@ -416,11 +416,7 @@ public: pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); - - diagonal_v.ViewClose(); - triangle_v.ViewClose(); - diagonalExp_v.ViewClose(); - triangleExp_v.ViewClose(); + } Diagonal *= diag_mass; Triangle *= diag_mass; From c8a824425bbb5b543d41340d91a78ecc9273011e Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 10:58:22 +0100 Subject: [PATCH 24/86] Error message added if another conserved current than vector is requested for Wilson type fermions. --- .../WilsonFermionImplementation.h | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 40e774b3..69a7fb16 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -599,16 +599,22 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Current curr_type, unsigned int mu) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + Gamma g5(Gamma::Algebra::Gamma5); conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); auto UGrid= this->GaugeGrid(); - PropagatorField tmp_shifted(UGrid); - PropagatorField g5Lg5(UGrid); - PropagatorField R(UGrid); - PropagatorField gmuR(UGrid); + PropagatorField tmp_shifted(UGrid); + PropagatorField g5Lg5(UGrid); + PropagatorField R(UGrid); + PropagatorField gmuR(UGrid); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -647,6 +653,12 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, unsigned int tmax, ComplexField &lattice_cmplx) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); assert(0); From d7191e5a026412c6c6c76a1f29961b4cb007506c Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 11:48:56 +0100 Subject: [PATCH 25/86] SeqConservedCurrent implemented for Wilson fermions --- .../WilsonFermionImplementation.h | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 69a7fb16..6bfb9305 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -659,9 +659,45 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, exit(1); } + int tshift = (mu == Nd-1) ? 1 : 0; + unsigned int LLt = GridDefaultLatt()[Tp]; conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + auto UGrid= this->GaugeGrid(); + + PropagatorField tmp(UGrid); + PropagatorField Utmp(UGrid); + PropagatorField L(UGrid); + PropagatorField zz (UGrid); + zz=Zero(); + LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + tmp = Cshift(q_in,mu,1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu); + tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop + tmp = where((lcoor>=tmin),tmp,zz); // Mask the time + q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated + + tmp = q_in *lattice_cmplx; + tmp = Cshift(tmp,mu,-1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link + tmp = -( Utmp + gmu*Utmp ); + // Mask the time + if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice + unsigned int t0 = 0; + tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz); + } else { + tmp = where((lcoor>=tmin+tshift),tmp,zz); + } + q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated } NAMESPACE_END(Grid); From 82aecbf4cfdb76a44c90855292e5c08628e48b62 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 15:26:39 +0100 Subject: [PATCH 26/86] Test_wilson_conserved_current added --- tests/core/Test_wilson_conserved_current.cc | 290 ++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 tests/core/Test_wilson_conserved_current.cc diff --git a/tests/core/Test_wilson_conserved_current.cc b/tests/core/Test_wilson_conserved_current.cc new file mode 100644 index 00000000..3eba4fdc --- /dev/null +++ b/tests/core/Test_wilson_conserved_current.cc @@ -0,0 +1,290 @@ +/************************************************************************************* +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_cayley_cg.cc + +Copyright (C) 2022 + +Author: Peter Boyle +Author: Fabian Joswig + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; + + +template +void TestConserved(What & Ddwf, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5, + What *Ddwfrev=nullptr); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout< omegas; + std::vector < ComplexD > omegasrev(Ls); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplexF::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + std::vector seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::HotConfiguration(RNG4,Umu); + } + + RealD mass=0.3; + RealD M5 =1.0; + std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} + + + +template +void TestConserved(Action & Ddwf, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5, + Action * Ddwfrev) +{ + LatticePropagator phys_src(UGrid); + LatticePropagator seqsrc(FGrid); + LatticePropagator prop5(FGrid); + LatticePropagator prop5rev(FGrid); + LatticePropagator prop4(UGrid); + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + LatticeComplex PA (UGrid); + LatticeComplex SV (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + LatticePropagator seqprop(UGrid); + + SpinColourMatrix kronecker; kronecker=1.0; + Coordinate coor({0,0,0,0}); + phys_src=Zero(); + pokeSite(kronecker,phys_src,coor); + + ConjugateGradient CG(1.0e-16,100000); + SchurRedBlackDiagTwoSolve schur(CG); + ZeroGuesser zpg; + for(int s=0;s(src4,phys_src,s,c); + + LatticeFermion src5 (FGrid); + Ddwf.ImportPhysicalFermionSource(src4,src5); + + LatticeFermion result5(FGrid); result5=Zero(); + schur(Ddwf,src5,result5,zpg); + std::cout<(prop5,result5,s,c); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(prop4,result4,s,c); + + if( Ddwfrev ) { + Ddwfrev->ImportPhysicalFermionSource(src4,src5); + result5 = Zero(); + schur(*Ddwfrev,src5,result5,zpg); + } + FermToProp(prop5rev,result5,s,c); + } + } + + auto curr = Current::Vector; + const int mu_J=0; + const int t_J=0; + + LatticeComplex ph (UGrid); ph=1.0; + + Ddwf.SeqConservedCurrent(prop5, + seqsrc, + phys_src, + curr, + mu_J, + t_J, + t_J,// whole lattice + ph); + + for(int s=0;s(src5,seqsrc,s,c); + + LatticeFermion result5(FGrid); result5=Zero(); + schur(Ddwf,src5,result5,zpg); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(seqprop,result4,s,c); + } + } + + Gamma g5(Gamma::Algebra::Gamma5); + Gamma gT(Gamma::Algebra::GammaT); + + std::vector sumPA; + std::vector sumSV; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Ddwf.ContractConservedCurrent(prop5rev,prop5,Axial_mu,phys_src,Current::Axial,Tdir); + Ddwf.ContractConservedCurrent(prop5rev,prop5,Vector_mu,phys_src,Current::Vector,Tdir); + Ddwf.ContractJ5q(prop5,PJ5q); + + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + SV = trace(Vector_mu); // Scalar-Vector conserved current + VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current + PP = trace(adj(prop4)*prop4); // Pseudoscalar density + + // Spatial sum + sliceSum(PA,sumPA,Tdir); + sliceSum(SV,sumSV,Tdir); + sliceSum(VV,sumVV,Tdir); + sliceSum(PP,sumPP,Tdir); + sliceSum(PJ5q,sumPJ5q,Tdir); + + const int Nt{static_cast(sumPA.size())}; + std::cout< check_buf; + + test_S = trace(qSite*g); + test_V = trace(qSite*g*Gamma::gmu[mu_J]); + + Ddwf.ContractConservedCurrent(prop5rev,prop5,cur,phys_src,curr,mu_J); + + c = trace(cur*g); + sliceSum(c, check_buf, Tp); + check_S = TensorRemove(check_buf[t_J]); + + auto gmu=Gamma::gmu[mu_J]; + c = trace(cur*g*gmu); + sliceSum(c, check_buf, Tp); + check_V = TensorRemove(check_buf[t_J]); + + + std::cout< -#include using namespace std; using namespace Grid; template -void TestConserved(What & Ddwf, +void TestConserved(What & Dw, LatticeGaugeField &Umu, - GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, - RealD mass, RealD M5, - GridParallelRNG *RNG4, - GridParallelRNG *RNG5, - What *Ddwfrev=nullptr); + GridParallelRNG *RNG4); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -57,28 +52,12 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout< omegas; - std::vector < ComplexD > omegasrev(Ls); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); - GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); - GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - - - GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), - GridDefaultSimd(Nd,vComplexF::Nsimd()), - GridDefaultMpi()); - GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); - GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); - GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); - std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG4(UGrid); std::vector seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4); @@ -95,19 +74,41 @@ int main (int argc, char ** argv) SU::HotConfiguration(RNG4,Umu); } - RealD mass=0.3; - RealD M5 =1.0; - std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + typename WilsonCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; - std::cout<(Dw,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(Dwc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dwcc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewcc,Umu,UGrid,UrbGrid,&RNG4); Grid_finalize(); } @@ -115,27 +116,17 @@ int main (int argc, char ** argv) template -void TestConserved(Action & Ddwf, +void TestConserved(Action & Dw, LatticeGaugeField &Umu, - GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, - RealD mass, RealD M5, - GridParallelRNG *RNG4, - GridParallelRNG *RNG5, - Action * Ddwfrev) + GridParallelRNG *RNG4) { LatticePropagator phys_src(UGrid); - LatticePropagator seqsrc(FGrid); - LatticePropagator prop5(FGrid); - LatticePropagator prop5rev(FGrid); + LatticePropagator seqsrc(UGrid); LatticePropagator prop4(UGrid); - LatticePropagator Axial_mu(UGrid); LatticePropagator Vector_mu(UGrid); - LatticeComplex PA (UGrid); LatticeComplex SV (UGrid); LatticeComplex VV (UGrid); - LatticeComplex PJ5q(UGrid); - LatticeComplex PP (UGrid); LatticePropagator seqprop(UGrid); SpinColourMatrix kronecker; kronecker=1.0; @@ -151,25 +142,11 @@ void TestConserved(Action & Ddwf, LatticeFermion src4 (UGrid); PropToFerm(src4,phys_src,s,c); - LatticeFermion src5 (FGrid); - Ddwf.ImportPhysicalFermionSource(src4,src5); - - LatticeFermion result5(FGrid); result5=Zero(); - schur(Ddwf,src5,result5,zpg); - std::cout<(prop5,result5,s,c); - - LatticeFermion result4(UGrid); - Ddwf.ExportPhysicalFermionSolution(result5,result4); + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); + std::cout<(prop4,result4,s,c); - - if( Ddwfrev ) { - Ddwfrev->ImportPhysicalFermionSource(src4,src5); - result5 = Zero(); - schur(*Ddwfrev,src5,result5,zpg); - } - FermToProp(prop5rev,result5,s,c); } } @@ -179,7 +156,7 @@ void TestConserved(Action & Ddwf, LatticeComplex ph (UGrid); ph=1.0; - Ddwf.SeqConservedCurrent(prop5, + Dw.SeqConservedCurrent(prop4, seqsrc, phys_src, curr, @@ -191,14 +168,12 @@ void TestConserved(Action & Ddwf, for(int s=0;s(src5,seqsrc,s,c); + LatticeFermion src4 (UGrid); + PropToFerm(src4,seqsrc,s,c); - LatticeFermion result5(FGrid); result5=Zero(); - schur(Ddwf,src5,result5,zpg); + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); - LatticeFermion result4(UGrid); - Ddwf.ExportPhysicalFermionSolution(result5,result4); FermToProp(seqprop,result4,s,c); } } @@ -206,46 +181,32 @@ void TestConserved(Action & Ddwf, Gamma g5(Gamma::Algebra::Gamma5); Gamma gT(Gamma::Algebra::GammaT); - std::vector sumPA; std::vector sumSV; std::vector sumVV; - std::vector sumPP; - std::vector sumPJ5q; - Ddwf.ContractConservedCurrent(prop5rev,prop5,Axial_mu,phys_src,Current::Axial,Tdir); - Ddwf.ContractConservedCurrent(prop5rev,prop5,Vector_mu,phys_src,Current::Vector,Tdir); - Ddwf.ContractJ5q(prop5,PJ5q); + Dw.ContractConservedCurrent(prop4,prop4,Vector_mu,phys_src,Current::Vector,Tdir); - PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current SV = trace(Vector_mu); // Scalar-Vector conserved current VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current - PP = trace(adj(prop4)*prop4); // Pseudoscalar density // Spatial sum - sliceSum(PA,sumPA,Tdir); sliceSum(SV,sumSV,Tdir); sliceSum(VV,sumVV,Tdir); - sliceSum(PP,sumPP,Tdir); - sliceSum(PJ5q,sumPJ5q,Tdir); - const int Nt{static_cast(sumPA.size())}; + const int Nt{static_cast(sumSV.size())}; + std::cout< &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { GridBase *grid = U[0].Grid(); From 8b12a61097b25f5a3916f587c312336bf77f78ed Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 9 May 2022 11:53:22 +0100 Subject: [PATCH 29/86] fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am --- Grid/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/Makefile.am b/Grid/Makefile.am index aff5b9d0..7c3c151b 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -70,7 +70,7 @@ endif lib_LIBRARIES = libGrid.a CCFILES += $(extra_sources) -HFILES += $(extra_headers) +HFILES += $(extra_headers) Config.h Version.h libGrid_a_SOURCES = $(CCFILES) libGrid_adir = $(includedir)/Grid From 32facbd02a96595ff3fdb92aaa310bbcd0d356d3 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 10 May 2022 10:53:22 +0100 Subject: [PATCH 30/86] fix: assert for dimensions of compact Wilson clover moved to constructor. --- Grid/qcd/action/fermion/WilsonCloverTypes.h | 2 -- .../implementation/CompactWilsonCloverFermionImplementation.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonCloverTypes.h b/Grid/qcd/action/fermion/WilsonCloverTypes.h index a5a95d93..5ce3b91b 100644 --- a/Grid/qcd/action/fermion/WilsonCloverTypes.h +++ b/Grid/qcd/action/fermion/WilsonCloverTypes.h @@ -47,8 +47,6 @@ class CompactWilsonCloverTypes { public: INHERIT_IMPL_TYPES(Impl); - static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions"); - static constexpr int Nred = Nc * Nhs; // 6 static constexpr int Nblock = Nhs; // 2 static constexpr int Ndiagonal = Nred; // 6 diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 2496f3f1..e4864730 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -59,6 +59,8 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(Gaug , BoundaryMask(&Fgrid) , BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid) { + assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3); + csw_r *= 0.5; csw_t *= 0.5; if (clover_anisotropy.isAnisotropic) From b051e00de044bee7a6756b9d51f943fe5189b32d Mon Sep 17 00:00:00 2001 From: James Richings Date: Mon, 16 May 2022 00:25:13 +0100 Subject: [PATCH 31/86] Additional Local Coherance Deflation operator() --- Grid/algorithms/iterative/Deflation.h | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index 2eb28bf9..a69d1549 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -113,7 +113,43 @@ public: blockPromote(guess_coarse,guess,subspace); guess.Checkerboard() = src.Checkerboard(); }; -}; + + void operator()(const std::vector &src,std::vector &guess) { + int Nevec = (int)evec_coarse.size(); + int Nsrc = (int)src.size(); + // make temp variables + std::vector src_coarse(Nsrc,evec_coarse[0].Grid()); + std::vector guess_coarse(Nsrc,evec_coarse[0].Grid()); + //Preporcessing + std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl; + for (int j=0;j Date: Mon, 16 May 2022 00:28:28 +0100 Subject: [PATCH 32/86] bugfix: eo operator called in correct location --- benchmarks/Benchmark_wilson_sweep.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index 986a1831..d88e2d39 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -110,8 +110,8 @@ int main (int argc, char ** argv) bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << "\t"; // EO - bench_wilson(src,result,Dw,volume,DaggerNo); - bench_wilson(src,result,Dw,volume,DaggerYes); + bench_wilson_eo(src,result,Dw,volume,DaggerNo); + bench_wilson_eo(src,result,Dw,volume,DaggerYes); std::cout << std::endl; } } From 4b1997e2f3a23abc0ee61a7b19b612b50548c5a5 Mon Sep 17 00:00:00 2001 From: James Richings Date: Mon, 16 May 2022 15:58:33 +0100 Subject: [PATCH 33/86] wilson sweep test --- benchmarks/Benchmark_wilson_sweep.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index d88e2d39..50ad1d03 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -44,6 +44,13 @@ void bench_wilson ( double const volume, int const dag ); +void bench_wilson_eo ( + LatticeFermion & src, + LatticeFermion & result, + WilsonFermionR & Dw, + double const volume, + int const dag ); + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -110,8 +117,8 @@ int main (int argc, char ** argv) bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << "\t"; // EO - bench_wilson_eo(src,result,Dw,volume,DaggerNo); - bench_wilson_eo(src,result,Dw,volume,DaggerYes); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerNo); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerYes); std::cout << std::endl; } } From b8ee19691c31e54ecd7fb009828d2d805b0f6ca2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 17 May 2022 09:08:12 -0700 Subject: [PATCH 34/86] Updated config for PM --- systems/Perlmutter/config-command | 4 ++++ systems/Perlmutter/dwf4.slurm | 24 ++++++++++-------------- systems/Perlmutter/sourceme.sh | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/systems/Perlmutter/config-command b/systems/Perlmutter/config-command index b399c535..b62704dc 100644 --- a/systems/Perlmutter/config-command +++ b/systems/Perlmutter/config-command @@ -1,9 +1,13 @@ +DIR=`pwd` +PREFIX=$DIR/../Prequisites/install/ ../../configure \ --enable-comms=mpi \ --enable-simd=GPU \ --enable-shm=nvlink \ --enable-gen-simd-width=64 \ --enable-accelerator=cuda \ + --disable-accelerator-cshift \ + --with-gmp=$PREFIX \ --disable-fermion-reps \ --disable-unified \ --disable-gparity \ diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index ba198595..f6d6a2a9 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -1,24 +1,20 @@ #!/bin/bash -#SBATCH -A mp13 +#SBATCH -A m3886_g #SBATCH -C gpu -#SBATCH -q regular -#SBATCH -t 0:20:00 -#SBATCH -n 16 +#SBATCH -q debug +#SBATCH -t 0:10:00 +#SBATCH -n 4 #SBATCH --ntasks-per-node=4 #SBATCH -c 32 #SBATCH --exclusive #SBATCH --gpus-per-task=1 -#SBATCH --gpu-bind=map_gpu:0,1,2,3 +#SBATCH --gpu-bind=none export SLURM_CPU_BIND="cores" -export MPICH_RDMA_ENABLED_CUDA=1 export MPICH_GPU_SUPPORT_ENABLED=1 -srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node - -OPT="--comms-overlap --comms-concurrent --shm-mpi 0" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0 - +export MPICH_RDMA_ENABLED_CUDA=0 +export MPICH_GPU_IPC_ENABLED=0 +export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 +export MPICH_GPU_NO_ASYNC_MEMCPY=1 OPT="--comms-overlap --comms-concurrent --shm-mpi 1" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1 +srun ./benchmarks/Benchmark_ITT --mpi 2.1.1.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > ITT.log diff --git a/systems/Perlmutter/sourceme.sh b/systems/Perlmutter/sourceme.sh index 9359dea9..6d09b1c9 100644 --- a/systems/Perlmutter/sourceme.sh +++ b/systems/Perlmutter/sourceme.sh @@ -1,4 +1,4 @@ export CRAY_ACCEL_TARGET=nvidia80 -module load PrgEnv-gnu cpe-cuda cuda +module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4 From 79e34b3eb44b0c62a316736ffa5b0a7c0e2249f7 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Thu, 19 May 2022 14:53:17 +0100 Subject: [PATCH 35/86] Local Coherence batch deflation --- Grid/algorithms/iterative/Deflation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index a69d1549..1a8f97c9 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -128,7 +128,7 @@ public: std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl; blockProject(src_coarse[j],src[j],subspace); } - //deflation set up for eigen vector batchsize 1 and source batch size equal number of batches + //deflation set up for eigen vector batchsize 1 and source batch size equal number of sources std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl; for (int i=0;i Date: Thu, 19 May 2022 16:44:39 -0700 Subject: [PATCH 36/86] Updated for new Dirichlet interface --- benchmarks/Benchmark_dwf_fp32.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_dwf_fp32.cc b/benchmarks/Benchmark_dwf_fp32.cc index f79797fa..9695cf5a 100644 --- a/benchmarks/Benchmark_dwf_fp32.cc +++ b/benchmarks/Benchmark_dwf_fp32.cc @@ -249,8 +249,9 @@ void Benchmark(int Ls, Coordinate Dirichlet) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" < Date: Thu, 19 May 2022 16:45:02 -0700 Subject: [PATCH 37/86] Dirichlet BCs --- Grid/qcd/action/ActionParams.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Grid/qcd/action/ActionParams.h b/Grid/qcd/action/ActionParams.h index 0e6a11c6..180dbbbf 100644 --- a/Grid/qcd/action/ActionParams.h +++ b/Grid/qcd/action/ActionParams.h @@ -37,24 +37,32 @@ NAMESPACE_BEGIN(Grid); // These can move into a params header and be given MacroMagic serialisation struct GparityWilsonImplParams { Coordinate twists; - GparityWilsonImplParams() : twists(Nd, 0) {}; + Coordinate dirichlet; // Blocksize of dirichlet BCs + GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {}; }; struct WilsonImplParams { bool overlapCommsCompute; + Coordinate dirichlet; // Blocksize of dirichlet BCs AcceleratorVector twist_n_2pi_L; AcceleratorVector boundary_phases; WilsonImplParams() { + dirichlet.resize(Nd,0); boundary_phases.resize(Nd, 1.0); twist_n_2pi_L.resize(Nd, 0.0); }; WilsonImplParams(const AcceleratorVector phi) : boundary_phases(phi), overlapCommsCompute(false) { twist_n_2pi_L.resize(Nd, 0.0); + dirichlet.resize(Nd,0); } }; struct StaggeredImplParams { - StaggeredImplParams() {}; + Coordinate dirichlet; // Blocksize of dirichlet BCs + StaggeredImplParams() + { + dirichlet.resize(Nd,0); + }; }; struct OneFlavourRationalParams : Serializable { From 2594e3c23091897b03744f707a5b30227b2db328 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 19 May 2022 16:45:19 -0700 Subject: [PATCH 38/86] Dirichlet option --- Grid/stencil/SimpleCompressor.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Grid/stencil/SimpleCompressor.h b/Grid/stencil/SimpleCompressor.h index 1150b234..b36d954f 100644 --- a/Grid/stencil/SimpleCompressor.h +++ b/Grid/stencil/SimpleCompressor.h @@ -52,6 +52,11 @@ public: return arg; } }; +class SimpleStencilParams{ +public: + Coordinate dirichlet; + SimpleStencilParams() {}; +}; NAMESPACE_END(Grid); From b52e8ef65a42ecd10c97b0ea594d5a8ebe5a1a39 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 19 May 2022 16:45:41 -0700 Subject: [PATCH 39/86] Dirichlet changes --- examples/Example_Laplacian.cc | 9 +++++---- tests/Test_stencil.cc | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/examples/Example_Laplacian.cc b/examples/Example_Laplacian.cc index fa8466cf..77e5fa35 100644 --- a/examples/Example_Laplacian.cc +++ b/examples/Example_Laplacian.cc @@ -93,14 +93,14 @@ template class FreeLaplacianStencil : public SparseMatrixBase StencilImpl; + typedef CartesianStencil StencilImpl; GridBase *grid; StencilImpl Stencil; SimpleCompressor Compressor; FreeLaplacianStencil(GridBase *_grid) - : Stencil (_grid,6,Even,directions,displacements,0), grid(_grid) + : Stencil (_grid,6,Even,directions,displacements,SimpleStencilParams()), grid(_grid) { }; virtual GridBase *Grid(void) { return grid; }; @@ -168,7 +168,8 @@ public: typedef iImplDoubledGaugeField SiteDoubledGaugeField; typedef Lattice DoubledGaugeField; - typedef CartesianStencil StencilImpl; + typedef CartesianStencil StencilImpl; + SimpleStencilParams p; GridBase *grid; StencilImpl Stencil; @@ -177,7 +178,7 @@ public: CovariantLaplacianStencil(GaugeField &Umu) : grid(Umu.Grid()), - Stencil (grid,6,Even,directions,displacements,0), + Stencil (grid,6,Even,directions,displacements,p), Uds(grid) { for (int mu = 0; mu < Nd; mu++) { diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 93402a1c..f17faa27 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -31,7 +31,6 @@ Author: paboyle using namespace std; using namespace Grid; - ; int main(int argc, char ** argv) { Grid_init(&argc, &argv); @@ -80,7 +79,8 @@ int main(int argc, char ** argv) { Foo=lex; } - typedef CartesianStencil Stencil; + typedef CartesianStencil Stencil; + SimpleStencilParams p; for(int dir=0;dir<4;dir++){ for(int disp=0;disp directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil myStencil(&Fine,npoint,0,directions,displacements,0); + Stencil myStencil(&Fine,npoint,0,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil EStencil(&rbFine,npoint,Even,directions,displacements,0); - Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,0); + Stencil EStencil(&rbFine,npoint,Even,directions,displacements,p); + Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o Date: Thu, 19 May 2022 19:17:11 -0700 Subject: [PATCH 40/86] Dirichlet improved --- Grid/stencil/Stencil.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 3e356a1c..791d79eb 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -131,7 +131,6 @@ class CartesianStencilAccelerator { int _checkerboard; int _npoints; // Move to template param? int _osites; - int _dirichlet; StencilVector _directions; StencilVector _distances; StencilVector _comms_send; @@ -650,16 +649,14 @@ public: /// Introduce a block structure and switch off comms on boundaries void DirichletBlock(const Coordinate &dirichlet_block) { - this->_dirichlet = 1; for(int ii=0;ii_npoints;ii++){ int dimension = this->_directions[ii]; int displacement = this->_distances[ii]; - int shift = displacement; int gd = _grid->_gdimensions[dimension]; int fd = _grid->_fdimensions[dimension]; int pd = _grid->_processors [dimension]; - int ld = gd/pd; int pc = _grid->_processor_coor[dimension]; + int ld = fd/pd; /////////////////////////////////////////// // Figure out dirichlet send and receive // on this leg of stencil. @@ -668,7 +665,7 @@ public: int block = dirichlet_block[dimension]; this->_comms_send[ii] = comm_dim; this->_comms_recv[ii] = comm_dim; - if ( block ) { + if ( block && comm_dim ) { assert(abs(displacement) < ld ); if( displacement > 0 ) { @@ -677,16 +674,16 @@ public: // | | | // noR // noS - if ( (ld*(pc+1) ) % block == 0 ) this->_comms_recv[ii] = 0; - if ( ( ld*pc ) % block == 0 ) this->_comms_send[ii] = 0; + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_recv[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_send[ii] = 0; } else { // High side, low side // | <--B--->| // | | | // noS // noR - if ( (ld*(pc+1) ) % block == 0 ) this->_comms_send[ii] = 0; - if ( ( ld*pc ) % block == 0 ) this->_comms_recv[ii] = 0; + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0; } } } @@ -698,7 +695,6 @@ public: const std::vector &distances, Parameters p) { - this->_dirichlet = 0; face_table_computed=0; _grid = grid; this->parameters=p; @@ -715,6 +711,8 @@ public: this->_comms_recv.resize(npoints); this->same_node.resize(npoints); + if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up + _unified_buffer_size=0; surface_list.resize(0); @@ -1106,6 +1104,7 @@ public: Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); face_idx++; + int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes,cbmask); if ( (!duplicate) ) { // Force comms for now @@ -1126,7 +1125,7 @@ public: words,Decompressions); } u_comm_offset+=words; - } + } } return 0; } @@ -1206,8 +1205,8 @@ public: face_table[face_idx].size()*sizeof(face_table_host[0])); } - // if ( comms_send ) - Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); + if ( comms_send ) + Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); face_idx++; //spointers[0] -- low From 617c5362c1a492a5f4c185ec6b1048abfd94d155 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 11:37:33 +0100 Subject: [PATCH 41/86] fix: fixed warning: missing return statement at end of non-void function in CloverHelpers --- Grid/qcd/action/fermion/CloverHelpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index c72a79e8..57e71998 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -181,6 +181,7 @@ public: static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); + return lambda; } }; @@ -425,6 +426,7 @@ public: static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); + return lambda; } }; From bab8aa8eb049749402f1fbb0b33f1e086ce44403 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:27:40 +0100 Subject: [PATCH 42/86] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in DisableWarnings.h --- Grid/DisableWarnings.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Grid/DisableWarnings.h b/Grid/DisableWarnings.h index 4bd1edd0..64e4faf4 100644 --- a/Grid/DisableWarnings.h +++ b/Grid/DisableWarnings.h @@ -44,14 +44,22 @@ directory #ifdef __NVCC__ //disables nvcc specific warning in json.hpp #pragma clang diagnostic ignored "-Wdeprecated-register" + +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) + //disables nvcc specific warning in json.hpp +#pragma nv_diag_suppress unsigned_compare_with_zero +#pragma nv_diag_suppress cast_to_qualified_type + //disables nvcc specific warning in many files +#pragma nv_diag_suppress esa_on_defaulted_function_ignored +#pragma nv_diag_suppress extra_semicolon +#else + //disables nvcc specific warning in json.hpp #pragma diag_suppress unsigned_compare_with_zero #pragma diag_suppress cast_to_qualified_type - //disables nvcc specific warning in many files #pragma diag_suppress esa_on_defaulted_function_ignored #pragma diag_suppress extra_semicolon - -//Eigen only +#endif #endif // Disable vectorisation in Eigen on the Power8/9 and PowerPC From e909aeedf082f0aaba98b31cccd8119b6c38d86f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:29:42 +0100 Subject: [PATCH 43/86] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in Grid_Eigen_Dense.h --- Grid/Grid_Eigen_Dense.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index c62d9cdb..5aee81de 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -14,7 +14,11 @@ /* NVCC save and restore compile environment*/ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress code_is_unreachable +#else #pragma diag_suppress code_is_unreachable +#endif #pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") From 7937ac2bab3eaef32222e6b28c6d4dda9f5af6d9 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:31:03 +0100 Subject: [PATCH 44/86] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in pugixml/pugixml.cc --- Grid/pugixml/pugixml.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index 45e6496a..81973964 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -16,8 +16,12 @@ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" +#else #pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" #endif +#endif #include "pugixml.h" From 3f31afa4fc0c66282588dd5dbe40ae1b94ff5223 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 24 May 2022 18:18:51 -0700 Subject: [PATCH 45/86] Clean up verbose --- Grid/communicator/Communicator_mpi3.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index ecdf1e53..1e9e7840 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -390,16 +390,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vectorShmBufferTranslate(dest,recv); assert(shm!=NULL); - // std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl; acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); } } if ( CommunicatorPolicy == CommunicatorPolicySequential ) { this->StencilSendToRecvFromComplete(list,dir); + list.resize(0); } return off_node_bytes; From 47b4e914736740f914af421182757f21ece0eeb6 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 24 May 2022 18:19:18 -0700 Subject: [PATCH 46/86] Verbose change --- Grid/qcd/action/filters/DirichletFilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/filters/DirichletFilter.h b/Grid/qcd/action/filters/DirichletFilter.h index 95353dab..4571c1de 100644 --- a/Grid/qcd/action/filters/DirichletFilter.h +++ b/Grid/qcd/action/filters/DirichletFilter.h @@ -55,7 +55,7 @@ struct DirichletFilter: public MomentumFilterBase for(int mu=0;muGlobalDimensions()[mu] ) ) { // If costly could provide Grid earlier and precompute masks - std::cout << " Dirichlet in mu="<_simd_layout[dimension]>1 && (comm_dim); int rotate_dim = _grid->_simd_layout[dimension]>2; - this->_comms_send[ii] = comm_dim; - this->_comms_recv[ii] = comm_dim; - assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported int sshift[2]; @@ -909,25 +904,30 @@ public: } // Wrap locally dirichlet support case OR node local - if ( (offnode==0) || (comms_recv==0) ) { + if ( offnode==0 ) { int permute_slice=0; CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); - + } else { + if ( comms_recv==0 ) { + + int permute_slice=1; + CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); + + } else { + + ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase + + } + + } + + if ( offnode ) { int words = buffer_size; if (cbmask != 0x3) words=words>>1; - - // int rank = grid->_processor; - // int recv_from_rank; - // int xmit_to_rank; - - int unified_buffer_offset = _unified_buffer_size; _unified_buffer_size += words; - - ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase - } } } @@ -1058,8 +1058,6 @@ public: int comm_proc = ((x+sshift)/rd)%pd; if (comm_proc) { - - int words = buffer_size; if (cbmask != 0x3) words=words>>1; @@ -1067,64 +1065,69 @@ public: int bytes = words * compress.CommDatumSize(); int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - if ( !face_table_computed ) { - face_table.resize(face_idx+1); - std::vector > face_table_host ; - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); - face_table[face_idx].resize(face_table_host.size()); - acceleratorCopyToDevice(&face_table_host[0], - &face_table[face_idx][0], - face_table[face_idx].size()*sizeof(face_table_host[0])); - } + int comm_off = u_comm_offset; - // int rank = _grid->_processor; int recv_from_rank; int xmit_to_rank; + cobj *recv_buf; + cobj *send_buf; _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); assert (xmit_to_rank != _grid->ThisRank()); assert (recv_from_rank != _grid->ThisRank()); - cobj *recv_buf; - if ( compress.DecompressionStep() ) { - recv_buf=u_simd_recv_buf[0]; - } else { - recv_buf=this->u_recv_buf_p; + if( comms_send ) { + + if ( !face_table_computed ) { + face_table.resize(face_idx+1); + std::vector > face_table_host ; + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); + face_table[face_idx].resize(face_table_host.size()); + acceleratorCopyToDevice(&face_table_host[0], + &face_table[face_idx][0], + face_table[face_idx].size()*sizeof(face_table_host[0])); + } + + + if ( compress.DecompressionStep() ) { + recv_buf=u_simd_recv_buf[0]; + } else { + recv_buf=this->u_recv_buf_p; + } + + send_buf = this->u_send_buf_p; // Gather locally, must send + + //////////////////////////////////////////////////////// + // Gather locally + //////////////////////////////////////////////////////// + assert(send_buf!=NULL); + + Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so); } - - cobj *send_buf; - send_buf = this->u_send_buf_p; // Gather locally, must send - - //////////////////////////////////////////////////////// - // Gather locally - //////////////////////////////////////////////////////// - assert(send_buf!=NULL); - if ( comms_send ) - Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); - face_idx++; - - - int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes,cbmask); + int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask); if ( (!duplicate) ) { // Force comms for now /////////////////////////////////////////////////////////// // Build a list of things to do after we synchronise GPUs // Start comms now??? /////////////////////////////////////////////////////////// - AddPacket((void *)&send_buf[u_comm_offset], - (void *)&recv_buf[u_comm_offset], + AddPacket((void *)&send_buf[comm_off], + (void *)&recv_buf[comm_off], xmit_to_rank, comms_send, recv_from_rank, comms_recv, bytes); } - if ( compress.DecompressionStep() ) { - AddDecompress(&this->u_recv_buf_p[u_comm_offset], - &recv_buf[u_comm_offset], + if ( compress.DecompressionStep() && comms_recv ) { + AddDecompress(&this->u_recv_buf_p[comm_off], + &recv_buf[comm_off], words,Decompressions); } + u_comm_offset+=words; + face_idx++; + } } return 0; @@ -1154,7 +1157,6 @@ public: int permute_type=_grid->PermuteType(dimension); - // std::cout << "SimdNew permute type "< > face_table_host ; - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); face_table[face_idx].resize(face_table_host.size()); acceleratorCopyToDevice(&face_table_host[0], &face_table[face_idx][0], @@ -1225,8 +1228,8 @@ public: int nbr_plane = nbr_ic; assert (sx == nbr_ox); - auto rp = &u_simd_recv_buf[i ][u_comm_offset]; - auto sp = &u_simd_send_buf[nbr_plane][u_comm_offset]; + auto rp = &u_simd_recv_buf[i ][comm_off]; + auto sp = &u_simd_send_buf[nbr_plane][comm_off]; if(nbr_proc){ @@ -1252,9 +1255,10 @@ public: } } - AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers); + AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers); u_comm_offset +=buffer_size; + } } return 0; From f9f05e995bfd57885261fde425338cfc6c2aa8e1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 24 May 2022 18:24:38 -0700 Subject: [PATCH 48/86] Update perlmutter --- systems/Perlmutter/config-command | 1 + 1 file changed, 1 insertion(+) diff --git a/systems/Perlmutter/config-command b/systems/Perlmutter/config-command index b62704dc..4f7ecee3 100644 --- a/systems/Perlmutter/config-command +++ b/systems/Perlmutter/config-command @@ -6,6 +6,7 @@ PREFIX=$DIR/../Prequisites/install/ --enable-shm=nvlink \ --enable-gen-simd-width=64 \ --enable-accelerator=cuda \ + --enable-setdevice \ --disable-accelerator-cshift \ --with-gmp=$PREFIX \ --disable-fermion-reps \ From d83beaa8900a45daff806d03e272cffe803f1e65 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 24 May 2022 18:25:00 -0700 Subject: [PATCH 49/86] Update perlmutter --- systems/Perlmutter/dwf4.slurm | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index f6d6a2a9..8a37a266 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -2,19 +2,26 @@ #SBATCH -A m3886_g #SBATCH -C gpu #SBATCH -q debug -#SBATCH -t 0:10:00 +#SBATCH -t 0:20:00 +#SBATCH -c 32 +#SBATCH -N 1 #SBATCH -n 4 #SBATCH --ntasks-per-node=4 -#SBATCH -c 32 -#SBATCH --exclusive #SBATCH --gpus-per-task=1 +#SBATCH --exclusive #SBATCH --gpu-bind=none export SLURM_CPU_BIND="cores" export MPICH_GPU_SUPPORT_ENABLED=1 -export MPICH_RDMA_ENABLED_CUDA=0 -export MPICH_GPU_IPC_ENABLED=0 +export MPICH_RDMA_ENABLED_CUDA=1 +export MPICH_GPU_IPC_ENABLED=1 export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 -export MPICH_GPU_NO_ASYNC_MEMCPY=1 -OPT="--comms-overlap --comms-concurrent --shm-mpi 1" -srun ./benchmarks/Benchmark_ITT --mpi 2.1.1.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > ITT.log +export MPICH_GPU_NO_ASYNC_MEMCPY=0 +#export MPICH_SMP_SINGLE_COPY_MODE=CMA + +OPT="--comms-overlap --shm-mpi 1" +VOL=64.64.32.32 +srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT + From 4f997c5f04d469b69e2f62691b0a672cb1de7414 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 25 May 2022 11:15:25 -0700 Subject: [PATCH 50/86] Remove extra face kernels in Dirichlet --- Grid/stencil/Stencil.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index e2f17d15..07598265 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -1255,7 +1255,9 @@ public: } } - AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers); + if ( comms_recv ) { + AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers); + } u_comm_offset +=buffer_size; From 7eb29cf5292b9889427bd9afde7e9a95737b1ae5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 28 May 2022 15:51:34 -0700 Subject: [PATCH 51/86] MPI fix --- Grid/communicator/Communicator_mpi3.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index 1e9e7840..0d0a3443 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -372,7 +372,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector Date: Sat, 28 May 2022 15:52:39 -0700 Subject: [PATCH 52/86] Extra easier signature for peek --- Grid/lattice/Lattice_peekpoke.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 5caab214..f3b485a4 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -125,6 +125,12 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// +template +typename vobj::scalar_object peekSite(const Lattice &l,const Coordinate &site){ + typename vobj::scalar_object s; + peekSite(s,l,site); + return s; +} template void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ From 34faa39f4f070a1ea230e63cc8e97976125bbdab Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 28 May 2022 17:18:08 -0700 Subject: [PATCH 53/86] Clean up Dirichlet. Big oops fix --- Grid/stencil/Stencil.h | 18 ++++++++++-------- systems/Perlmutter/dwf4.slurm | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 07598265..19eb19fb 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -665,7 +665,7 @@ public: this->_comms_recv[ii] = comm_dim; if ( block && comm_dim ) { assert(abs(displacement) < ld ); - + // Quiesce communication across block boundaries if( displacement > 0 ) { // High side, low side // | <--B--->| @@ -730,7 +730,7 @@ public: int gd = _grid->_gdimensions[dimension]; int fd = _grid->_fdimensions[dimension]; int pd = _grid->_processors [dimension]; - int ld = gd/pd; + // int ld = gd/pd; int rd = _grid->_rdimensions[dimension]; int pc = _grid->_processor_coor[dimension]; this->_permute_type[point]=_grid->PermuteType(dimension); @@ -871,12 +871,14 @@ public: for(int x=0;xPermuteType(dimension); + int permute_slice; int sx = (x+sshift)%rd; int offnode = 0; if ( simd_layout > 1 ) { + permute_slice=1; for(int i=0;i>(permute_type+1)); @@ -893,6 +895,7 @@ public: } else { int comm_proc = ((x+sshift)/rd)%pd; offnode = (comm_proc!= 0); + permute_slice=0; } int wraparound=0; @@ -906,19 +909,18 @@ public: // Wrap locally dirichlet support case OR node local if ( offnode==0 ) { - int permute_slice=0; + permute_slice=0; CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); } else { - if ( comms_recv==0 ) { + if ( comms_recv ) { - int permute_slice=1; - CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); + ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase } else { - ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase + CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); } @@ -1208,7 +1210,7 @@ public: face_table[face_idx].size()*sizeof(face_table_host[0])); } - if ( comms_send ) + if ( comms_send || comms_recv ) Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); face_idx++; diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index 8a37a266..426573d9 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -19,8 +19,8 @@ export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 export MPICH_GPU_NO_ASYNC_MEMCPY=0 #export MPICH_SMP_SINGLE_COPY_MODE=CMA -OPT="--comms-overlap --shm-mpi 1" -VOL=64.64.32.32 +OPT="--comms-sequential --shm-mpi 1" +VOL=64.64.64.64 srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT #srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT #srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT From e762c940c24c6a99e7bde30daaef1986aa91e51c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 14:29:25 -0700 Subject: [PATCH 54/86] Reduce the loop over exterior for GPU to indirection table --- Grid/qcd/action/fermion/WilsonCompressor.h | 8 +++++--- .../implementation/WilsonKernelsImplementation.h | 11 +++++++++++ Grid/stencil/Stencil.h | 4 +++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index e0e08c1c..eba04abf 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -297,7 +297,7 @@ public: void ZeroCountersi(void) { } void Reporti(int calls) { } - std::vector surface_list; + // Vector surface_list; WilsonStencil(GridBase *grid, int npoints, @@ -307,10 +307,11 @@ public: : CartesianStencil (grid,npoints,checkerboard,directions,distances,p) { ZeroCountersi(); - surface_list.resize(0); + // surface_list.resize(0); this->same_node.resize(npoints); }; + /* void BuildSurfaceList(int Ls,int vol4){ // find same node for SHM @@ -331,7 +332,8 @@ public: } } } - + */ + template < class compressor> void HaloExchangeOpt(const Lattice &source,compressor &compress) { diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 9228b84c..623da5cf 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -440,6 +440,17 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); +#define KERNEL_CALL_EXT(A) \ + const uint64_t NN = Nsite*Ls; \ + const uint64_t sz = st.surface_list.size(); \ + auto ptr = &st.surface_list[0]; \ + accelerator_forNB( ss, sz, Simd::Nsimd(), { \ + int sF = ptr[ss]; \ + int sU = ss/Ls; \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); \ + accelerator_barrier(); + #define ASM_CALL(A) \ thread_for( ss, Nsite, { \ int sU = ss; \ diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 19eb19fb..eb73ba5f 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -640,7 +640,9 @@ public: } } if(local == 0) { - surface_list.push_back(site); + for(int s=0;s Date: Wed, 1 Jun 2022 19:25:42 -0400 Subject: [PATCH 55/86] Faster RNG init --- Grid/lattice/Lattice_rng.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index e5e63716..34df8da2 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -424,9 +424,30 @@ public: // MT implementation does not implement fast discard even though // in principle this is possible //////////////////////////////////////////////// +#if 1 + thread_for( lidx, _grid->lSites(), { + int gidx; + Coordinate pcoor; + Coordinate lcoor; + Coordinate gcoor; + _grid->LocalIndexToLocalCoor(lidx,lcoor); + pcoor=_grid->ThisProcessorCoor(); + _grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor); + + int o_idx; + int i_idx; + int rank; + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + assert(rank == _grid->ThisRank() ); + int l_idx=generator_idx(o_idx,i_idx); + _generators[l_idx] = master_engine; + Skip(_generators[l_idx],gidx); // Skip to next RNG sequence + }); +#else // Everybody loops over global volume. thread_for( gidx, _grid->_gsites, { + // Where is it? int rank; int o_idx; @@ -443,6 +464,7 @@ public: Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } }); +#endif #else //////////////////////////////////////////////////////////////// // Machine and thread decomposition dependent seeding is efficient From 6f1a2e132beaac29be9acfe3f5e5bc375bfd8af0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 19:26:06 -0400 Subject: [PATCH 56/86] SSC mark causing problems --- Grid/perfmon/PerfCount.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index dd25b41e..540b75c5 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -72,17 +72,9 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, inline uint64_t cyclecount(void){ return 0; } -#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx") -#define __SSC_STOP __SSC_MARK(0x110) -#define __SSC_START __SSC_MARK(0x111) - #else -#define __SSC_MARK(mark) -#define __SSC_STOP -#define __SSC_START - /* * cycle counters arch dependent */ From a25b32847f6e62fff99fc4dd223baa42188b55fd Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 19:26:37 -0400 Subject: [PATCH 57/86] Crusher patch --- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 9ab0595a..c8023939 100644 --- a/configure.ac +++ b/configure.ac @@ -394,11 +394,10 @@ case ${CXXTEST} in fi ;; hipcc) -# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" CXXLD=${CXX} if test $ac_openmp = yes; then - CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" + CXXFLAGS="$CXXFLAGS -fopenmp" fi ;; dpcpp) From 58a86c9164842ca5cdfa0098f3043f12482a4286 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 19:27:06 -0400 Subject: [PATCH 58/86] SSC mark removal --- benchmarks/Benchmark_dwf.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 707f330c..c6814cdc 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -191,9 +191,7 @@ int main (int argc, char ** argv) std::cout<Barrier(); From 583f7c52f316ad0a803f87a4728dce98d34b8867 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 19:27:29 -0400 Subject: [PATCH 59/86] SSC mark --- benchmarks/Benchmark_dwf_fp32.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/Benchmark_dwf_fp32.cc b/benchmarks/Benchmark_dwf_fp32.cc index 9695cf5a..5ee764c4 100644 --- a/benchmarks/Benchmark_dwf_fp32.cc +++ b/benchmarks/Benchmark_dwf_fp32.cc @@ -262,9 +262,7 @@ void Benchmark(int Ls, Coordinate Dirichlet) std::cout<Barrier(); From b49db84b089bc8d91056bd878da5affa42c62bb4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 1 Jun 2022 19:27:42 -0400 Subject: [PATCH 60/86] Slurm updates --- systems/Crusher/dwf8.slurm | 24 +++++------------------- systems/Crusher/sourceme.sh | 2 +- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/systems/Crusher/dwf8.slurm b/systems/Crusher/dwf8.slurm index 2113dcd1..866ec775 100644 --- a/systems/Crusher/dwf8.slurm +++ b/systems/Crusher/dwf8.slurm @@ -6,9 +6,8 @@ #SBATCH -J DWF #SBATCH -o DWF.%J #SBATCH -e DWF.%J -#SBATCH -N 8 -#SBATCH -n 64 -#SBATCH --exclusive +#SBATCH -N 1 +#SBATCH -n 1 #SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 DIR=. @@ -17,25 +16,12 @@ source setup.sh export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -export MPICH_SMP_SINGLE_COPY_MODE=XPMEM +#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM #export MPICH_SMP_SINGLE_COPY_MODE=CMA #export MPICH_SMP_SINGLE_COPY_MODE=NONE -export OMP_NUM_THREADS=1 +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -for vol in 64.64.64.256 64.64.64.128 32.32.32.256 32.32.32.128 -do -PARAMS=" --accelerator-threads 8 --grid $vol --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.${vol}.8node.shm-mpi1 -done - -PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node - -PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0 +srun --gpus-per-task 1 -N1 -n1 ./tests/Test_dwf_mixedcg_prec diff --git a/systems/Crusher/sourceme.sh b/systems/Crusher/sourceme.sh index 051014dc..ad0d6582 100644 --- a/systems/Crusher/sourceme.sh +++ b/systems/Crusher/sourceme.sh @@ -1,6 +1,6 @@ module load PrgEnv-gnu module load rocm/5.1.0 -module load cray-mpich/8.1.15 +module load cray-mpich/8.1.16 module load gmp #module load cray-fftw module load craype-accel-amd-gfx90a From e9648a1635f3ee50f9e0e732b8e7aa05cf66726c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 14 Jun 2022 23:40:04 -0400 Subject: [PATCH 61/86] Useful periodic print. CG convergence bound is remarkably accurate on low eigenvalue in numerical tests --- Grid/algorithms/iterative/ConjugateGradient.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 14f3d306..c20e267d 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -152,8 +152,13 @@ public: LinearCombTimer.Stop(); LinalgTimer.Stop(); - std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k + if ( (k % 500) == 0 ) { + std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; + } else { + std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k + << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; + } // Stopping condition if (cp <= rsq) { From 05ca7dc2529e488acc15b5477fb208204690d474 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 14 Jun 2022 23:41:05 -0400 Subject: [PATCH 62/86] Const correctness --- Grid/qcd/action/fermion/FermionOperator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index 0c159300..66644d7f 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -49,7 +49,7 @@ public: virtual FermionField &tmp(void) = 0; - virtual void DirichletBlock(Coordinate & _Block) { assert(0); }; + virtual void DirichletBlock(const Coordinate & _Block) { assert(0); }; GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; From 501bb117bf349bd2a447d9d60ebe97b82b368998 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 00:04:09 -0400 Subject: [PATCH 63/86] Const correct --- Grid/qcd/action/fermion/WilsonFermion5D.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 91abf86a..eced6b81 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -178,16 +178,8 @@ public: GridRedBlackCartesian &FourDimRedBlackGrid, double _M5,const ImplParams &p= ImplParams()); - virtual void DirichletBlock(Coordinate & block) + virtual void DirichletBlock(const Coordinate & block) { - assert(block.size()==Nd+1); - if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ - Dirichlet = 1; - Block = block; - Stencil.DirichletBlock(block); - StencilEven.DirichletBlock(block); - StencilOdd.DirichletBlock(block); - } } // Constructors /* From fdef7a1a8cedad899a073d4ad46c58c7514d069d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 00:05:20 -0400 Subject: [PATCH 64/86] Dirichlet fix --- .../implementation/WilsonFermion5DImplementation.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 7775ad9d..681a6914 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -92,6 +92,15 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); } + if ( p.dirichlet.size() ) { + Coordinate block = p.dirichlet; + assert(block.size()==Nd+1); + if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ + Dirichlet = 1; + Block = block; + } + } + if (Impl::LsVectorised) { int nsimd = Simd::Nsimd(); From 6efd80f104c48d08e24dd79e70679b5d77bf18b7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 18:23:46 -0400 Subject: [PATCH 65/86] Printing --- Grid/qcd/hmc/integrators/Integrator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 070cbea1..1985caf0 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -145,7 +145,7 @@ protected: MomFilter->applyFilter(force); std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x]) Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR; From 0e9666bc92341b8ff31af298aa13cdc2b5f84cab Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 19:18:42 -0400 Subject: [PATCH 66/86] Test update --- systems/Crusher/dwf8.slurm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/systems/Crusher/dwf8.slurm b/systems/Crusher/dwf8.slurm index 866ec775..4bc1917a 100644 --- a/systems/Crusher/dwf8.slurm +++ b/systems/Crusher/dwf8.slurm @@ -7,8 +7,9 @@ #SBATCH -o DWF.%J #SBATCH -e DWF.%J #SBATCH -N 1 -#SBATCH -n 1 -#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#SBATCH -n 8 +##SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#SBATCH --gpu-bind=map_gpu:0,1,2,3,6,7,4,5 DIR=. source setup.sh @@ -23,5 +24,5 @@ export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -srun --gpus-per-task 1 -N1 -n1 ./tests/Test_dwf_mixedcg_prec +srun --gpus-per-task 1 -N1 -n8 ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.1 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 From d10d30dda84bd8f113197fe759048e7be2e0e349 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 19:18:58 -0400 Subject: [PATCH 67/86] Script update --- systems/Crusher/dwf4.slurm | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/systems/Crusher/dwf4.slurm b/systems/Crusher/dwf4.slurm index 6bb953c4..8e2a6e4c 100644 --- a/systems/Crusher/dwf4.slurm +++ b/systems/Crusher/dwf4.slurm @@ -7,21 +7,19 @@ #SBATCH -o DWF.%J #SBATCH -e DWF.%J #SBATCH -N 1 -#SBATCH -n 4 -#SBATCH --exclusive +#SBATCH -n 2 +#SBATCH --gpu-bind=map_gpu:0,1 DIR=. -module list +source setup.sh + +export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=4 +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads 8 --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0" -srun --gpus-per-task 1 -n4 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS +srun --gpus-per-task 1 -N1 -n2 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.16.32.64 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 From 31efa5c4da075683d636f05faea7b6b965f648ce Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 15 Jun 2022 19:19:44 -0400 Subject: [PATCH 68/86] Script updates for current summit --- systems/Summit/config-command | 1 + systems/Summit/dwf16.lsf | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/systems/Summit/config-command b/systems/Summit/config-command index b565addc..46e37af1 100644 --- a/systems/Summit/config-command +++ b/systems/Summit/config-command @@ -7,6 +7,7 @@ --enable-setdevice \ --disable-fermion-reps \ --enable-accelerator=cuda \ + --disable-accelerator-cshift \ --prefix /ccs/home/paboyle/prefix \ CXX=nvcc \ LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \ diff --git a/systems/Summit/dwf16.lsf b/systems/Summit/dwf16.lsf index ef8c21a5..3242fc86 100644 --- a/systems/Summit/dwf16.lsf +++ b/systems/Summit/dwf16.lsf @@ -1,25 +1,39 @@ #!/bin/bash #BSUB -P LGT104 -#BSUB -W 2:00 +#BSUB -W 0:20 #BSUB -nnodes 16 #BSUB -J DWF + export OMP_NUM_THREADS=6 export PAMI_IBV_ADAPTER_AFFINITY=1 export PAMI_ENABLE_STRIPING=1 -export OPT="--comms-concurrent --comms-overlap " -APP="./benchmarks/Benchmark_comms_host_device --mpi 4.4.4.3 " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log +DIR=. +source sourceme.sh -APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log +echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log +VOLS=( 32.32.32.16 32.32.32.64 64.32.32.64 64.32.64.64 64.64.64.64 64.64.64.128 64.64.64.256 64.64.64.512 128.64.64.64.512) +MPI=( 1.1.1.1 1.1.1.4 2.1.1.4 2.1.2.4 2.2.2.4 2.2.2.8 2.2.2.16 2.2.2.32 4.4.2.32 ) +RANKS=( 1 4 8 16 32 64 128 256 1024) +NODES=( 1 1 2 4 8 16 32 64 128) +INTS=( 0 1 2 3 4 5 6 7 8) +for i in 5 +do + vol=${VOLS[$i]} + nodes=${NODES[$i]} + mpi=${MPI[$i]} + ranks=${RANKS[$i]} + JSRUN="jsrun --nrs $nodes -a4 -g4 -c42 -dpacked -b packed:10 --latency_priority gpu-cpu --smpiargs=-gpu" + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-sequential --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.seq.ker + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-overlap --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.over.ker +done From e1e7b1e2245a2db5686ae1dd8ff27bf1f0b37314 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 27 Jun 2022 12:09:52 -0400 Subject: [PATCH 69/86] RNG fix --- Grid/lattice/Lattice_rng.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 34df8da2..6857dc84 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -428,18 +428,20 @@ public: thread_for( lidx, _grid->lSites(), { int gidx; + int o_idx; + int i_idx; + int rank; Coordinate pcoor; Coordinate lcoor; Coordinate gcoor; _grid->LocalIndexToLocalCoor(lidx,lcoor); pcoor=_grid->ThisProcessorCoor(); _grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor); + _grid->GlobalCoorToGlobalIndex(gcoor,gidx); - int o_idx; - int i_idx; - int rank; _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); assert(rank == _grid->ThisRank() ); + int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; Skip(_generators[l_idx],gidx); // Skip to next RNG sequence From efd7338a0031c972cd3e01866c0e2c0f7f480ead Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 27 Jun 2022 12:10:27 -0400 Subject: [PATCH 70/86] Allow dirichlet at round the world link --- Grid/qcd/action/filters/DirichletFilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/filters/DirichletFilter.h b/Grid/qcd/action/filters/DirichletFilter.h index 4571c1de..e388891f 100644 --- a/Grid/qcd/action/filters/DirichletFilter.h +++ b/Grid/qcd/action/filters/DirichletFilter.h @@ -53,7 +53,7 @@ struct DirichletFilter: public MomentumFilterBase LatticeInteger coor(grid); LatCM zz(grid); zz = Zero(); for(int mu=0;muGlobalDimensions()[mu] ) ) { + if ( (Block[mu]) && (Block[mu] <= grid->GlobalDimensions()[mu] ) ) { // If costly could provide Grid earlier and precompute masks std::cout << GridLogMessage << " Dirichlet in mu="< Date: Mon, 27 Jun 2022 12:11:08 -0400 Subject: [PATCH 71/86] More verbose CG --- Grid/algorithms/iterative/ConjugateGradient.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index c20e267d..9a644c7e 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -175,13 +175,13 @@ public: << "\tTrue residual " << true_residual << "\tTarget " << Tolerance << std::endl; - std::cout << GridLogIterative << "Time breakdown "< Date: Mon, 27 Jun 2022 12:14:57 -0400 Subject: [PATCH 72/86] Spelling correction --- Grid/algorithms/iterative/ConjugateGradientMultiShift.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Grid/algorithms/iterative/ConjugateGradientMultiShift.h b/Grid/algorithms/iterative/ConjugateGradientMultiShift.h index 3b079e99..0e70916f 100644 --- a/Grid/algorithms/iterative/ConjugateGradientMultiShift.h +++ b/Grid/algorithms/iterative/ConjugateGradientMultiShift.h @@ -44,7 +44,7 @@ public: using OperatorFunction::operator(); - RealD Tolerance; + // RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion std::vector IterationsToCompleteShift; // Iterations for this shift @@ -324,8 +324,8 @@ public: std::cout << GridLogMessage << "Time Breakdown "< Date: Mon, 27 Jun 2022 12:15:55 -0400 Subject: [PATCH 73/86] Allow frequency=0 to disable --- Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h index 2c2402f8..b4417cac 100644 --- a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h +++ b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h @@ -185,7 +185,7 @@ NAMESPACE_BEGIN(Grid); auto grid = NumOp.FermionGrid(); auto r=rand(); grid->Broadcast(0,r); - if ( (r%param.BoundsCheckFreq)==0 ) { + if ( param.BoundsCheckFreq && ((r%param.BoundsCheckFreq)==0) ) { FermionField gauss(NumOp.FermionRedBlackGrid()); gauss = Noise; HighBoundCheck(MdagM,gauss,param.hi); From 4ac1094856cd52c3e3c2b817762ee63b17a720c6 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 27 Jun 2022 12:16:24 -0400 Subject: [PATCH 74/86] Updated config commands --- configure.ac | 11 +++++++---- systems/Crusher/config-command | 1 + systems/Summit/config-command | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index c8023939..528f0125 100644 --- a/configure.ac +++ b/configure.ac @@ -556,16 +556,19 @@ esac AC_ARG_ENABLE([setdevice],[AC_HELP_STRING([--enable-setdevice | --disable-setdevice], [Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no]) case ${ac_SETDEVICE} in - yes);; - no) + yes) + echo ENABLE SET DEVICE + ;; + *) AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU] ) + echo DISABLE SET DEVICE ;; esac ######################################################### ###################### Shared memory intranode ######### ######################################################### -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no|none], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no]) case ${ac_SHM} in @@ -585,7 +588,7 @@ case ${ac_SHM} in AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) ;; - shmnone | no) + shmnone | no | none) AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) ;; diff --git a/systems/Crusher/config-command b/systems/Crusher/config-command index c93ea9c8..9e68a354 100644 --- a/systems/Crusher/config-command +++ b/systems/Crusher/config-command @@ -1,6 +1,7 @@ ../../configure --enable-comms=mpi-auto \ --enable-unified=no \ --enable-shm=nvlink \ +--enable-debug \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --enable-simd=GPU \ diff --git a/systems/Summit/config-command b/systems/Summit/config-command index 46e37af1..4caf652e 100644 --- a/systems/Summit/config-command +++ b/systems/Summit/config-command @@ -2,12 +2,12 @@ --enable-simd=GPU \ --enable-gen-simd-width=32 \ --enable-unified=no \ - --enable-shm=nvlink \ + --enable-shm=no \ --disable-gparity \ - --enable-setdevice \ + --disable-setdevice \ --disable-fermion-reps \ --enable-accelerator=cuda \ - --disable-accelerator-cshift \ + --enable-accelerator-cshift \ --prefix /ccs/home/paboyle/prefix \ CXX=nvcc \ LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \ From 87ad76d81be5c15fdc18016d60b96efa17c00f17 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 30 Jun 2022 13:42:46 -0400 Subject: [PATCH 75/86] Initialise timeval --- Grid/perfmon/Timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h index 2a44faee..02c23a62 100644 --- a/Grid/perfmon/Timer.h +++ b/Grid/perfmon/Timer.h @@ -39,9 +39,9 @@ NAMESPACE_BEGIN(Grid) // C++11 time facilities better? inline double usecond(void) { struct timeval tv; -#ifdef TIMERS_ON + tv.tv_sec = 0; + tv.tv_usec = 0; gettimeofday(&tv,NULL); -#endif return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec; } From 220050822a6e51dbbc921f9cb0d0bb02018e1151 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 30 Jun 2022 13:43:27 -0400 Subject: [PATCH 76/86] Speed up M5D and M5Ddag --- .../implementation/CayleyFermion5Dcache.h | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index d2537ccf..1581bee4 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -66,18 +66,17 @@ CayleyFermion5D::M5D(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss= sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; - for(int s=0;s::M5Ddag(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss=sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2; - for(int s=0;s Date: Thu, 30 Jun 2022 13:44:09 -0400 Subject: [PATCH 77/86] Rough flop counting, need to add M5D, M5Ddag, MooeeInv flops --- tests/Test_dwf_mixedcg_prec.cc | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index da0b54cd..da71f72d 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -46,7 +46,7 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - const int Ls=8; + const int Ls=12; std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl; @@ -94,13 +94,32 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); - mCG(src_o,result_o); - + double t1,t2,flops; + int iters; + for(int i=0;i<100;i++){ + result_o = Zero(); + t1=usecond(); + mCG(src_o,result_o); + t2=usecond(); + iters = mCG.TotalInnerIterations; //Number of inner CG iterations + flops = 1320.0*2*FGrid->gSites()*iters; + std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< CG(1.0e-8,10000); - CG(HermOpEO,src_o,result_o_2); - - MemoryManager::Print(); + for(int i=0;i<100;i++){ + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = 1320.0*2*FGrid->gSites()*iters; + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< Date: Thu, 30 Jun 2022 13:45:07 -0400 Subject: [PATCH 78/86] Dirichlet implementation --- .../implementation/WilsonFermion5DImplementation.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 681a6914..51c7df57 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -92,15 +92,19 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); } - if ( p.dirichlet.size() ) { + if ( p.dirichlet.size() == Nd+1) { Coordinate block = p.dirichlet; - assert(block.size()==Nd+1); if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ Dirichlet = 1; Block = block; } + } else { + Coordinate block(Nd+1,0); + Block = block; } - + + ZeroCounters(); + if (Impl::LsVectorised) { int nsimd = Simd::Nsimd(); From d03152fac47d7f2d1d0d5e3da317833e34210db3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 30 Jun 2022 13:49:35 -0400 Subject: [PATCH 79/86] New file under debug --- HMC/Mobius2p1f_DD_RHMC_96I.cc | 419 ++++++++++++++++++++++++++++++++++ 1 file changed, 419 insertions(+) create mode 100644 HMC/Mobius2p1f_DD_RHMC_96I.cc diff --git a/HMC/Mobius2p1f_DD_RHMC_96I.cc b/HMC/Mobius2p1f_DD_RHMC_96I.cc new file mode 100644 index 00000000..a6a2f26c --- /dev/null +++ b/HMC/Mobius2p1f_DD_RHMC_96I.cc @@ -0,0 +1,419 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_EODWFRatio.cc + +Copyright (C) 2015-2016 + +Author: Peter Boyle +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + + // Typedefs to simplify notation + typedef WilsonImplR FermionImplPolicy; + typedef MobiusFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + + typedef Grid::XmlReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 6; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + // Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real beta = 2.13; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + // std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-4; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-7; + OFRp.mdtolerance= 1.0e-4; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + // Block4[0] = Dirichlet[1]; + // Block4[1] = Dirichlet[2]; + // Block4[2] = Dirichlet[3]; + Block4[0] = 0; + Block4[1] = 0; + Block4[2] = 0; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grid + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-8; + double MDStoppingCondition = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector *> Quotients; + std::vector *> Bdys; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h SdagS(StrangeOp); + HighBoundCheck(SdagS,vec,SFRp.hi); + ChebyBoundsCheck(SdagS,vec,SFRp.lo,SFRp.hi); + std::cout << "Strange inversion"<Mass() < UdagU(*Denominators[0]); + HighBoundCheck(UdagU,vec,OFRp.hi); + ChebyBoundsCheck(UdagU,vec,OFRp.lo,OFRp.hi); + std::cout << "light inversion"< SddagSd(StrangeOpDir); + HighBoundCheck(SddagSd,vec,OFRp.hi); + ChebyBoundsCheck(SddagSd,vec,OFRp.lo,OFRp.hi); + std::cout << "strange dirichlet inversion"<Mass()< UddagUd(*Numerators[0]); + HighBoundCheck(UddagUd,vec,OFRp.hi); + ChebyBoundsCheck(UddagUd,vec,OFRp.lo,OFRp.hi); + std::cout << "light dirichlet inversion"< Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UddagUd); + PlainHermOp Op (UddagUd); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid);src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int ev=0;ev Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UdagU); + PlainHermOp Op (UdagU); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid); src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int e=0;e Date: Thu, 30 Jun 2022 13:49:50 -0400 Subject: [PATCH 80/86] Create a new RNG file --- HMC/RNGstate.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 HMC/RNGstate.cc diff --git a/HMC/RNGstate.cc b/HMC/RNGstate.cc new file mode 100644 index 00000000..a595a9e6 --- /dev/null +++ b/HMC/RNGstate.cc @@ -0,0 +1,53 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: + +Copyright (C) 2015-2016 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) +{ + using namespace Grid; + + Grid_init(&argc, &argv); + + Coordinate latt4 = GridDefaultLatt(); + Coordinate mpi = GridDefaultMpi(); + Coordinate simd = GridDefaultSimd(Nd,vComplexD::Nsimd()); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd,mpi); + + GridSerialRNG sRNG; sRNG.SeedUniqueString(std::string("The Serial RNG")); + GridParallelRNG pRNG(UGrid); pRNG.SeedUniqueString(std::string("The 4D RNG")); + + std::string rngfile("ckpoint_rng.0"); + NerscIO::writeRNGState(sRNG, pRNG, rngfile); + + Grid_finalize(); +} + + + From 808bb592067012985be06ab90d8dec702ffaf41e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 30 Jun 2022 13:50:09 -0400 Subject: [PATCH 81/86] Mixed prec DD-RHMC --- HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc | 444 ++++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc diff --git a/HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc b/HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc new file mode 100644 index 00000000..a9b5dc7e --- /dev/null +++ b/HMC/Mobius2p1f_DD_RHMC_96I_mixed.cc @@ -0,0 +1,444 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_EODWFRatio.cc + +Copyright (C) 2015-2016 + +Author: Peter Boyle +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + MixedPrecisionConjugateGradient MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 4; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-6; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-8; + OFRp.mdtolerance= 1.0e-6; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grids + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-10; + double MDStoppingCondition = 1e-7; + double MDStoppingConditionLoose = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector *> Bdys; + std::vector ActionMPCG; + std::vector MPCG; + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h Date: Thu, 30 Jun 2022 14:53:12 -0400 Subject: [PATCH 82/86] Remove debug --- systems/Crusher/config-command | 1 - 1 file changed, 1 deletion(-) diff --git a/systems/Crusher/config-command b/systems/Crusher/config-command index 9e68a354..c93ea9c8 100644 --- a/systems/Crusher/config-command +++ b/systems/Crusher/config-command @@ -1,7 +1,6 @@ ../../configure --enable-comms=mpi-auto \ --enable-unified=no \ --enable-shm=nvlink \ ---enable-debug \ --enable-accelerator=hip \ --enable-gen-simd-width=64 \ --enable-simd=GPU \ From 751a4562d709f15cb7aeba72941554a6c6dbd04e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 1 Jul 2022 09:41:43 -0400 Subject: [PATCH 83/86] Timing improvement --- Grid/algorithms/iterative/ConjugateGradient.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 9a644c7e..b65eea46 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -120,6 +120,9 @@ public: SolverTimer.Start(); int k; for (k = 1; k <= MaxIterations; k++) { + + GridStopWatch IterationTimer; + IterationTimer.Start(); c = cp; MatrixTimer.Start(); @@ -152,12 +155,13 @@ public: LinearCombTimer.Stop(); LinalgTimer.Stop(); + IterationTimer.Stop(); if ( (k % 500) == 0 ) { std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; } else { std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k - << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; + << " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl; } // Stopping condition From 57b442d0deb18d44a98322b52b54a3892f0fc9dc Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 1 Jul 2022 09:42:17 -0400 Subject: [PATCH 84/86] Log memory operations --- Grid/log/Log.cc | 6 ++++-- Grid/log/Log.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Grid/log/Log.cc b/Grid/log/Log.cc index 31f9a3f3..63bc454f 100644 --- a/Grid/log/Log.cc +++ b/Grid/log/Log.cc @@ -65,6 +65,7 @@ GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); +GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL"); GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE"); @@ -72,9 +73,10 @@ GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); void GridLogConfigure(std::vector &logstreams) { - GridLogError.Active(0); + GridLogError.Active(1); GridLogWarning.Active(0); GridLogMessage.Active(1); // at least the messages should be always on + GridLogMemory.Active(0); // at least the messages should be always on GridLogIterative.Active(0); GridLogDebug.Active(0); GridLogPerformance.Active(0); @@ -83,7 +85,7 @@ void GridLogConfigure(std::vector &logstreams) { GridLogHMC.Active(1); for (int i = 0; i < logstreams.size(); i++) { - if (logstreams[i] == std::string("Error")) GridLogError.Active(1); + if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1); if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); diff --git a/Grid/log/Log.h b/Grid/log/Log.h index b1696fee..4d512ff6 100644 --- a/Grid/log/Log.h +++ b/Grid/log/Log.h @@ -183,6 +183,7 @@ extern GridLogger GridLogPerformance; extern GridLogger GridLogIterative ; extern GridLogger GridLogIntegrator ; extern GridLogger GridLogHMC; +extern GridLogger GridLogMemory; extern Colours GridLogColours; std::string demangle(const char* name) ; From bd99fd608c01c7778b3c50033942a9558ee572ea Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 1 Jul 2022 09:42:53 -0400 Subject: [PATCH 85/86] Introduce a non-default stream for compute operatoins --- Grid/threads/Accelerator.cc | 4 ++++ Grid/threads/Accelerator.h | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index cbc798a9..092d46b3 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -16,6 +16,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; #ifdef GRID_CUDA cudaDeviceProp *gpu_props; cudaStream_t copyStream; +cudaStream_t cpuStream; void acceleratorInit(void) { int nDevices = 1; @@ -98,6 +99,7 @@ void acceleratorInit(void) cudaSetDevice(device); cudaStreamCreate(©Stream); + cudaStreamCreate(&cpuStream); const int len=64; char busid[len]; if( rank == world_rank ) { @@ -112,6 +114,7 @@ void acceleratorInit(void) #ifdef GRID_HIP hipDeviceProp_t *gpu_props; hipStream_t copyStream; +hipStream_t cpuStream; void acceleratorInit(void) { int nDevices = 1; @@ -180,6 +183,7 @@ void acceleratorInit(void) #endif hipSetDevice(device); hipStreamCreate(©Stream); + hipStreamCreate(&cpuStream); const int len=64; char busid[len]; if( rank == world_rank ) { diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index a5fb7aa8..d88c08b4 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -107,6 +107,7 @@ void acceleratorInit(void); extern int acceleratorAbortOnGpuError; extern cudaStream_t copyStream; +extern cudaStream_t cpuStream; accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -134,7 +135,7 @@ inline void cuda_mem(void) }; \ dim3 cu_threads(nsimd,acceleratorThreads(),1); \ dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ - LambdaApply<<>>(num1,num2,nsimd,lambda); \ + LambdaApply<<>>(num1,num2,nsimd,lambda); \ } #define accelerator_for6dNB(iter1, num1, \ @@ -153,7 +154,7 @@ inline void cuda_mem(void) }; \ dim3 cu_blocks (num1,num2,num3); \ dim3 cu_threads(num4,num5,num6); \ - Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ + Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ } template __global__ @@ -189,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, #define accelerator_barrier(dummy) \ { \ - cudaDeviceSynchronize(); \ + cudaStreamSynchronize(cpuStream); \ cudaError err = cudaGetLastError(); \ if ( cudaSuccess != err ) { \ printf("accelerator_barrier(): Cuda error %s \n", \ @@ -339,6 +340,7 @@ NAMESPACE_BEGIN(Grid); #define accelerator_inline __host__ __device__ inline extern hipStream_t copyStream; +extern hipStream_t cpuStream; /*These routines define mapping from thread grid to loop & vector lane indexing */ accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -360,12 +362,12 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \ hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,cpuStream, \ + num1,num2,nsimd, lambda); \ } else { \ hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,cpuStream, \ + num1,num2,nsimd, lambda); \ } \ } @@ -398,7 +400,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) #define accelerator_barrier(dummy) \ { \ - hipDeviceSynchronize(); \ + hipStreamSynchronize(cpuStream); \ auto err = hipGetLastError(); \ if ( err != hipSuccess ) { \ printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \ From 588c2f3cb191d9c438399e5039847285d3f9e4a9 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 1 Jul 2022 09:44:58 -0400 Subject: [PATCH 86/86] Faster axpy_norm and innerProduct --- Grid/lattice/Lattice_reduction.h | 51 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 0ddac437..16feb856 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -232,6 +232,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. +#if 0 typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -241,15 +242,31 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... - accelerator_for( ss, sites, 1,{ - auto x_l = left_v[ss]; - auto y_l = right_v[ss]; - inner_tmp_v[ss]=innerProductD(x_l,y_l); + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l)); }); } +#else + typedef decltype(innerProduct(vobj(),vobj())) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + { + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); + // GPU - SIMT lane compliance... + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + }); + } +#endif // This is in single precision and fails some tests - auto anrm = sum(inner_tmp_v,sites); + auto anrm = sumD(inner_tmp_v,sites); nrm = anrm; return nrm; } @@ -283,7 +300,7 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt conformable(x,y); typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_typeD vector_type; + // typedef typename vobj::vector_typeD vector_type; RealD nrm; GridBase *grid = x.Grid(); @@ -295,17 +312,29 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( x_v, x, AcceleratorRead); autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - +#if 0 typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, 1,{ - auto tmp = a*x_v[ss]+b*y_v[ss]; - inner_tmp_v[ss]=innerProductD(tmp,tmp); - z_v[ss]=tmp; + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); }); nrm = real(TensorRemove(sum(inner_tmp_v,sites))); +#else + typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); + }); + nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); +#endif grid->GlobalSum(nrm); return nrm; }