From 0b905a72ddfafcf02bcc4b6738ff31c74be79ed5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 29 Oct 2021 02:22:22 +0100 Subject: [PATCH 01/93] Better reduction for GPUs --- Grid/lattice/Lattice_reduction_gpu.h | 48 ++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c2875052..823e497e 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -23,7 +23,7 @@ unsigned int nextPow2(Iterator x) { } template -void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) { +int getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) { int device; #ifdef GRID_CUDA @@ -37,13 +37,13 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock; Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock; Iterator multiProcessorCount = gpu_props[device].multiProcessorCount; - + /* std::cout << GridLogDebug << "GPU has:" << std::endl; std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl; std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl; std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl; std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl; - + */ if (warpSize != WARP_SIZE) { std::cout << GridLogError << "The warp size of the GPU in use does not match the warp size set when compiling Grid." << std::endl; exit(EXIT_FAILURE); @@ -53,12 +53,12 @@ void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator threads = warpSize; if ( threads*sizeofsobj > sharedMemPerBlock ) { std::cout << GridLogError << "The object is too large for the shared memory." << std::endl; - exit(EXIT_FAILURE); + return 0; } while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2; // keep all the streaming multiprocessors busy blocks = nextPow2(multiProcessorCount); - + return 1; } template @@ -198,7 +198,7 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { // Possibly promote to double and sum ///////////////////////////////////////////////////////////////////////////////////////////////////////// template -inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) +inline typename vobj::scalar_objectD sumD_gpu_internal(const vobj *lat, Integer osites) { typedef typename vobj::scalar_objectD sobj; typedef decltype(lat) Iterator; @@ -208,6 +208,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) Integer numThreads, numBlocks; getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); + Integer smemSize = numThreads * sizeof(sobj); Vector buffer(numBlocks); @@ -218,6 +219,41 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) auto result = buffer_v[0]; return result; } +template +inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) +{ + typedef typename vobj::vector_type vector; + typedef typename vobj::scalar_typeD scalarD; + typedef typename vobj::scalar_objectD sobj; + sobj ret; + scalarD *ret_p = (scalarD *)&ret; + + const int words = sizeof(vobj)/sizeof(vector); + + Integer nsimd= vobj::Nsimd(); + Integer size = osites*nsimd; + Integer numThreads, numBlocks; + int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); + + if ( ok ) { + ret = sumD_gpu_internal(lat,osites); + } else { + std::cout << GridLogWarning << " dropping to summing word by word for large object size "< buffer(osites); + vector *dat = (vector *)lat; + vector *buf = &buffer[0]; + iScalar *tbuf =(iScalar *) &buffer[0]; + for(int w=0;w Date: Fri, 29 Oct 2021 02:23:08 +0100 Subject: [PATCH 02/93] Verbosity --- Grid/lattice/Lattice_reduction_gpu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index 823e497e..73a704f5 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -238,7 +238,6 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) if ( ok ) { ret = sumD_gpu_internal(lat,osites); } else { - std::cout << GridLogWarning << " dropping to summing word by word for large object size "< buffer(osites); vector *dat = (vector *)lat; vector *buf = &buffer[0]; From 0bd83cdbda5defdde82f48f0c06baef40b3dee29 Mon Sep 17 00:00:00 2001 From: RJHudspith Date: Sun, 28 Nov 2021 21:51:03 +0100 Subject: [PATCH 03/93] Fixes for Nc!=3 Nersc IO, Gauge and Gauge_NCxNC compatible with GLU. Trace normalisation changed in places removing explicit threes. Guards against non-su3 tests and tests failing when LIME is not compiled. --- Grid/Makefile.am | 2 +- Grid/parallelIO/IldgIO.h | 9 +- Grid/parallelIO/MetaData.h | 33 +- Grid/parallelIO/NerscIO.h | 46 ++- Grid/qcd/utils/WilsonLoops.h | 3 +- configure.ac | 2 +- tests/IO/Test_nersc_io.cc | 8 +- tests/core/Test_lie_generators.cc | 376 +++++++++--------- tests/core/Test_reunitarise.cc | 5 +- tests/hmc/Test_hmc_ScalarActionNxN.cc | 4 +- tests/hmc/Test_hmc_WG_Production.cc | 6 +- tests/lanczos/Test_compressed_lanczos.cc | 10 + .../Test_dwf_compressed_lanczos_reorg.cc | 11 +- tests/solver/Test_coarse_even_odd.cc | 12 +- tests/solver/Test_dwf_mrhs_cg.cc | 2 + 15 files changed, 282 insertions(+), 247 deletions(-) diff --git a/Grid/Makefile.am b/Grid/Makefile.am index 7c3c151b..aff5b9d0 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -70,7 +70,7 @@ endif lib_LIBRARIES = libGrid.a CCFILES += $(extra_sources) -HFILES += $(extra_headers) Config.h Version.h +HFILES += $(extra_headers) libGrid_a_SOURCES = $(CCFILES) libGrid_adir = $(includedir)/Grid diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h index 576f08bd..80d135d2 100644 --- a/Grid/parallelIO/IldgIO.h +++ b/Grid/parallelIO/IldgIO.h @@ -31,6 +31,7 @@ directory #include #include #include +#include #include #include @@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter { // Fill ILDG header data struct ////////////////////////////////////////////////////// ildgFormat ildgfmt ; - ildgfmt.field = std::string("su3gauge"); + const std::string stNC = std::to_string( Nc ) ; + ildgfmt.field = std::string("su"+stNC+"gauge"); if ( format == std::string("IEEE32BIG") ) { ildgfmt.precision = 32; @@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader { } else { assert(found_ildgFormat); - assert ( ildgFormat_.field == std::string("su3gauge") ); + const std::string stNC = std::to_string( Nc ) ; + assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") ); /////////////////////////////////////////////////////////////////////////////////////// // Populate our Grid metadata as best we can @@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader { std::ostringstream vers; vers << ildgFormat_.version; FieldMetaData_.hdr_version = vers.str(); - FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); + FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC); FieldMetaData_.nd=4; FieldMetaData_.dimension.resize(4); diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index af8b3f76..6b9d8708 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -6,8 +6,8 @@ Copyright (C) 2015 - Author: Peter Boyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -182,8 +182,8 @@ class GaugeStatistics public: void operator()(Lattice & data,FieldMetaData &header) { - header.link_trace=WilsonLoops::linkTrace(data); - header.plaquette =WilsonLoops::avgPlaquette(data); + header.link_trace = WilsonLoops::linkTrace(data); + header.plaquette = WilsonLoops::avgPlaquette(data); } }; typedef GaugeStatistics PeriodicGaugeStatistics; @@ -203,20 +203,24 @@ template<> inline void PrepareMetaData(Lattice 1 ) ; for(int mu=0;mu using iLorentzColour2x3 = iVector, 2>, Nd >; +template using iLorentzColour2x3 = iVector, Nc-1>, Nd >; typedef iLorentzColour2x3 LorentzColour2x3; typedef iLorentzColour2x3 LorentzColour2x3F; @@ -278,7 +282,6 @@ struct GaugeSimpleMunger{ template struct GaugeSimpleUnmunger { - void operator()(sobj &in, fobj &out) { for (int mu = 0; mu < Nd; mu++) { for (int i = 0; i < Nc; i++) { @@ -317,8 +320,8 @@ template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ for(int mu=0;mu struct Gauge3x2unmunger{ void operator() (sobj &in,fobj &out){ for(int mu=0;mu Author: Peter Boyle Author: paboyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +31,8 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H +#include + NAMESPACE_BEGIN(Grid); using namespace Grid; @@ -145,15 +148,17 @@ public: std::string format(header.floating_point); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); + const int ieee32big = (format == std::string("IEEE32BIG")); + const int ieee32 = (format == std::string("IEEE32")); + const int ieee64big = (format == std::string("IEEE64BIG")); + const int ieee64 = (format == std::string("IEEE64") || \ + format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; // munger is a function of - if ( header.data_type == std::string("4D_SU3_GAUGE") ) { + const std::string stNC = std::to_string( Nc ) ; + if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,Gauge3x2munger(), offset,format, @@ -164,7 +169,7 @@ public: (Umu,file,Gauge3x2munger(),offset,format, nersc_csum,scidac_csuma,scidac_csumb); } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + } else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,GaugeSimpleMunger(),offset,format, @@ -230,6 +235,7 @@ public: header.sequence_number = 1; header.ensemble_id = std::string("UKQCD"); header.ensemble_label = ens_label; + header.hdr_version = "1.0" ; typedef LorentzColourMatrixD fobj3D; typedef LorentzColour2x3D fobj2D; @@ -243,10 +249,14 @@ public: uint64_t offset; - // Sod it -- always write 3x3 double - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - GaugeSimpleUnmunger munge; + // Sod it -- always write NcxNc double + header.floating_point = std::string("IEEE64BIG"); + const std::string stNC = std::to_string( Nc ) ; + if( two_row ) { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE" ); + } else { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC ); + } if ( grid->IsBoss() ) { truncate(file); offset = writeHeader(header,file); @@ -254,8 +264,15 @@ public: grid->Broadcast(0,(void *)&offset,sizeof(offset)); uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, - nersc_csum,scidac_csuma,scidac_csumb); + if( two_row ) { + Gauge3x2unmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } else { + GaugeSimpleUnmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } header.checksum = nersc_csum; if ( grid->IsBoss() ) { writeHeader(header,file); @@ -287,8 +304,7 @@ public: header.plaquette=0.0; MachineCharacteristics(header); - uint64_t offset; - + uint64_t offset; #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); header.data_type = std::string("RANLUX48"); @@ -328,7 +344,7 @@ public: GridBase *grid = parallel.Grid(); - uint64_t offset = readHeader(file,grid,header); + uint64_t offset = readHeader(file,grid,header); FieldMetaData clone(header); diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 0367c9fa..e002e3d5 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -125,7 +125,6 @@ public: return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME } - ////////////////////////////////////////////////// // average over all x,y,z the temporal loop ////////////////////////////////////////////////// @@ -165,7 +164,7 @@ public: double vol = Umu.Grid()->gSites(); - return p.real() / vol / 4.0 / 3.0; + return p.real() / vol / (4.0 * Nc ) ; }; ////////////////////////////////////////////////// diff --git a/configure.ac b/configure.ac index 406b0b74..9ab0595a 100644 --- a/configure.ac +++ b/configure.ac @@ -159,7 +159,7 @@ case ${ac_ZMOBIUS} in esac ############### Nc AC_ARG_ENABLE([Nc], - [AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])], + [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])], [ac_Nc=${enable_Nc}], [ac_Nc=3]) case ${ac_Nc} in diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index c15c320e..ae5b9c0d 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -147,7 +147,7 @@ int main (int argc, char ** argv) Complex p = TensorRemove(Tp); std::cout< Author: Peter Boyle Author: Guido Cossu +Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,14 +43,14 @@ directory using namespace std; using namespace Grid; - ; +; int main(int argc, char** argv) { Grid_init(&argc, &argv); std::vector latt({4, 4, 4, 8}); GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid( - latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); + latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); @@ -60,15 +61,19 @@ int main(int argc, char** argv) { << std::endl; SU2::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; + + // guard as this code fails to compile for Nc != 3 +#if (Nc == 3) + SU2Adjoint::printGenerators(); SU2::testGenerators(); SU2Adjoint::testGenerators(); - + std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; SU3::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; SU3Adjoint::printGenerators(); @@ -111,12 +116,10 @@ int main(int argc, char** argv) { // AdjointRepresentation has the predefined number of colours Nc // Representations RepresentationTypes(grid); - - LatticeGaugeField U(grid), V(grid); SU3::HotConfiguration(gridRNG, U); SU3::HotConfiguration(gridRNG, V); - + // Adjoint representation // Test group structure // (U_f * V_f)_r = U_r * V_r @@ -127,17 +130,17 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu = peekLorentz(V,mu); pokeLorentz(UV,Umu*Vmu, mu); } - + AdjRep.update_representation(UV); typename AdjointRep::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r - - + + AdjRep.update_representation(U); typename AdjointRep::LatticeField Ur = AdjRep.U; // U_r - + AdjRep.update_representation(V); typename AdjointRep::LatticeField Vr = AdjRep.U; // V_r - + typename AdjointRep::LatticeField UrVr(grid); UrVr = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -145,10 +148,10 @@ int main(int argc, char** argv) { typename AdjointRep::LatticeMatrix Vrmu = peekLorentz(Vr,mu); pokeLorentz(UrVr,Urmu*Vrmu, mu); } - + typename AdjointRep::LatticeField Diff_check = UVr - UrVr; std::cout << GridLogMessage << "Group structure SU("<::AdjointLieAlgebraMatrix(h_adj,Ar); - + // Re-extract h_adj SU3::LatticeAlgebraVector h_adj2(grid); SU_Adjoint::projectOnAlgebra(h_adj2, Ar); SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2; std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl; - + // Exponentiate typename AdjointRep::LatticeMatrix Uadj(grid); Uadj = expMat(Ar, 1.0, 16); - + typename AdjointRep::LatticeMatrix uno(grid); uno = 1.0; // Check matrix Uadj, must be real orthogonal typename AdjointRep::LatticeMatrix Ucheck = Uadj - conjugate(Uadj); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) - << std::endl; - + << std::endl; + Ucheck = Uadj * adj(Uadj) - uno; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck) - << std::endl; + << std::endl; Ucheck = adj(Uadj) * Uadj - uno; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck) - << std::endl; - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af(grid); SU3::FundamentalLieAlgebraMatrix(h_adj,Af); @@ -193,72 +195,65 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck(grid); UnitCheck = Ufund * adj(Ufund) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck) - << std::endl; + << std::endl; UnitCheck = adj(Ufund) * Ufund - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck) - << std::endl; - + << std::endl; // Tranform to the adjoint representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields - + AdjRep.update_representation(U); Ur = AdjRep.U; // U_r typename AdjointRep::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj - + typename AdjointRep::LatticeMatrix Diff_check_mat = Ur0 - Uadj; std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl; - - - // TwoIndexRep tests - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printGenerators(); - std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::testGenerators(); std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printGenerators(); std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::testGenerators(); - - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); random(gridRNG,Gauss2); @@ -276,13 +271,13 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2 = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); random(gridRNG,Gauss2a); @@ -300,11 +295,11 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2a = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure" - << std::endl; + << std::endl; // Testing HMC representation classes TwoIndexRep< Nc, Symmetric > TIndexRep(grid); @@ -313,7 +308,7 @@ int main(int argc, char** argv) { LatticeGaugeField U2(grid), V2(grid); SU3::HotConfiguration(gridRNG, U2); SU3::HotConfiguration(gridRNG, V2); - + LatticeGaugeField UV2(grid); UV2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -321,16 +316,16 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu); pokeLorentz(UV2,Umu2*Vmu2, mu); } - + TIndexRep.update_representation(UV2); typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r - + TIndexRep.update_representation(U2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r - + TIndexRep.update_representation(V2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid); Ur2Vr2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -338,11 +333,11 @@ int main(int argc, char** argv) { typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu); pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu); } - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2; std::cout << GridLogMessage << "Group structure SU("<::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym); - + // Re-extract h_sym SU3::LatticeAlgebraVector h_sym2(grid); SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym); SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; - - + // Exponentiate typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid); U2iS = expMat(Ar_sym, 1.0, 16); - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid); uno2iS = 1.0; // Check matrix U2iS, must be real orthogonal typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS) - << std::endl; - + << std::endl; + Ucheck2iS = U2iS * adj(U2iS) - uno2iS; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS) - << std::endl; + << std::endl; Ucheck2iS = adj(U2iS) * U2iS - uno2iS; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS) - << std::endl; - - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af_sym(grid); SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); @@ -386,147 +378,137 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck2(grid); UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2) - << std::endl; + << std::endl; UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2) - << std::endl; - - + << std::endl; + + // Tranform to the 2Index Sym representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields - + TIndexRep.update_representation(U); Ur2 = TIndexRep.U; // U_r typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS; std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl; - - - - + if (TwoIndexRep::Dimension != 1){ - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; - std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" - << std::endl; - // Testing HMC representation classes - TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); + std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" + << std::endl; + // Testing HMC representation classes + TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); - // Test group structure - // (U_f * V_f)_r = U_r * V_r - LatticeGaugeField U2A(grid), V2A(grid); - SU3::HotConfiguration(gridRNG, U2A); - SU3::HotConfiguration(gridRNG, V2A); + // Test group structure + // (U_f * V_f)_r = U_r * V_r + LatticeGaugeField U2A(grid), V2A(grid); + SU3::HotConfiguration(gridRNG, U2A); + SU3::HotConfiguration(gridRNG, V2A); - LatticeGaugeField UV2A(grid); - UV2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); - SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); - pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + LatticeGaugeField UV2A(grid); + UV2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); + SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); + pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + } + + TIndexRep.update_representation(UV2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r + + TIndexRep.update_representation(U2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r + + TIndexRep.update_representation(V2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); + Ur2Vr2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); + pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); + } + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; + std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); + random(gridRNG,h_Asym); + h_Asym = real(h_Asym); + SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); + + // Re-extract h_sym + SU3::LatticeAlgebraVector h_Asym2(grid); + SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); + SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; + std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; + + + // Exponentiate + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); + U2iAS = expMat(Ar_Asym, 1.0, 16); + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); + uno2iAS = 1.0; + // Check matrix U2iS, must be real orthogonal + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); + std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) + << std::endl; + + Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) + << std::endl; + Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) + << std::endl; + + + + // Construct the fundamental matrix in the group + SU3::LatticeMatrix Af_Asym(grid); + SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); + SU3::LatticeMatrix Ufund2A(grid); + Ufund2A = expMat(Af_Asym, 1.0, 16); + SU3::LatticeMatrix UnitCheck2A(grid); + UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; + std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) + << std::endl; + UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; + std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) + << std::endl; + + + // Tranform to the 2Index Sym representation + U = Zero(); // fill this with only one direction + pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields + + TIndexRepA.update_representation(U); + Ur2A = TIndexRepA.U; // U_r + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; + std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; + + } else { + std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " + "because representation is trivial (dim = 1)" + << std::endl; } - - TIndexRep.update_representation(UV2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r - - TIndexRep.update_representation(U2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r - - TIndexRep.update_representation(V2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); - Ur2Vr2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); - pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); - } - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; - std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); - random(gridRNG,h_Asym); - h_Asym = real(h_Asym); - SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); - - // Re-extract h_sym - SU3::LatticeAlgebraVector h_Asym2(grid); - SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); - SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; - std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; - - - // Exponentiate - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); - U2iAS = expMat(Ar_Asym, 1.0, 16); - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); - uno2iAS = 1.0; - // Check matrix U2iS, must be real orthogonal - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); - std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) - << std::endl; - - Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) - << std::endl; - Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) - << std::endl; - - - - // Construct the fundamental matrix in the group - SU3::LatticeMatrix Af_Asym(grid); - SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); - SU3::LatticeMatrix Ufund2A(grid); - Ufund2A = expMat(Af_Asym, 1.0, 16); - SU3::LatticeMatrix UnitCheck2A(grid); - UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; - std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) - << std::endl; - UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; - std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) - << std::endl; - - - // Tranform to the 2Index Sym representation - U = Zero(); // fill this with only one direction - pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields - - TIndexRepA.update_representation(U); - Ur2A = TIndexRepA.U; // U_r - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; - std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; - -} else { - std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " - "because representation is trivial (dim = 1)" - << std::endl; -} - - - - - - - - +#endif Grid_finalize(); } diff --git a/tests/core/Test_reunitarise.cc b/tests/core/Test_reunitarise.cc index 6644be1a..10423159 100644 --- a/tests/core/Test_reunitarise.cc +++ b/tests/core/Test_reunitarise.cc @@ -122,14 +122,15 @@ int main (int argc, char ** argv) std::cout << "Determinant defect before projection " < using namespace std; using namespace Grid; +#ifdef HAVE_LIME + template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos { @@ -249,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif // HAVE_LIME_H diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index ecd0e629..669a7b6d 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -36,7 +36,8 @@ Author: Peter Boyle using namespace std; using namespace Grid; - ; + +#ifdef HAVE_LIME template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos @@ -250,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif diff --git a/tests/solver/Test_coarse_even_odd.cc b/tests/solver/Test_coarse_even_odd.cc index dfbab747..c7127121 100644 --- a/tests/solver/Test_coarse_even_odd.cc +++ b/tests/solver/Test_coarse_even_odd.cc @@ -93,8 +93,16 @@ int main(int argc, char** argv) { // Setup of Dirac Matrix and Operator // ///////////////////////////////////////////////////////////////////////////// - LatticeGaugeField Umu(Grid_f); SU3::HotConfiguration(pRNG_f, Umu); - + LatticeGaugeField Umu(Grid_f); +#if (Nc==2) + SU2::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==3) + SU3::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==4) + SU4::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==5) + SU5::HotConfiguration(pRNG_f, Umu); +#endif RealD checkTolerance = (getPrecision::value == 1) ? 1e-7 : 1e-15; RealD mass = -0.30; diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index b912ba4f..4242f64b 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -34,6 +34,7 @@ using namespace Grid; int main (int argc, char ** argv) { +#ifdef HAVE_LIME typedef typename DomainWallFermionR::FermionField FermionField; typedef typename DomainWallFermionR::ComplexField ComplexField; typename DomainWallFermionR::ImplParams params; @@ -237,4 +238,5 @@ int main (int argc, char ** argv) } Grid_finalize(); +#endif // HAVE_LIME } From 2851870d70ae40ad540f790433f798d6160aefdc Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 00:05:43 +0100 Subject: [PATCH 04/93] expClover support via helpers template class --- Grid/qcd/action/fermion/CloverHelpers.h | 371 ++++++++++++++++++ .../fermion/CompactWilsonCloverFermion.h | 3 +- Grid/qcd/action/fermion/Fermion.h | 72 ++-- Grid/qcd/action/fermion/WilsonCloverFermion.h | 3 +- Grid/qcd/action/fermion/WilsonCloverHelpers.h | 2 + ...CompactWilsonCloverFermionImplementation.h | 114 +++--- .../WilsonCloverFermionImplementation.h | 138 +++---- ...WilsonCloverFermionInstantiation.cc.master | 5 +- ...WilsonCloverFermionInstantiation.cc.master | 7 +- 9 files changed, 562 insertions(+), 153 deletions(-) create mode 100644 Grid/qcd/action/fermion/CloverHelpers.h diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h new file mode 100644 index 00000000..432bdf3c --- /dev/null +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -0,0 +1,371 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h + + Copyright (C) 2017 - 2022 + + Author: paboyle + Author: Daniel Richtmann + Author: Mattia Bruno + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#pragma once + +#include +#include +#include + +//////////////////////////////////////////// +// Standard Clover +// (4+m0) + csw * clover_term +// Exp Clover +// (4+m0) * exp(csw/(4+m0) clover_term) +// = (4+m0) + csw * clover_term + ... +//////////////////////////////////////////// + +NAMESPACE_BEGIN(Grid); + + +////////////////////////////////// +// Generic Standard Clover +////////////////////////////////// + +template +class CloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + + static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { + GridBase *grid = CloverTerm.Grid(); + CloverTerm += diag_mass; + + int lvol = grid->lSites(); + int DimRep = Impl::Dimension; + { + autoView(CTv,CloverTerm,CpuRead); + autoView(CTIv,CloverTermInv,CpuWrite); + thread_for(site, lvol, { + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); + peekLocalSite(Qx, CTv, lcoor); + //if (csw!=0){ + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + + EigenInvCloverOp = EigenCloverOp.inverse(); + //std::cout << EigenInvCloverOp << std::endl; + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; + // } + pokeLocalSite(Qxinv, CTIv, lcoor); + }); + } + } + + + static void CloverTermDerivative(GaugeField& clover_force, + const FermionField& X, + const FermionField& Y, + const std::vector& U, + RealD csw_t, RealD csw_r) { + GaugeLinkField force_mu(clover_force.Grid()), lambda(clover_force.Grid()); + PropagatorField Lambda(clover_force.Grid()); + + /////////////////////////////////////////////////////////// + // Clover term derivative + /////////////////////////////////////////////////////////// + Impl::outerProductImpl(Lambda, X, Y); + //std::cout << "Lambda:" << Lambda << std::endl; + + Gamma::Algebra sigma[] = { + Gamma::Algebra::SigmaXY, + Gamma::Algebra::SigmaXZ, + Gamma::Algebra::SigmaXT, + Gamma::Algebra::MinusSigmaXY, + Gamma::Algebra::SigmaYZ, + Gamma::Algebra::SigmaYT, + Gamma::Algebra::MinusSigmaXZ, + Gamma::Algebra::MinusSigmaYZ, + Gamma::Algebra::SigmaZT, + Gamma::Algebra::MinusSigmaXT, + Gamma::Algebra::MinusSigmaYT, + Gamma::Algebra::MinusSigmaZT}; + + /* + sigma_{\mu \nu}= + | 0 sigma[0] sigma[1] sigma[2] | + | sigma[3] 0 sigma[4] sigma[5] | + | sigma[6] sigma[7] 0 sigma[8] | + | sigma[9] sigma[10] sigma[11] 0 | + */ + + int count = 0; + clover_force = Zero(); + for (int mu = 0; mu < 4; mu++) + { + force_mu = Zero(); + for (int nu = 0; nu < 4; nu++) + { + if (mu == nu) + continue; + + RealD factor; + if (nu == 4 || mu == 4) + { + factor = 2.0 * csw_t; + } + else + { + factor = 2.0 * csw_r; + } + PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked + Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok + force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + count++; + } + + pokeLorentz(clover_force, U[mu] * force_mu, mu); + } + } +}; + + +////////////////////////////////// +// Generic Exp Clover +////////////////////////////////// + +template +class ExpCloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef WilsonCloverHelpers Helpers; + + static void plusIdentity(const CloverField& in) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; saprec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Clover.Grid(); + CloverField ExpClover(grid); + + int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); + + // csw/(diag_mass) * clover + Clover *= (1.0/diag_mass); + + ExpClover = Clover; + plusIdentity(ExpClover); // 1 + Clover + + // Taylor expansion, slow but generic + RealD pref = 1.0; + for (int i=2; i<=NMAX; i++) { + Clover = Clover * Clover; + pref /= (RealD)(i); + ExpClover += pref * Clover; + } + + // Convert the data layout of the clover terms + Clover = ExpClover * diag_mass; + CloverInv = adj(ExpClover * (1.0/diag_mass)); + } + + static void CloverTermDerivative(GaugeField& clover_force, const FermionField& X, const FermionField& Y, + const std::vector& U, RealD csw_t, RealD csw_r) { + assert(0); + } +}; + + +////////////////////////////////// +// Compact Standard Clover +////////////////////////////////// + + +template +class CompactCloverHelpers: public CompactWilsonCloverHelpers, + public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + Clover += diag_mass; + } + + static void Instantiate(CloverDiagonalField& Diagonal, + CloverTriangleField& Triangle, + CloverDiagonalField& DiagonalInv, + CloverTriangleField& TriangleInv, + RealD csw_t, RealD diag_mass) { + // Invert the clover term in the improved layout + CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + } + + // TODO: implement Cmunu for better performances with compact layout, but don't do it + // here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); + } +}; + +////////////////////////////////// +// Compact Exp Clover +////////////////////////////////// + + +template +class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void plusIdentity(const CloverField& in) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; saprec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + // csw/(diag_mass) * clover + Clover = Clover * (1.0/diag_mass); + } + + static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, + RealD csw_t, RealD diag_mass) { + GridBase* grid = Diagonal.Grid(); + int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); + // To be optimized: too much memory traffic; implement exp in improved layout + // Felix + Fabian: replace code below with + // + // ConvertLayout Clover -> Diagonal, Triangle + // ModifyBoundaries + // EvaluateExp + + // code to be replaced + CloverField Clover(grid), ExpClover(grid); + + CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); + + ExpClover = Clover; + plusIdentity(ExpClover); // 1 + Clover + + RealD pref = 1.0; + for (int i=2; i<=NMAX; i++) { + Clover = Clover * Clover; + pref /= (RealD)(i); + ExpClover += pref * Clover; + } + + // Convert the data layout of the clover terms + CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); + Diagonal = Diagonal * diag_mass; + Triangle = Triangle * diag_mass; + CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); + DiagonalInv = DiagonalInv*(1.0/diag_mass); + TriangleInv = TriangleInv*(1.0/diag_mass); + } + + + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + assert(0); + } + +}; + + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h index 3a166134..249b20bd 100644 --- a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h @@ -31,6 +31,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid); // + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site // = 84 complex words per site -template +template class CompactWilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers, public CompactWilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 78cf7851..bfbc16b8 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -138,38 +138,62 @@ typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; // Clover fermions -typedef WilsonCloverFermion WilsonCloverFermionR; -typedef WilsonCloverFermion WilsonCloverFermionF; -typedef WilsonCloverFermion WilsonCloverFermionD; +typedef CloverHelpers CloverR; +typedef CloverHelpers CloverF; +typedef CloverHelpers CloverD; -typedef WilsonCloverFermion WilsonCloverAdjFermionR; -typedef WilsonCloverFermion WilsonCloverAdjFermionF; -typedef WilsonCloverFermion WilsonCloverAdjFermionD; +typedef ExpCloverHelpers ExpCloverR; +typedef ExpCloverHelpers ExpCloverF; +typedef ExpCloverHelpers ExpCloverD; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; +typedef WilsonCloverFermion WilsonCloverFermionR; +typedef WilsonCloverFermion WilsonCloverFermionF; +typedef WilsonCloverFermion WilsonCloverFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; +typedef WilsonCloverFermion WilsonExpCloverFermionR; +typedef WilsonCloverFermion WilsonExpCloverFermionF; +typedef WilsonCloverFermion WilsonExpCloverFermionD; + +typedef WilsonCloverFermion WilsonCloverAdjFermionR; +typedef WilsonCloverFermion WilsonCloverAdjFermionF; +typedef WilsonCloverFermion WilsonCloverAdjFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; + +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; // Compact Clover fermions -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; +typedef CompactCloverHelpers CompactCloverR; +typedef CompactCloverHelpers CompactCloverF; +typedef CompactCloverHelpers CompactCloverD; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; +typedef CompactExpCloverHelpers CompactExpCloverR; +typedef CompactExpCloverHelpers CompactExpCloverF; +typedef CompactExpCloverHelpers CompactExpCloverD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionR; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionF; +typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; + +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; +typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index cd19fc10..562f08f4 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -32,6 +32,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid); // csw_r = csw_t to recover the isotropic version ////////////////////////////////////////////////////////////////// -template +template class WilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/WilsonCloverHelpers.h b/Grid/qcd/action/fermion/WilsonCloverHelpers.h index 60f19317..c221d5f0 100644 --- a/Grid/qcd/action/fermion/WilsonCloverHelpers.h +++ b/Grid/qcd/action/fermion/WilsonCloverHelpers.h @@ -209,6 +209,8 @@ public: }; +//////////////////////////////////////////////////////// + template class CompactWilsonCloverHelpers { public: diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 3dfcb133..b42957ac 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -32,17 +32,18 @@ #include #include + NAMESPACE_BEGIN(Grid); -template -CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, - GridCartesian& Fgrid, - GridRedBlackCartesian& Hgrid, - const RealD _mass, - const RealD _csw_r, - const RealD _csw_t, - const RealD _cF, - const WilsonAnisotropyCoefficients& clover_anisotropy, - const ImplParams& impl_p) +template +CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, + GridCartesian& Fgrid, + GridRedBlackCartesian& Hgrid, + const RealD _mass, + const RealD _csw_r, + const RealD _csw_t, + const RealD _cF, + const WilsonAnisotropyCoefficients& clover_anisotropy, + const ImplParams& impl_p) : WilsonBase(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy) , csw_r(_csw_r) , csw_t(_csw_t) @@ -68,40 +69,40 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); } -template -void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { WilsonBase::Dhop(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopOE(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopEO(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { WilsonBase::DhopDir(in, out, dir, disp); if(this->open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { WilsonBase::DhopDirAll(in, out); if(this->open_boundaries) { for(auto& o : out) ApplyBoundaryMask(o); } } -template -void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc Mooee(in, Tmp); @@ -109,8 +110,8 @@ void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& o if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc MooeeDag(in, Tmp); @@ -118,20 +119,20 @@ void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { WilsonBase::Meooe(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { WilsonBase::MeooeDag(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalOdd, TriangleOdd); @@ -144,13 +145,13 @@ void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionFiel if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { Mooee(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd); @@ -163,23 +164,23 @@ void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionF if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { MooeeInv(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { DhopDir(in, out, dir, disp); } -template -void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { DhopDirAll(in, out); } -template -void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { +template +void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { assert(!open_boundaries); // TODO check for changes required for open bc // NOTE: code copied from original clover term @@ -251,7 +252,7 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -261,18 +262,18 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi force += clover_force; } -template -void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, +template +void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, FermionField& out, const CloverDiagonalField& diagonal, const CloverTriangleField& triangle) { @@ -285,8 +286,8 @@ void CompactWilsonCloverFermion::MooeeInternal(const FermionField& CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle); } -template -void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { +template +void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { // NOTE: parts copied from original implementation // Import gauge into base class @@ -318,8 +319,9 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; - TmpOriginal += this->diag_mass; - + // Handle mass term based on clover policy + CloverHelpers::MassTerm(TmpOriginal, this->diag_mass); + // Convert the data layout of the clover term double t4 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); @@ -328,9 +330,9 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { double t5 = usecond(); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Invert the clover term in the improved layout + // Instantiate based on clover policy double t6 = usecond(); - CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + CloverHelpers::Instantiate(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); // Fill the remaining clover fields double t7 = usecond(); @@ -353,7 +355,7 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { << ", fill clover = " << (t4 - t3) / 1e6 << ", convert = " << (t5 - t4) / 1e6 << ", boundaries = " << (t6 - t5) / 1e6 - << ", inversions = " << (t7 - t6) / 1e6 + << ", instantiate = " << (t7 - t6) / 1e6 << ", pick cbs = " << (t8 - t7) / 1e6 << ", total = " << (t8 - t0) / 1e6 << std::endl; diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index e4d2e736..2ebf45f8 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -34,8 +34,8 @@ NAMESPACE_BEGIN(Grid); -template -WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, +template +WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, GridCartesian& Fgrid, GridRedBlackCartesian& Hgrid, const RealD _mass, @@ -74,8 +74,8 @@ WilsonCloverFermion::WilsonCloverFermion(GaugeField& } // *NOT* EO -template -void WilsonCloverFermion::M(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::M(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -89,8 +89,8 @@ void WilsonCloverFermion::M(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -104,8 +104,8 @@ void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) +template +void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { double t0 = usecond(); WilsonFermion::ImportGauge(_Umu); @@ -131,47 +131,50 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += Helpers::fillCloverXT(Ex) * csw_t; CloverTerm += Helpers::fillCloverYT(Ey) * csw_t; CloverTerm += Helpers::fillCloverZT(Ez) * csw_t; - CloverTerm += diag_mass; - + double t4 = usecond(); - int lvol = _Umu.Grid()->lSites(); - int DimRep = Impl::Dimension; + CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass); +// CloverTerm += diag_mass; +// +// double t4 = usecond(); +// int lvol = _Umu.Grid()->lSites(); +// int DimRep = Impl::Dimension; +// +// double t5 = usecond(); +// { +// autoView(CTv,CloverTerm,CpuRead); +// autoView(CTIv,CloverTermInv,CpuWrite); +// thread_for(site, lvol, { +// Coordinate lcoor; +// grid->LocalIndexToLocalCoor(site, lcoor); +// Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); +// Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); +// typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); +// peekLocalSite(Qx, CTv, lcoor); +// //if (csw!=0){ +// for (int j = 0; j < Ns; j++) +// for (int k = 0; k < Ns; k++) +// for (int a = 0; a < DimRep; a++) +// for (int b = 0; b < DimRep; b++){ +// auto zz = Qx()(j, k)(a, b); +// EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); +// } +// // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; +// +// EigenInvCloverOp = EigenCloverOp.inverse(); +// //std::cout << EigenInvCloverOp << std::endl; +// for (int j = 0; j < Ns; j++) +// for (int k = 0; k < Ns; k++) +// for (int a = 0; a < DimRep; a++) +// for (int b = 0; b < DimRep; b++) +// Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); +// // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; +// // } +// pokeLocalSite(Qxinv, CTIv, lcoor); +// }); +// } double t5 = usecond(); - { - autoView(CTv,CloverTerm,CpuRead); - autoView(CTIv,CloverTermInv,CpuWrite); - thread_for(site, lvol, { - Coordinate lcoor; - grid->LocalIndexToLocalCoor(site, lcoor); - Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); - peekLocalSite(Qx, CTv, lcoor); - //if (csw!=0){ - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++){ - auto zz = Qx()(j, k)(a, b); - EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); - } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } - pokeLocalSite(Qxinv, CTIv, lcoor); - }); - } - - double t6 = usecond(); // Separate the even and odd parts pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); @@ -184,7 +187,7 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); - double t7 = usecond(); + double t6 = usecond(); #if 0 std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" @@ -192,40 +195,39 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) << ", allocations = " << (t2 - t1) / 1e6 << ", field strength = " << (t3 - t2) / 1e6 << ", fill clover = " << (t4 - t3) / 1e6 - << ", misc = " << (t5 - t4) / 1e6 - << ", inversions = " << (t6 - t5) / 1e6 - << ", pick cbs = " << (t7 - t6) / 1e6 - << ", total = " << (t7 - t0) / 1e6 + << ", instantiation = " << (t5 - t4) / 1e6 + << ", pick cbs = " << (t6 - t5) / 1e6 + << ", total = " << (t6 - t0) / 1e6 << std::endl; #endif } -template -void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseNo); } -template -void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseNo); } -template -void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseYes); } -template -void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseYes); } -template -void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) +template +void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) { out.Checkerboard() = in.Checkerboard(); CloverField *Clover; @@ -278,8 +280,8 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } // MooeeInternal // Derivative parts unpreconditioned pseudofermions -template -void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) { conformable(X.Grid(), Y.Grid()); conformable(X.Grid(), force.Grid()); @@ -349,7 +351,7 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -360,15 +362,15 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } // Derivative parts -template -void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { assert(0); } // Derivative parts -template -void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +template +void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { assert(0); // not implemented yet } diff --git a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master index 7c91c252..1ea1549c 100644 --- a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master @@ -9,6 +9,7 @@ Author: paboyle Author: Guido Cossu Author: Daniel Richtmann + Author: Mattia Bruno This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,10 +33,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class CompactWilsonCloverFermion; +template class CompactWilsonCloverFermion>; +template class CompactWilsonCloverFermion>; NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master index af99dfb6..2cea3656 100644 --- a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master @@ -8,7 +8,8 @@ Author: paboyle Author: Guido Cossu - + Author: Mattia Bruno + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -31,10 +32,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class WilsonCloverFermion; +template class WilsonCloverFermion>; +template class WilsonCloverFermion>; NAMESPACE_END(Grid); From 3d44aa9cb9ed629fc227822a252d5037d518a5ef Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 01:10:19 +0100 Subject: [PATCH 05/93] cleaned up cloverhelpers; fixed test compact_clover which runs --- Grid/qcd/action/fermion/CloverHelpers.h | 85 +++---------------- .../WilsonCloverFermionImplementation.h | 39 --------- .../Test_compact_wilson_clover_speedup.cc | 4 +- 3 files changed, 12 insertions(+), 116 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 432bdf3c..af712852 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -98,71 +98,10 @@ public: } } - - static void CloverTermDerivative(GaugeField& clover_force, - const FermionField& X, - const FermionField& Y, - const std::vector& U, - RealD csw_t, RealD csw_r) { - GaugeLinkField force_mu(clover_force.Grid()), lambda(clover_force.Grid()); - PropagatorField Lambda(clover_force.Grid()); - - /////////////////////////////////////////////////////////// - // Clover term derivative - /////////////////////////////////////////////////////////// - Impl::outerProductImpl(Lambda, X, Y); - //std::cout << "Lambda:" << Lambda << std::endl; - - Gamma::Algebra sigma[] = { - Gamma::Algebra::SigmaXY, - Gamma::Algebra::SigmaXZ, - Gamma::Algebra::SigmaXT, - Gamma::Algebra::MinusSigmaXY, - Gamma::Algebra::SigmaYZ, - Gamma::Algebra::SigmaYT, - Gamma::Algebra::MinusSigmaXZ, - Gamma::Algebra::MinusSigmaYZ, - Gamma::Algebra::SigmaZT, - Gamma::Algebra::MinusSigmaXT, - Gamma::Algebra::MinusSigmaYT, - Gamma::Algebra::MinusSigmaZT}; - - /* - sigma_{\mu \nu}= - | 0 sigma[0] sigma[1] sigma[2] | - | sigma[3] 0 sigma[4] sigma[5] | - | sigma[6] sigma[7] 0 sigma[8] | - | sigma[9] sigma[10] sigma[11] 0 | - */ - - int count = 0; - clover_force = Zero(); - for (int mu = 0; mu < 4; mu++) - { - force_mu = Zero(); - for (int nu = 0; nu < 4; nu++) - { - if (mu == nu) - continue; - - RealD factor; - if (nu == 4 || mu == 4) - { - factor = 2.0 * csw_t; - } - else - { - factor = 2.0 * csw_r; - } - PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked - Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked - count++; - } - - pokeLorentz(clover_force, U[mu] * force_mu, mu); - } + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); } + }; @@ -227,15 +166,14 @@ public: ExpClover += pref * Clover; } - // Convert the data layout of the clover terms Clover = ExpClover * diag_mass; CloverInv = adj(ExpClover * (1.0/diag_mass)); } - static void CloverTermDerivative(GaugeField& clover_force, const FermionField& X, const FermionField& Y, - const std::vector& U, RealD csw_t, RealD csw_r) { + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); } + }; @@ -329,14 +267,9 @@ public: RealD csw_t, RealD diag_mass) { GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - // To be optimized: too much memory traffic; implement exp in improved layout - // Felix + Fabian: replace code below with - // - // ConvertLayout Clover -> Diagonal, Triangle - // ModifyBoundaries - // EvaluateExp - // code to be replaced + // To be optimized: implement exp in improved layout + // START: code to be replaced CloverField Clover(grid), ExpClover(grid); CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); @@ -353,9 +286,11 @@ public: // Convert the data layout of the clover terms CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); + CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); + // END: code to be replaced + Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; - CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); DiagonalInv = DiagonalInv*(1.0/diag_mass); TriangleInv = TriangleInv*(1.0/diag_mass); } diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 2ebf45f8..0991e274 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -134,45 +134,6 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Um double t4 = usecond(); CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass); -// CloverTerm += diag_mass; -// -// double t4 = usecond(); -// int lvol = _Umu.Grid()->lSites(); -// int DimRep = Impl::Dimension; -// -// double t5 = usecond(); -// { -// autoView(CTv,CloverTerm,CpuRead); -// autoView(CTIv,CloverTermInv,CpuWrite); -// thread_for(site, lvol, { -// Coordinate lcoor; -// grid->LocalIndexToLocalCoor(site, lcoor); -// Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); -// Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); -// typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); -// peekLocalSite(Qx, CTv, lcoor); -// //if (csw!=0){ -// for (int j = 0; j < Ns; j++) -// for (int k = 0; k < Ns; k++) -// for (int a = 0; a < DimRep; a++) -// for (int b = 0; b < DimRep; b++){ -// auto zz = Qx()(j, k)(a, b); -// EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); -// } -// // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; -// -// EigenInvCloverOp = EigenCloverOp.inverse(); -// //std::cout << EigenInvCloverOp << std::endl; -// for (int j = 0; j < Ns; j++) -// for (int k = 0; k < Ns; k++) -// for (int a = 0; a < DimRep; a++) -// for (int b = 0; b < DimRep; b++) -// Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); -// // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; -// // } -// pokeLocalSite(Qxinv, CTIv, lcoor); -// }); -// } double t5 = usecond(); // Separate the even and odd parts diff --git a/tests/core/Test_compact_wilson_clover_speedup.cc b/tests/core/Test_compact_wilson_clover_speedup.cc index 83e3da1f..7a74ab19 100644 --- a/tests/core/Test_compact_wilson_clover_speedup.cc +++ b/tests/core/Test_compact_wilson_clover_speedup.cc @@ -117,8 +117,8 @@ void runBenchmark(int* argc, char*** argv) { // type definitions typedef WilsonImpl WImpl; - typedef WilsonCloverFermion WilsonCloverOperator; - typedef CompactWilsonCloverFermion CompactWilsonCloverOperator; + typedef WilsonCloverFermion> WilsonCloverOperator; + typedef CompactWilsonCloverFermion> CompactWilsonCloverOperator; typedef typename WilsonCloverOperator::FermionField Fermion; typedef typename WilsonCloverOperator::GaugeField Gauge; From 11437930c56f86be3ee80d893a2d789d9c2f7c4f Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Tue, 22 Feb 2022 10:45:16 +0100 Subject: [PATCH 06/93] cleaned up definitions of wilsonclover fermions --- Grid/qcd/action/fermion/Fermion.h | 78 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index bfbc16b8..223ff9dd 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -138,62 +138,52 @@ typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; // Clover fermions -typedef CloverHelpers CloverR; -typedef CloverHelpers CloverF; -typedef CloverHelpers CloverD; +template using WilsonClover = WilsonCloverFermion>; +template using WilsonExpClover = WilsonCloverFermion>; -typedef ExpCloverHelpers ExpCloverR; -typedef ExpCloverHelpers ExpCloverF; -typedef ExpCloverHelpers ExpCloverD; +typedef WilsonClover WilsonCloverFermionR; +typedef WilsonClover WilsonCloverFermionF; +typedef WilsonClover WilsonCloverFermionD; -typedef WilsonCloverFermion WilsonCloverFermionR; -typedef WilsonCloverFermion WilsonCloverFermionF; -typedef WilsonCloverFermion WilsonCloverFermionD; +typedef WilsonExpClover WilsonExpCloverFermionR; +typedef WilsonExpClover WilsonExpCloverFermionF; +typedef WilsonExpClover WilsonExpCloverFermionD; -typedef WilsonCloverFermion WilsonExpCloverFermionR; -typedef WilsonCloverFermion WilsonExpCloverFermionF; -typedef WilsonCloverFermion WilsonExpCloverFermionD; +typedef WilsonClover WilsonCloverAdjFermionR; +typedef WilsonClover WilsonCloverAdjFermionF; +typedef WilsonClover WilsonCloverAdjFermionD; -typedef WilsonCloverFermion WilsonCloverAdjFermionR; -typedef WilsonCloverFermion WilsonCloverAdjFermionF; -typedef WilsonCloverFermion WilsonCloverAdjFermionD; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; - -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionD; // Compact Clover fermions -typedef CompactCloverHelpers CompactCloverR; -typedef CompactCloverHelpers CompactCloverF; -typedef CompactCloverHelpers CompactCloverD; +template using CompactWilsonClover = CompactWilsonCloverFermion>; +template using CompactWilsonExpClover = CompactWilsonCloverFermion>; -typedef CompactExpCloverHelpers CompactExpCloverR; -typedef CompactExpCloverHelpers CompactExpCloverF; -typedef CompactExpCloverHelpers CompactExpCloverD; +typedef CompactWilsonClover CompactWilsonCloverFermionR; +typedef CompactWilsonClover CompactWilsonCloverFermionF; +typedef CompactWilsonClover CompactWilsonCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionR; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionF; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonExpCloverFermionD; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionR; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionF; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; - -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; From 71034f828e76e066d51f67269ce7916c8f6c6cba Mon Sep 17 00:00:00 2001 From: Mattia Bruno <38449948+mbruno46@users.noreply.github.com> Date: Wed, 23 Feb 2022 01:02:27 +0100 Subject: [PATCH 07/93] attempt to fix broken WilsonExpClover; Compact version still broken will be replaced by F.Joswig --- Grid/qcd/action/fermion/CloverHelpers.h | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index af712852..7f7ad2b7 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -119,7 +119,8 @@ public: template using iImplClover = iScalar, Ns>>; typedef WilsonCloverHelpers Helpers; - static void plusIdentity(const CloverField& in) { + // Can this be avoided? + static void IdentityTimesC(const CloverField& in, RealD c) { int DimRep = Impl::Dimension; autoView(in_v, in, AcceleratorWrite); @@ -127,7 +128,7 @@ public: accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++) + cn[i] = cn[i-1] / RealD(i); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * Clover + cn[i]; Clover = ExpClover * diag_mass; - CloverInv = adj(ExpClover * (1.0/diag_mass)); + CloverInv = adj(ExpClover) * (1.0/diag_mass); } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { From 013dc2ef338bfb60d01947df65b6037e5d87412e Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:13:47 +0000 Subject: [PATCH 08/93] tests: core tests for wilson clover and wilson exp clover including compact version extended/added --- tests/core/Test_wilson_clover.cc | 214 ++++++++++- tests/core/Test_wilson_exp_clover.cc | 530 +++++++++++++++++++++++++++ 2 files changed, 724 insertions(+), 20 deletions(-) create mode 100644 tests/core/Test_wilson_exp_clover.cc diff --git a/tests/core/Test_wilson_clover.cc b/tests/core/Test_wilson_clover.cc index 642c30a8..8f143070 100644 --- a/tests/core/Test_wilson_clover.cc +++ b/tests/core/Test_wilson_clover.cc @@ -2,11 +2,12 @@ Grid physics library, www.github.com/paboyle/Grid - Source file: ./benchmarks/Benchmark_wilson.cc + Source file: ./tests/core/Test_wilson_clover.cc Copyright (C) 2015 Author: Guido Cossu + Fabian Joswig This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,8 +68,6 @@ int main(int argc, char **argv) tmp = Zero(); FermionField err(&Grid); err = Zero(); - FermionField err2(&Grid); - err2 = Zero(); FermionField phi(&Grid); random(pRNG, phi); FermionField chi(&Grid); @@ -77,6 +76,8 @@ int main(int argc, char **argv) SU::HotConfiguration(pRNG, Umu); std::vector U(4, &Grid); + double tolerance = 1e-4; + double volume = 1; for (int mu = 0; mu < Nd; mu++) { @@ -88,7 +89,7 @@ int main(int argc, char **argv) RealD csw_t = 1.0; WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - //Dwc.ImportGauge(Umu); // not necessary, included in the constructor + CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; @@ -112,7 +113,24 @@ int main(int argc, char **argv) setCheckerboard(r_eo, r_e); err = ref - r_eo; - std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; @@ -152,6 +170,22 @@ int main(int argc, char **argv) std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl; @@ -169,7 +203,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; @@ -188,7 +236,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; @@ -207,7 +269,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; @@ -249,7 +325,7 @@ int main(int argc, char **argv) ///////////////// WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - Dwc_prime.ImportGauge(U_prime); + CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); tmp = Omega * src; pickCheckerboard(Even, src_e, tmp); @@ -262,7 +338,37 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = chi - adj(Omega) * phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "=================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; @@ -272,7 +378,6 @@ int main(int argc, char **argv) phi = Zero(); WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); - Dw.ImportGauge(Umu); Dw.M(src, result); Dwc.M(src, chi); @@ -280,13 +385,24 @@ int main(int argc, char **argv) Dwc_prime.M(Omega * src, phi); WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); - Dw_prime.ImportGauge(U_prime); Dw_prime.M(Omega * src, result2); + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); err = chi - adj(Omega) * phi; - err2 = result - adj(Omega) * result2; - std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; - std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err2) << std::endl; + std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; @@ -296,7 +412,6 @@ int main(int argc, char **argv) phi = Zero(); err = Zero(); WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 - Dwc_csw0.ImportGauge(Umu); pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); @@ -316,7 +431,34 @@ int main(int argc, char **argv) setCheckerboard(src, src_o); err = chi - phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; @@ -348,9 +490,41 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = ref - phi; - std::cout << GridLogMessage << "ref (unpreconditioned operator) diff :" << norm2(ref) << std::endl; - std::cout << GridLogMessage << "phi (EO decomposition) diff :" << norm2(phi) << std::endl; - std::cout << GridLogMessage << "norm diff :" << norm2(err) << std::endl; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); Grid_finalize(); } diff --git a/tests/core/Test_wilson_exp_clover.cc b/tests/core/Test_wilson_exp_clover.cc new file mode 100644 index 00000000..8516d0dc --- /dev/null +++ b/tests/core/Test_wilson_exp_clover.cc @@ -0,0 +1,530 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/core/Test_wilson_exp_clover.cc + + Copyright (C) 2022 + + Author: Guido Cossu + Fabian Joswig + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + +int main(int argc, char **argv) +{ + Grid_init(&argc, &argv); + + auto latt_size = GridDefaultLatt(); + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALF" << sizeof(RealF) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALD" << sizeof(RealD) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REAL" << sizeof(Real) << std::endl; + + std::vector seeds({1, 2, 3, 4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename WilsonExpCloverFermionR::FermionField FermionField; + typename WilsonExpCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + + FermionField src(&Grid); + random(pRNG, src); + FermionField result(&Grid); + result = Zero(); + FermionField result2(&Grid); + result2 = Zero(); + FermionField ref(&Grid); + ref = Zero(); + FermionField tmp(&Grid); + tmp = Zero(); + FermionField err(&Grid); + err = Zero(); + FermionField phi(&Grid); + random(pRNG, phi); + FermionField chi(&Grid); + random(pRNG, chi); + LatticeGaugeField Umu(&Grid); + SU::HotConfiguration(pRNG, Umu); + std::vector U(4, &Grid); + + double tolerance = 1e-4; + + double volume = 1; + for (int mu = 0; mu < Nd; mu++) + { + volume = volume * latt_size[mu]; + } + + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + + WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + FermionField src_e(&RBGrid); + FermionField src_o(&RBGrid); + FermionField r_e(&RBGrid); + FermionField r_o(&RBGrid); + FermionField r_eo(&Grid); + pickCheckerboard(Even, src_e, src); + pickCheckerboard(Odd, src_o, src); + + Dwc.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; + std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + FermionField chi_e(&RBGrid); + FermionField chi_o(&RBGrid); + + FermionField dchi_e(&RBGrid); + FermionField dchi_o(&RBGrid); + + FermionField phi_e(&RBGrid); + FermionField phi_o(&RBGrid); + + FermionField dphi_e(&RBGrid); + FermionField dphi_o(&RBGrid); + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Meooe(chi_e, dchi_o); + Dwc.Meooe(chi_o, dchi_e); + Dwc.MeooeDag(phi_e, dphi_o); + Dwc.MeooeDag(phi_o, dphi_e); + + ComplexD pDce = innerProduct(phi_e, dchi_e); + ComplexD pDco = innerProduct(phi_o, dchi_o); + ComplexD cDpe = innerProduct(chi_e, dphi_e); + ComplexD cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.Mooee(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.Mooee(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInvDag(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + ////////////////////// Gauge Transformation + std::vector seeds2({5, 6, 7, 8}); + GridParallelRNG pRNG2(&Grid); + pRNG2.SeedFixedIntegers(seeds2); + LatticeColourMatrix Omega(&Grid); + LatticeColourMatrix ShiftedOmega(&Grid); + LatticeGaugeField U_prime(&Grid); + U_prime = Zero(); + LatticeColourMatrix U_prime_mu(&Grid); + U_prime_mu = Zero(); + SU::LieRandomize(pRNG2, Omega, 1.0); + for (int mu = 0; mu < Nd; mu++) + { + U[mu] = peekLorentz(Umu, mu); + ShiftedOmega = Cshift(Omega, mu, 1); + U_prime_mu = Omega * U[mu] * adj(ShiftedOmega); + pokeLorentz(U_prime, U_prime_mu, mu); + } + ///////////////// + + WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_prime.Mooee(src_e, phi_e); + Dwc_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "=================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + + WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); + + Dw.M(src, result); + Dwc.M(src, chi); + + Dwc_prime.M(Omega * src, phi); + + WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); + Dw_prime.M(Omega * src, result2); + + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff WilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_csw0.Mooee(src_e, phi_e); + Dwc_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + Dwc.Meooe(src_o, phi_e); + Dwc.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Grid_finalize(); +} From 239e2c1ee6a031be03c67c4b8de16e9538323982 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:26:34 +0000 Subject: [PATCH 09/93] tests: wilson clover cg tests now include compact variant as well as exponential wilson clover operators --- tests/solver/Test_wilsonclover_cg_prec.cc | 27 +++++++++++++++-- tests/solver/Test_wilsonclover_cg_schur.cc | 32 ++++++++++++++++----- tests/solver/Test_wilsonclover_cg_unprec.cc | 27 +++++++++++++++-- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/tests/solver/Test_wilsonclover_cg_prec.cc b/tests/solver/Test_wilsonclover_cg_prec.cc index 5ef2dc09..abf64a1f 100644 --- a/tests/solver/Test_wilsonclover_cg_prec.cc +++ b/tests/solver/Test_wilsonclover_cg_prec.cc @@ -71,7 +71,12 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + // HermitianOperator HermOp(Dw); // ConjugateGradient CG(1.0e-8,10000); @@ -80,12 +85,28 @@ int main (int argc, char ** argv) LatticeFermion src_o(&RBGrid); LatticeFermion result_o(&RBGrid); pickCheckerboard(Odd,src_o,src); - result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO(Dw); + result_o=Zero(); CG(HermOpEO,src_o,result_o); - + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_compact(Dw_compact); + result_o=Zero(); + CG(HermOpEO_compact,src_o,result_o); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp(Dwe); + result_o=Zero(); + CG(HermOpEO_exp,src_o,result_o); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp_compact(Dwe_compact); + result_o=Zero(); + CG(HermOpEO_exp_compact,src_o,result_o); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_schur.cc b/tests/solver/Test_wilsonclover_cg_schur.cc index 567a8283..50d06af7 100644 --- a/tests/solver/Test_wilsonclover_cg_schur.cc +++ b/tests/solver/Test_wilsonclover_cg_schur.cc @@ -60,18 +60,36 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); LatticeFermion src(&Grid); random(pRNG,src); - LatticeFermion result(&Grid); result=Zero(); - LatticeFermion resid(&Grid); - - RealD mass = -0.1; - RealD csw_r = 1.0; - RealD csw_t = 1.0; - WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + LatticeFermion result(&Grid); + LatticeFermion resid(&Grid); ConjugateGradient CG(1.0e-8,10000); SchurRedBlackDiagMooeeSolve SchurSolver(CG); + RealD mass = -0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + RealD cF = 1.0; + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); SchurSolver(Dw,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dw_compact,src,result); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); + SchurSolver(Dwe,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dwe_compact,src,result); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_unprec.cc b/tests/solver/Test_wilsonclover_cg_unprec.cc index 755d80e1..2a859f11 100644 --- a/tests/solver/Test_wilsonclover_cg_unprec.cc +++ b/tests/solver/Test_wilsonclover_cg_unprec.cc @@ -59,7 +59,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); - LatticeFermion result(&Grid); result=Zero(); + LatticeFermion result(&Grid); LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; @@ -70,11 +70,34 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); - MdagMLinearOperator HermOp(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + MdagMLinearOperator HermOp(Dw); + result=Zero(); CG(HermOp,src,result); + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + MdagMLinearOperator HermOp_compact(Dw_compact); + result=Zero(); + CG(HermOp_compact,src,result); + + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp(Dwe); + result=Zero(); + CG(HermOp_exp,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp_compact(Dwe_compact); + result=Zero(); + CG(HermOp_exp_compact,src,result); + Grid_finalize(); } From 438caab25f66918dc5f29bd4a873f52baef2aeeb Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sun, 27 Feb 2022 18:27:18 +0000 Subject: [PATCH 10/93] generate_instantiations.sh now correctly produces instantiations for CompactClover variant, redundant instantiations removed. --- ...ilsonKernelsInstantiationWilsonAdjImplD.cc | 52 +------------------ ...ilsonKernelsInstantiationWilsonAdjImplF.cc | 52 +------------------ .../WilsonKernelsInstantiationWilsonImplD.cc | 52 +------------------ .../WilsonKernelsInstantiationWilsonImplF.cc | 52 +------------------ ...tiationWilsonTwoIndexAntiSymmetricImplD.cc | 52 +------------------ ...tiationWilsonTwoIndexAntiSymmetricImplF.cc | 52 +------------------ ...stantiationWilsonTwoIndexSymmetricImplD.cc | 52 +------------------ ...stantiationWilsonTwoIndexSymmetricImplF.cc | 52 +------------------ .../WilsonKernelsInstantiationZWilsonImplD.cc | 52 +------------------ .../WilsonKernelsInstantiationZWilsonImplF.cc | 52 +------------------ .../instantiation/generate_instantiations.sh | 24 +++++++-- 11 files changed, 29 insertions(+), 515 deletions(-) mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc mode change 100644 => 120000 Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index a09fd420..d6845e75 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \ GparityWilsonImplF \ GparityWilsonImplD " +COMPACT_WILSON_IMPL_LIST=" \ + WilsonImplF \ + WilsonImplD " + DWF_IMPL_LIST=" \ WilsonImplF \ WilsonImplD \ @@ -40,13 +44,23 @@ EOF done -CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" +CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" for impl in $WILSON_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc +done +done + +CC_LIST="CompactWilsonCloverFermionInstantiation" + +for impl in $COMPACT_WILSON_IMPL_LIST +do +for f in $CC_LIST +do + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done @@ -63,14 +77,14 @@ for impl in $DWF_IMPL_LIST $GDWF_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done # overwrite the .cc file in Gparity directories for impl in $GDWF_IMPL_LIST do - ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc + ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc done @@ -84,7 +98,7 @@ for impl in $STAG_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done From 3e882f555dcb75a29e640c6c70fd7e7f23973b73 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 1 Mar 2022 08:54:45 -0500 Subject: [PATCH 11/93] Large / small sumD options --- Grid/lattice/Lattice_reduction.h | 9 +++++ Grid/lattice/Lattice_reduction_gpu.h | 53 ++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 326b9ea3..c3478ab4 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -142,6 +142,15 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) return sumD_cpu(arg,osites); #endif } +template +inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites) +{ +#if defined(GRID_CUDA)||defined(GRID_HIP) + return sumD_gpu_large(arg,osites); +#else + return sumD_cpu(arg,osites); +#endif +} template inline typename vobj::scalar_object sum(const Lattice &arg) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index 73a704f5..c685a2c0 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -198,7 +198,7 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { // Possibly promote to double and sum ///////////////////////////////////////////////////////////////////////////////////////////////////////// template -inline typename vobj::scalar_objectD sumD_gpu_internal(const vobj *lat, Integer osites) +inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites) { typedef typename vobj::scalar_objectD sobj; typedef decltype(lat) Iterator; @@ -207,7 +207,8 @@ inline typename vobj::scalar_objectD sumD_gpu_internal(const vobj *lat, Integer Integer size = osites*nsimd; Integer numThreads, numBlocks; - getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); + int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); + assert(ok); Integer smemSize = numThreads * sizeof(sobj); @@ -219,6 +220,37 @@ inline typename vobj::scalar_objectD sumD_gpu_internal(const vobj *lat, Integer auto result = buffer_v[0]; return result; } + +template +inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites) +{ + typedef typename vobj::vector_type vector; + typedef typename vobj::scalar_typeD scalarD; + typedef typename vobj::scalar_objectD sobj; + sobj ret; + scalarD *ret_p = (scalarD *)&ret; + + const int words = sizeof(vobj)/sizeof(vector); + + Integer nsimd= vobj::Nsimd(); + Integer size = osites*nsimd; + Integer numThreads, numBlocks; + + Vector buffer(osites); + vector *dat = (vector *)lat; + vector *buf = &buffer[0]; + iScalar *tbuf =(iScalar *) &buffer[0]; + for(int w=0;w inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) { @@ -236,23 +268,14 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) int ok = getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); if ( ok ) { - ret = sumD_gpu_internal(lat,osites); + ret = sumD_gpu_small(lat,osites); } else { - Vector buffer(osites); - vector *dat = (vector *)lat; - vector *buf = &buffer[0]; - iScalar *tbuf =(iScalar *) &buffer[0]; - for(int w=0;w Date: Tue, 1 Mar 2022 11:17:24 -0500 Subject: [PATCH 12/93] Threaded intranode comms transfer - ideally between NUMA domains --- Grid/threads/Accelerator.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index b427b304..12483185 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -481,9 +481,10 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); #define accelerator_for2d(iter1, num1, iter2, num2, nsimd, ... ) thread_for2d(iter1,num1,iter2,num2,{ __VA_ARGS__ }); accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);} -inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);} + +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ GridThread::bcopy(from,to,bytes);} +inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);} inline void acceleratorCopySynchronise(void) {}; inline int acceleratorIsCommunicable(void *ptr){ return 1; } From d4ae71b8806e535145a718a9e1e00a7315aaf5bc Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 2 Mar 2022 15:40:18 +0000 Subject: [PATCH 13/93] sum_gpu_large and sum_gpu templates added. --- Grid/lattice/Lattice_reduction.h | 16 ++++++++++++++++ Grid/lattice/Lattice_reduction_gpu.h | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index c3478ab4..0ddac437 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -168,6 +168,22 @@ inline typename vobj::scalar_object sum(const Lattice &arg) return ssum; } +template +inline typename vobj::scalar_object sum_large(const Lattice &arg) +{ +#if defined(GRID_CUDA)||defined(GRID_HIP) + autoView( arg_v, arg, AcceleratorRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_gpu_large(&arg_v[0],osites); +#else + autoView(arg_v, arg, CpuRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_cpu(&arg_v[0],osites); +#endif + arg.Grid()->GlobalSum(ssum); + return ssum; +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Deterministic Reduction operations //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c685a2c0..c3422af3 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -288,6 +288,14 @@ inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites) return result; } +template +inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites) +{ + typedef typename vobj::scalar_object sobj; + sobj result; + result = sumD_gpu_large(lat,osites); + return result; +} NAMESPACE_END(Grid); From d1decee4cce0b9a9f02bb5521cb06840b20022ad Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 2 Mar 2022 16:54:23 +0000 Subject: [PATCH 14/93] Cleaned up unused variables in Lattice_reduction_gpu.h --- Grid/lattice/Lattice_reduction_gpu.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c3422af3..bad86d2a 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -232,10 +232,6 @@ inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osi const int words = sizeof(vobj)/sizeof(vector); - Integer nsimd= vobj::Nsimd(); - Integer size = osites*nsimd; - Integer numThreads, numBlocks; - Vector buffer(osites); vector *dat = (vector *)lat; vector *buf = &buffer[0]; @@ -258,10 +254,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) typedef typename vobj::scalar_typeD scalarD; typedef typename vobj::scalar_objectD sobj; sobj ret; - scalarD *ret_p = (scalarD *)&ret; - const int words = sizeof(vobj)/sizeof(vector); - Integer nsimd= vobj::Nsimd(); Integer size = osites*nsimd; Integer numThreads, numBlocks; @@ -275,7 +268,6 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) return ret; } - ///////////////////////////////////////////////////////////////////////////////////////////////////////// // Return as same precision as input performing reduction in double precision though ///////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -297,5 +289,4 @@ inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osite return result; } - NAMESPACE_END(Grid); From d5b2323a579ca3b8e95fb6686a0fdf7d151a4503 Mon Sep 17 00:00:00 2001 From: Felix Ziegler Date: Mon, 7 Mar 2022 14:44:24 +0000 Subject: [PATCH 15/93] included Cayley-Hamilton exponentiation for the compact Wilson exp clover, bug fix for inverse of exp clover --- Grid/qcd/action/fermion/CloverHelpers.h | 296 +++++++++++++++++++----- 1 file changed, 240 insertions(+), 56 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 7f7ad2b7..134ee161 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -35,7 +35,7 @@ #include //////////////////////////////////////////// -// Standard Clover +// Standard Clover // (4+m0) + csw * clover_term // Exp Clover // (4+m0) * exp(csw/(4+m0) clover_term) @@ -46,10 +46,10 @@ NAMESPACE_BEGIN(Grid); ////////////////////////////////// -// Generic Standard Clover +// Generic Standard Clover ////////////////////////////////// -template +template class CloverHelpers: public WilsonCloverHelpers { public: @@ -61,7 +61,7 @@ public: static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { GridBase *grid = CloverTerm.Grid(); CloverTerm += diag_mass; - + int lvol = grid->lSites(); int DimRep = Impl::Dimension; { @@ -83,7 +83,7 @@ public: EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); } // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - + EigenInvCloverOp = EigenCloverOp.inverse(); //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) @@ -106,10 +106,10 @@ public: ////////////////////////////////// -// Generic Exp Clover +// Generic Exp Clover ////////////////////////////////// -template +template class ExpCloverHelpers: public WilsonCloverHelpers { public: @@ -122,9 +122,9 @@ public: // Can this be avoided? static void IdentityTimesC(const CloverField& in, RealD c) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa=0; i--) ExpClover = ExpClover * Clover + cn[i]; - + + // prepare inverse + CloverInv = (-1.0)*Clover; + Clover = ExpClover * diag_mass; - CloverInv = adj(ExpClover) * (1.0/diag_mass); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * CloverInv + cn[i]; + + CloverInv = ExpClover * (1.0/diag_mass); + } static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { @@ -182,11 +192,11 @@ public: ////////////////////////////////// -// Compact Standard Clover +// Compact Standard Clover ////////////////////////////////// -template +template class CompactCloverHelpers: public CompactWilsonCloverHelpers, public WilsonCloverHelpers { public: @@ -197,7 +207,7 @@ public: typedef WilsonCloverHelpers Helpers; typedef CompactWilsonCloverHelpers CompactHelpers; - + static void MassTerm(CloverField& Clover, RealD diag_mass) { Clover += diag_mass; } @@ -219,10 +229,9 @@ public: }; ////////////////////////////////// -// Compact Exp Clover +// Compact Exp Clover ////////////////////////////////// - template class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { public: @@ -230,22 +239,28 @@ public: INHERIT_IMPL_TYPES(Impl); INHERIT_CLOVER_TYPES(Impl); INHERIT_COMPACT_CLOVER_TYPES(Impl); - - template using iImplClover = iScalar, Ns>>; + + template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; - + +/* static void plusIdentity(const CloverField& in) { int DimRep = Impl::Dimension; - + autoView(in_v, in, AcceleratorWrite); - + accelerator_for(ss, in.Grid()->oSites(), 1, { for (int sa=0; sa> &t, RealD R) {return getNMAX(1e-12,R);} static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} - - static void MassTerm(CloverField& Clover, RealD diag_mass) { - // csw/(diag_mass) * clover - Clover = Clover * (1.0/diag_mass); - } - - static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + + static void ExponentiateHermitean6by6(const iMatrix &arg, const RealD& alpha, const std::vector& cN, const int Niter, iMatrix& dest){ + + typedef iMatrix mat; + + RealD qn[6]; + RealD qnold[6]; + RealD p[5]; + RealD trA2, trA3, trA4; + + mat A2, A3, A4, A5; + A2 = alpha * alpha * arg * arg; + A3 = alpha * arg * A2; + A4 = A2 * A2; + A5 = A2 * A3; + + trA2 = toReal( trace(A2) ); + trA3 = toReal( trace(A3) ); + trA4 = toReal( trace(A4)); + + p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0; + p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0; + p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2; + p[3] = trA3 / 3.0; + p[4] = 0.5 * trA2; + + qnold[0] = cN[Niter]; + qnold[1] = 0.0; + qnold[2] = 0.0; + qnold[3] = 0.0; + qnold[4] = 0.0; + qnold[5] = 0.0; + + for(int i = Niter-1; i >= 0; i--) + { + qn[0] = p[0] * qnold[5] + cN[i]; + qn[1] = p[1] * qnold[5] + qnold[0]; + qn[2] = p[2] * qnold[5] + qnold[1]; + qn[3] = p[3] * qnold[5] + qnold[2]; + qn[4] = p[4] * qnold[5] + qnold[3]; + qn[5] = qnold[4]; + + qnold[0] = qn[0]; + qnold[1] = qn[1]; + qnold[2] = qn[2]; + qnold[3] = qn[3]; + qnold[4] = qn[4]; + qnold[5] = qn[5]; + } + + mat unit(1.0); + + dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5); + + } + + static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); - - // To be optimized: implement exp in improved layout - // START: code to be replaced - CloverField Clover(grid), ExpClover(grid); - - CompactHelpers::ConvertLayout(Diagonal, Triangle, Clover); - - ExpClover = Clover; - plusIdentity(ExpClover); // 1 + Clover - - RealD pref = 1.0; - for (int i=2; i<=NMAX; i++) { - Clover = Clover * Clover; - pref /= (RealD)(i); - ExpClover += pref * Clover; + + // + // Implementation completely in Daniel's layout + // + + // Taylor expansion with Cayley-Hamilton recursion + // underlying Horner scheme as above + std::vector cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++){ + cn[i] = cn[i-1] / RealD(i); } - - // Convert the data layout of the clover terms - CompactHelpers::ConvertLayout(ExpClover, Diagonal, Triangle); - CompactHelpers::ConvertLayout(adj(ExpClover), DiagonalInv, TriangleInv); - // END: code to be replaced - + + // Taken over from Daniel's implementation + conformable(Diagonal, Triangle); + + long lsites = grid->lSites(); + + typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; + typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; + typedef iMatrix mat; + + autoView(diagonal_v, Diagonal, CpuRead); + autoView(triangle_v, Triangle, CpuRead); + autoView(diagonalExp_v, Diagonal, CpuWrite); + autoView(triangleExp_v, Triangle, CpuWrite); + autoView(diagonalExpInv_v, DiagonalInv, CpuWrite); + autoView(triangleExpInv_v, TriangleInv, CpuWrite); + + thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite + + mat srcCloverOpUL(0.0); // upper left block + mat srcCloverOpLR(0.0); // lower right block + mat ExpCloverOp; + + scalar_object_diagonal diagonal_tmp = Zero(); + scalar_object_diagonal diagonal_exp_tmp = Zero(); + scalar_object_triangle triangle_tmp = Zero(); + scalar_object_triangle triangle_exp_tmp = Zero(); + + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + + peekLocalSite(diagonal_tmp, diagonal_v, lcoor); + peekLocalSite(triangle_tmp, triangle_v, lcoor); + + // block = 0 + int block; + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpUL(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpUL(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + // block = 1 + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpLR(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpLR(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + + // exp(Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); + + // inverse exp(-Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,-1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExpInv_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExpInv_v, lcoor); + }); + Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; + DiagonalInv = DiagonalInv*(1.0/diag_mass); TriangleInv = TriangleInv*(1.0/diag_mass); } - + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); } - + }; From 56c089d347aa7f40cb17b05f12dec7021a356066 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 7 Mar 2022 16:29:19 +0000 Subject: [PATCH 16/93] Removed leftover comments --- Grid/qcd/action/fermion/CloverHelpers.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 134ee161..f7f81421 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -74,7 +74,7 @@ public: Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); peekLocalSite(Qx, CTv, lcoor); - //if (csw!=0){ + for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) @@ -82,17 +82,13 @@ public: auto zz = Qx()(j, k)(a, b); EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) for (int b = 0; b < DimRep; b++) Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } pokeLocalSite(Qxinv, CTIv, lcoor); }); } @@ -153,7 +149,6 @@ public: int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); - // csw/(diag_mass) * clover Clover *= (1.0/diag_mass); // Taylor expansion, slow but generic @@ -243,19 +238,6 @@ public: template using iImplClover = iScalar, Ns>>; typedef CompactWilsonCloverHelpers CompactHelpers; -/* - static void plusIdentity(const CloverField& in) { - int DimRep = Impl::Dimension; - - autoView(in_v, in, AcceleratorWrite); - - accelerator_for(ss, in.Grid()->oSites(), 1, { - for (int sa=0; sa Date: Mon, 7 Mar 2022 17:43:33 +0000 Subject: [PATCH 17/93] Reintroduced explicit inversion of the Clover term in case of the CompactExpClover because of the open boundary O(a) improvement. Changed the timing output to GridLogDebug --- Grid/qcd/action/fermion/CloverHelpers.h | 46 ++----------------- ...CompactWilsonCloverFermionImplementation.h | 41 +++++++++-------- .../WilsonCloverFermionImplementation.h | 19 ++++---- 3 files changed, 34 insertions(+), 72 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index f7f81421..ad456bde 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -207,13 +207,13 @@ public: Clover += diag_mass; } - static void Instantiate(CloverDiagonalField& Diagonal, + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { - // Invert the clover term in the improved layout - CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); + + // Do nothing } // TODO: implement Cmunu for better performances with compact layout, but don't do it @@ -313,7 +313,7 @@ public: } - static void Instantiate(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { @@ -345,8 +345,6 @@ public: autoView(triangle_v, Triangle, CpuRead); autoView(diagonalExp_v, Diagonal, CpuWrite); autoView(triangleExp_v, Triangle, CpuWrite); - autoView(diagonalExpInv_v, DiagonalInv, CpuWrite); - autoView(triangleExpInv_v, TriangleInv, CpuWrite); thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite @@ -421,46 +419,10 @@ public: pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); - - // inverse exp(-Clover) - - ExponentiateHermitean6by6(srcCloverOpUL,-1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 0; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - ExponentiateHermitean6by6(srcCloverOpLR,-1.0/diag_mass,cn,NMAX,ExpCloverOp); - - block = 1; - for(int i = 0; i < 6; i++){ - for(int j = 0; j < 6; j++){ - if (i == j){ - diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); - } - else if(i < j){ - triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); - } - } - } - - pokeLocalSite(diagonal_exp_tmp, diagonalExpInv_v, lcoor); - pokeLocalSite(triangle_exp_tmp, triangleExpInv_v, lcoor); }); Diagonal = Diagonal * diag_mass; Triangle = Triangle * diag_mass; - - DiagonalInv = DiagonalInv*(1.0/diag_mass); - TriangleInv = TriangleInv*(1.0/diag_mass); } diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index b42957ac..5120680b 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -326,16 +326,20 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie double t4 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); - // Possible modify the boundary values + // Exponentiate the clover (nothing happens in case of the standard clover) double t5 = usecond(); + CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + + // Possible modify the boundary values + double t6 = usecond(); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Instantiate based on clover policy - double t6 = usecond(); - CloverHelpers::Instantiate(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + // Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions) + double t7 = usecond(); + CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); // Fill the remaining clover fields - double t7 = usecond(); + double t8 = usecond(); pickCheckerboard(Even, DiagonalEven, Diagonal); pickCheckerboard(Even, TriangleEven, Triangle); pickCheckerboard(Odd, DiagonalOdd, Diagonal); @@ -346,20 +350,19 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie pickCheckerboard(Odd, TriangleInvOdd, TriangleInv); // Report timings - double t8 = usecond(); -#if 0 - std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", convert = " << (t5 - t4) / 1e6 - << ", boundaries = " << (t6 - t5) / 1e6 - << ", instantiate = " << (t7 - t6) / 1e6 - << ", pick cbs = " << (t8 - t7) / 1e6 - << ", total = " << (t8 - t0) / 1e6 - << std::endl; -#endif + double t9 = usecond(); + + std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl; + std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl; } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 0991e274..2a7e7535 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -150,17 +150,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Um pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); double t6 = usecond(); -#if 0 - std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", instantiation = " << (t5 - t4) / 1e6 - << ", pick cbs = " << (t6 - t5) / 1e6 - << ", total = " << (t6 - t0) / 1e6 - << std::endl; -#endif + std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "instantiation = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t6 - t0) / 1e6 << std::endl; } template From 0c0c2b1e20d3290ca2d54fe3a8a9339aabc6d9dd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 8 Mar 2022 09:44:51 +0000 Subject: [PATCH 18/93] Unnecessary arguments of CloverHelpers::Exponentiate_Clover removed. --- Grid/qcd/action/fermion/CloverHelpers.h | 6 +----- .../CompactWilsonCloverFermionImplementation.h | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index ad456bde..cd62f515 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -209,8 +209,6 @@ public: static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, - CloverDiagonalField& DiagonalInv, - CloverTriangleField& TriangleInv, RealD csw_t, RealD diag_mass) { // Do nothing @@ -313,9 +311,7 @@ public: } - static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, - CloverDiagonalField& DiagonalInv, CloverTriangleField& TriangleInv, - RealD csw_t, RealD diag_mass) { + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) { GridBase* grid = Diagonal.Grid(); int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 5120680b..2a53a254 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -328,7 +328,7 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeFie // Exponentiate the clover (nothing happens in case of the standard clover) double t5 = usecond(); - CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, DiagonalInv, TriangleInv, csw_t, this->diag_mass); + CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass); // Possible modify the boundary values double t6 = usecond(); From 92a83a9eb33c54467b29ff557bad4aed90c4f65e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 16 Mar 2022 17:14:36 +0000 Subject: [PATCH 19/93] Performance improve for Tesseract --- Grid/threads/Accelerator.h | 6 +++--- Grid/threads/Threads.h | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index 12483185..389f2cc4 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -482,9 +482,9 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific -inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);} -inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ GridThread::bcopy(from,to,bytes);} -inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { GridThread::bcopy(from,to,bytes);} +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); } +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);} +inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);} inline void acceleratorCopySynchronise(void) {}; inline int acceleratorIsCommunicable(void *ptr){ return 1; } diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index a9fa13ea..6887134d 100644 --- a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -72,3 +72,20 @@ Author: paboyle #define thread_region DO_PRAGMA(omp parallel) #define thread_critical DO_PRAGMA(omp critical) +#ifdef GRID_OMP +inline void thread_bcopy(void *from, void *to,size_t bytes) +{ + uint64_t *ufrom = (uint64_t *)from; + uint64_t *uto = (uint64_t *)to; + assert(bytes%8==0); + uint64_t words=bytes/8; + thread_for(w,words,{ + uto[w] = ufrom[w]; + }); +} +#else +inline void thread_bcopy(void *from, void *to,size_t bytes) +{ + bcopy(from,to,bytes); +} +#endif From 0542eaf1da28daad4f6e346b78c2c8317e9566fe Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:02:09 +0100 Subject: [PATCH 20/93] First version of conserved current contraction for Wilson type quarks --- .../WilsonFermionImplementation.h | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 84ac25c1..12e65d24 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -603,7 +603,37 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + + PropagatorField tmp_shifted(UGrid); + PropagatorField g5Lg5(UGrid); + PropagatorField R(UGrid); + PropagatorField gmuR(UGrid); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + tmp_shifted=Cshift(q_in_1,mu,1); + g5Lg5=g5*tmp_shifted*g5; + R=q_in_2; + gmuR=gmu*R; + + qout=adj(g5Lg5)*R; + qout+=adj(g5Lg5)*gmuR; + + g5Lg5=g5*q_in_1*g5; + tmp_shifted=Cshift(q_in_2,mu,1); + Impl::multLinkField(R,this->Umu,tmp_shifted,mu); + gmuR=gmu*R; + + qout-=adj(g5Lg5)*R; + qout+=adj(g5Lg5)*gmuR; + + qout/=2; } From cdf31d52c15109102b31f88b135bbaff8f216d30 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:04:35 +0100 Subject: [PATCH 21/93] GaugeGrid and typo fixed --- .../implementation/WilsonFermionImplementation.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 12e65d24..60ccdc2e 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -603,6 +603,7 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); + auto UGrid= this->GaugeGrid(); PropagatorField tmp_shifted(UGrid); PropagatorField g5Lg5(UGrid); @@ -622,18 +623,18 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, R=q_in_2; gmuR=gmu*R; - qout=adj(g5Lg5)*R; - qout+=adj(g5Lg5)*gmuR; + q_out=adj(g5Lg5)*R; + q_out+=adj(g5Lg5)*gmuR; g5Lg5=g5*q_in_1*g5; tmp_shifted=Cshift(q_in_2,mu,1); Impl::multLinkField(R,this->Umu,tmp_shifted,mu); gmuR=gmu*R; - qout-=adj(g5Lg5)*R; - qout+=adj(g5Lg5)*gmuR; + q_out-=adj(g5Lg5)*R; + q_out+=adj(g5Lg5)*gmuR; - qout/=2; + q_out/=2; } From fe993c0836fe6c234e5e34112beecf0a5d98cadd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 31 Mar 2022 17:08:17 +0100 Subject: [PATCH 22/93] /=2 replaced by *=0.5 --- .../action/fermion/implementation/WilsonFermionImplementation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 60ccdc2e..3e7bb291 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -634,7 +634,7 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, q_out-=adj(g5Lg5)*R; q_out+=adj(g5Lg5)*gmuR; - q_out/=2; + q_out*=0.5; } From 603fd967477fd2b5b8ec06dfbcb948c13b46b151 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 10:58:56 +0100 Subject: [PATCH 23/93] Missing link multiplication added. --- .../fermion/implementation/WilsonFermionImplementation.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 3e7bb291..c82dd172 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -619,7 +619,8 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Gamma gmu=Gamma(Gmu[mu]); tmp_shifted=Cshift(q_in_1,mu,1); - g5Lg5=g5*tmp_shifted*g5; + Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu); + g5Lg5=g5*g5Lg5*g5; R=q_in_2; gmuR=gmu*R; From 9e82c468ab8ad6d929e982b1759c0049da7f6254 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 15:54:43 +0100 Subject: [PATCH 24/93] Multiplication of diagonal mass in exponentiate fixed for gpus --- Grid/qcd/action/fermion/CloverHelpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index cd62f515..72727279 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -417,8 +417,8 @@ public: pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); - Diagonal = Diagonal * diag_mass; - Triangle = Triangle * diag_mass; + Diagonal *= diag_mass; + Triangle *= diag_mass; } From 427c8695feaba4de96e865a3754e07f669e6e121 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 16:20:21 +0100 Subject: [PATCH 25/93] Change signs and prefactors for conserved current to mimic the 5d version. --- .../WilsonFermionImplementation.h | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index c82dd172..40e774b3 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -618,24 +618,22 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, }; Gamma gmu=Gamma(Gmu[mu]); + g5Lg5=g5*q_in_1*g5; + tmp_shifted=Cshift(q_in_2,mu,1); + Impl::multLinkField(R,this->Umu,tmp_shifted,mu); + gmuR=gmu*R; + + q_out=adj(g5Lg5)*R; + q_out-=adj(g5Lg5)*gmuR; + tmp_shifted=Cshift(q_in_1,mu,1); Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu); g5Lg5=g5*g5Lg5*g5; R=q_in_2; gmuR=gmu*R; - q_out=adj(g5Lg5)*R; - q_out+=adj(g5Lg5)*gmuR; - - g5Lg5=g5*q_in_1*g5; - tmp_shifted=Cshift(q_in_2,mu,1); - Impl::multLinkField(R,this->Umu,tmp_shifted,mu); - gmuR=gmu*R; - q_out-=adj(g5Lg5)*R; - q_out+=adj(g5Lg5)*gmuR; - - q_out*=0.5; + q_out-=adj(g5Lg5)*gmuR; } From 6577a03d1641df1f94cc53f1accd0d5485e325d0 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 1 Apr 2022 18:39:12 +0100 Subject: [PATCH 26/93] Explcitly closed views in Exponentiate_Clover --- Grid/qcd/action/fermion/CloverHelpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 72727279..67417fcd 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -417,6 +417,11 @@ public: pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); + diagonal_v.ViewClose(); + triangle_v.ViewClose(); + diagonalExp_v.ViewClose(); + triangleExp_v.ViewClose(); + Diagonal *= diag_mass; Triangle *= diag_mass; } From f23626a6b80a653c68d2a9d2860dc65c5be98f09 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Sat, 2 Apr 2022 11:32:15 +0100 Subject: [PATCH 27/93] End scope by additional block in CloverHelpers.h --- Grid/qcd/action/fermion/CloverHelpers.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index 67417fcd..c72a79e8 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -332,7 +332,7 @@ public: conformable(Diagonal, Triangle); long lsites = grid->lSites(); - + { typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; typedef iMatrix mat; @@ -416,11 +416,7 @@ public: pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); }); - - diagonal_v.ViewClose(); - triangle_v.ViewClose(); - diagonalExp_v.ViewClose(); - triangleExp_v.ViewClose(); + } Diagonal *= diag_mass; Triangle *= diag_mass; From c8a824425bbb5b543d41340d91a78ecc9273011e Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 10:58:22 +0100 Subject: [PATCH 28/93] Error message added if another conserved current than vector is requested for Wilson type fermions. --- .../WilsonFermionImplementation.h | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 40e774b3..69a7fb16 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -599,16 +599,22 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Current curr_type, unsigned int mu) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + Gamma g5(Gamma::Algebra::Gamma5); conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); auto UGrid= this->GaugeGrid(); - PropagatorField tmp_shifted(UGrid); - PropagatorField g5Lg5(UGrid); - PropagatorField R(UGrid); - PropagatorField gmuR(UGrid); + PropagatorField tmp_shifted(UGrid); + PropagatorField g5Lg5(UGrid); + PropagatorField R(UGrid); + PropagatorField gmuR(UGrid); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -647,6 +653,12 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, unsigned int tmax, ComplexField &lattice_cmplx) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); assert(0); From d7191e5a026412c6c6c76a1f29961b4cb007506c Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 11:48:56 +0100 Subject: [PATCH 29/93] SeqConservedCurrent implemented for Wilson fermions --- .../WilsonFermionImplementation.h | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 69a7fb16..6bfb9305 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -659,9 +659,45 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, exit(1); } + int tshift = (mu == Nd-1) ? 1 : 0; + unsigned int LLt = GridDefaultLatt()[Tp]; conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + auto UGrid= this->GaugeGrid(); + + PropagatorField tmp(UGrid); + PropagatorField Utmp(UGrid); + PropagatorField L(UGrid); + PropagatorField zz (UGrid); + zz=Zero(); + LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + tmp = Cshift(q_in,mu,1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu); + tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop + tmp = where((lcoor>=tmin),tmp,zz); // Mask the time + q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated + + tmp = q_in *lattice_cmplx; + tmp = Cshift(tmp,mu,-1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link + tmp = -( Utmp + gmu*Utmp ); + // Mask the time + if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice + unsigned int t0 = 0; + tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz); + } else { + tmp = where((lcoor>=tmin+tshift),tmp,zz); + } + q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated } NAMESPACE_END(Grid); From 82aecbf4cfdb76a44c90855292e5c08628e48b62 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 5 Apr 2022 15:26:39 +0100 Subject: [PATCH 30/93] Test_wilson_conserved_current added --- tests/core/Test_wilson_conserved_current.cc | 290 ++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 tests/core/Test_wilson_conserved_current.cc diff --git a/tests/core/Test_wilson_conserved_current.cc b/tests/core/Test_wilson_conserved_current.cc new file mode 100644 index 00000000..3eba4fdc --- /dev/null +++ b/tests/core/Test_wilson_conserved_current.cc @@ -0,0 +1,290 @@ +/************************************************************************************* +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_cayley_cg.cc + +Copyright (C) 2022 + +Author: Peter Boyle +Author: Fabian Joswig + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; + + +template +void TestConserved(What & Ddwf, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5, + What *Ddwfrev=nullptr); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout< omegas; + std::vector < ComplexD > omegasrev(Ls); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplexF::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); + std::vector seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::HotConfiguration(RNG4,Umu); + } + + RealD mass=0.3; + RealD M5 =1.0; + std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} + + + +template +void TestConserved(Action & Ddwf, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5, + Action * Ddwfrev) +{ + LatticePropagator phys_src(UGrid); + LatticePropagator seqsrc(FGrid); + LatticePropagator prop5(FGrid); + LatticePropagator prop5rev(FGrid); + LatticePropagator prop4(UGrid); + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + LatticeComplex PA (UGrid); + LatticeComplex SV (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + LatticePropagator seqprop(UGrid); + + SpinColourMatrix kronecker; kronecker=1.0; + Coordinate coor({0,0,0,0}); + phys_src=Zero(); + pokeSite(kronecker,phys_src,coor); + + ConjugateGradient CG(1.0e-16,100000); + SchurRedBlackDiagTwoSolve schur(CG); + ZeroGuesser zpg; + for(int s=0;s(src4,phys_src,s,c); + + LatticeFermion src5 (FGrid); + Ddwf.ImportPhysicalFermionSource(src4,src5); + + LatticeFermion result5(FGrid); result5=Zero(); + schur(Ddwf,src5,result5,zpg); + std::cout<(prop5,result5,s,c); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(prop4,result4,s,c); + + if( Ddwfrev ) { + Ddwfrev->ImportPhysicalFermionSource(src4,src5); + result5 = Zero(); + schur(*Ddwfrev,src5,result5,zpg); + } + FermToProp(prop5rev,result5,s,c); + } + } + + auto curr = Current::Vector; + const int mu_J=0; + const int t_J=0; + + LatticeComplex ph (UGrid); ph=1.0; + + Ddwf.SeqConservedCurrent(prop5, + seqsrc, + phys_src, + curr, + mu_J, + t_J, + t_J,// whole lattice + ph); + + for(int s=0;s(src5,seqsrc,s,c); + + LatticeFermion result5(FGrid); result5=Zero(); + schur(Ddwf,src5,result5,zpg); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(seqprop,result4,s,c); + } + } + + Gamma g5(Gamma::Algebra::Gamma5); + Gamma gT(Gamma::Algebra::GammaT); + + std::vector sumPA; + std::vector sumSV; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Ddwf.ContractConservedCurrent(prop5rev,prop5,Axial_mu,phys_src,Current::Axial,Tdir); + Ddwf.ContractConservedCurrent(prop5rev,prop5,Vector_mu,phys_src,Current::Vector,Tdir); + Ddwf.ContractJ5q(prop5,PJ5q); + + PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current + SV = trace(Vector_mu); // Scalar-Vector conserved current + VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current + PP = trace(adj(prop4)*prop4); // Pseudoscalar density + + // Spatial sum + sliceSum(PA,sumPA,Tdir); + sliceSum(SV,sumSV,Tdir); + sliceSum(VV,sumVV,Tdir); + sliceSum(PP,sumPP,Tdir); + sliceSum(PJ5q,sumPJ5q,Tdir); + + const int Nt{static_cast(sumPA.size())}; + std::cout< check_buf; + + test_S = trace(qSite*g); + test_V = trace(qSite*g*Gamma::gmu[mu_J]); + + Ddwf.ContractConservedCurrent(prop5rev,prop5,cur,phys_src,curr,mu_J); + + c = trace(cur*g); + sliceSum(c, check_buf, Tp); + check_S = TensorRemove(check_buf[t_J]); + + auto gmu=Gamma::gmu[mu_J]; + c = trace(cur*g*gmu); + sliceSum(c, check_buf, Tp); + check_V = TensorRemove(check_buf[t_J]); + + + std::cout< -#include using namespace std; using namespace Grid; template -void TestConserved(What & Ddwf, +void TestConserved(What & Dw, LatticeGaugeField &Umu, - GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, - RealD mass, RealD M5, - GridParallelRNG *RNG4, - GridParallelRNG *RNG5, - What *Ddwfrev=nullptr); + GridParallelRNG *RNG4); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -57,28 +52,12 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout< omegas; - std::vector < ComplexD > omegasrev(Ls); - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); - GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); - GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - - - GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), - GridDefaultSimd(Nd,vComplexF::Nsimd()), - GridDefaultMpi()); - GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); - GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); - GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); - std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG4(UGrid); std::vector seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4); @@ -95,19 +74,41 @@ int main (int argc, char ** argv) SU::HotConfiguration(RNG4,Umu); } - RealD mass=0.3; - RealD M5 =1.0; - std::cout<(Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + typename WilsonCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; - std::cout<(Dw,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout<(Dwc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dwcc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewcc,Umu,UGrid,UrbGrid,&RNG4); Grid_finalize(); } @@ -115,27 +116,17 @@ int main (int argc, char ** argv) template -void TestConserved(Action & Ddwf, +void TestConserved(Action & Dw, LatticeGaugeField &Umu, - GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, - RealD mass, RealD M5, - GridParallelRNG *RNG4, - GridParallelRNG *RNG5, - Action * Ddwfrev) + GridParallelRNG *RNG4) { LatticePropagator phys_src(UGrid); - LatticePropagator seqsrc(FGrid); - LatticePropagator prop5(FGrid); - LatticePropagator prop5rev(FGrid); + LatticePropagator seqsrc(UGrid); LatticePropagator prop4(UGrid); - LatticePropagator Axial_mu(UGrid); LatticePropagator Vector_mu(UGrid); - LatticeComplex PA (UGrid); LatticeComplex SV (UGrid); LatticeComplex VV (UGrid); - LatticeComplex PJ5q(UGrid); - LatticeComplex PP (UGrid); LatticePropagator seqprop(UGrid); SpinColourMatrix kronecker; kronecker=1.0; @@ -151,25 +142,11 @@ void TestConserved(Action & Ddwf, LatticeFermion src4 (UGrid); PropToFerm(src4,phys_src,s,c); - LatticeFermion src5 (FGrid); - Ddwf.ImportPhysicalFermionSource(src4,src5); - - LatticeFermion result5(FGrid); result5=Zero(); - schur(Ddwf,src5,result5,zpg); - std::cout<(prop5,result5,s,c); - - LatticeFermion result4(UGrid); - Ddwf.ExportPhysicalFermionSolution(result5,result4); + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); + std::cout<(prop4,result4,s,c); - - if( Ddwfrev ) { - Ddwfrev->ImportPhysicalFermionSource(src4,src5); - result5 = Zero(); - schur(*Ddwfrev,src5,result5,zpg); - } - FermToProp(prop5rev,result5,s,c); } } @@ -179,7 +156,7 @@ void TestConserved(Action & Ddwf, LatticeComplex ph (UGrid); ph=1.0; - Ddwf.SeqConservedCurrent(prop5, + Dw.SeqConservedCurrent(prop4, seqsrc, phys_src, curr, @@ -191,14 +168,12 @@ void TestConserved(Action & Ddwf, for(int s=0;s(src5,seqsrc,s,c); + LatticeFermion src4 (UGrid); + PropToFerm(src4,seqsrc,s,c); - LatticeFermion result5(FGrid); result5=Zero(); - schur(Ddwf,src5,result5,zpg); + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); - LatticeFermion result4(UGrid); - Ddwf.ExportPhysicalFermionSolution(result5,result4); FermToProp(seqprop,result4,s,c); } } @@ -206,46 +181,32 @@ void TestConserved(Action & Ddwf, Gamma g5(Gamma::Algebra::Gamma5); Gamma gT(Gamma::Algebra::GammaT); - std::vector sumPA; std::vector sumSV; std::vector sumVV; - std::vector sumPP; - std::vector sumPJ5q; - Ddwf.ContractConservedCurrent(prop5rev,prop5,Axial_mu,phys_src,Current::Axial,Tdir); - Ddwf.ContractConservedCurrent(prop5rev,prop5,Vector_mu,phys_src,Current::Vector,Tdir); - Ddwf.ContractJ5q(prop5,PJ5q); + Dw.ContractConservedCurrent(prop4,prop4,Vector_mu,phys_src,Current::Vector,Tdir); - PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current SV = trace(Vector_mu); // Scalar-Vector conserved current VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current - PP = trace(adj(prop4)*prop4); // Pseudoscalar density // Spatial sum - sliceSum(PA,sumPA,Tdir); sliceSum(SV,sumSV,Tdir); sliceSum(VV,sumVV,Tdir); - sliceSum(PP,sumPP,Tdir); - sliceSum(PJ5q,sumPJ5q,Tdir); - const int Nt{static_cast(sumPA.size())}; + const int Nt{static_cast(sumSV.size())}; + std::cout< &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) { GridBase *grid = U[0].Grid(); From 8b12a61097b25f5a3916f587c312336bf77f78ed Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Mon, 9 May 2022 11:53:22 +0100 Subject: [PATCH 33/93] fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am --- Grid/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/Makefile.am b/Grid/Makefile.am index aff5b9d0..7c3c151b 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -70,7 +70,7 @@ endif lib_LIBRARIES = libGrid.a CCFILES += $(extra_sources) -HFILES += $(extra_headers) +HFILES += $(extra_headers) Config.h Version.h libGrid_a_SOURCES = $(CCFILES) libGrid_adir = $(includedir)/Grid From 32facbd02a96595ff3fdb92aaa310bbcd0d356d3 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 10 May 2022 10:53:22 +0100 Subject: [PATCH 34/93] fix: assert for dimensions of compact Wilson clover moved to constructor. --- Grid/qcd/action/fermion/WilsonCloverTypes.h | 2 -- .../implementation/CompactWilsonCloverFermionImplementation.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonCloverTypes.h b/Grid/qcd/action/fermion/WilsonCloverTypes.h index a5a95d93..5ce3b91b 100644 --- a/Grid/qcd/action/fermion/WilsonCloverTypes.h +++ b/Grid/qcd/action/fermion/WilsonCloverTypes.h @@ -47,8 +47,6 @@ class CompactWilsonCloverTypes { public: INHERIT_IMPL_TYPES(Impl); - static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions"); - static constexpr int Nred = Nc * Nhs; // 6 static constexpr int Nblock = Nhs; // 2 static constexpr int Ndiagonal = Nred; // 6 diff --git a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h index 2496f3f1..e4864730 100644 --- a/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h @@ -59,6 +59,8 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(Gaug , BoundaryMask(&Fgrid) , BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid) { + assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3); + csw_r *= 0.5; csw_t *= 0.5; if (clover_anisotropy.isAnisotropic) From b051e00de044bee7a6756b9d51f943fe5189b32d Mon Sep 17 00:00:00 2001 From: James Richings Date: Mon, 16 May 2022 00:25:13 +0100 Subject: [PATCH 35/93] Additional Local Coherance Deflation operator() --- Grid/algorithms/iterative/Deflation.h | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index 2eb28bf9..a69d1549 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -113,7 +113,43 @@ public: blockPromote(guess_coarse,guess,subspace); guess.Checkerboard() = src.Checkerboard(); }; -}; + + void operator()(const std::vector &src,std::vector &guess) { + int Nevec = (int)evec_coarse.size(); + int Nsrc = (int)src.size(); + // make temp variables + std::vector src_coarse(Nsrc,evec_coarse[0].Grid()); + std::vector guess_coarse(Nsrc,evec_coarse[0].Grid()); + //Preporcessing + std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl; + for (int j=0;j Date: Mon, 16 May 2022 00:28:28 +0100 Subject: [PATCH 36/93] bugfix: eo operator called in correct location --- benchmarks/Benchmark_wilson_sweep.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index 986a1831..d88e2d39 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -110,8 +110,8 @@ int main (int argc, char ** argv) bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << "\t"; // EO - bench_wilson(src,result,Dw,volume,DaggerNo); - bench_wilson(src,result,Dw,volume,DaggerYes); + bench_wilson_eo(src,result,Dw,volume,DaggerNo); + bench_wilson_eo(src,result,Dw,volume,DaggerYes); std::cout << std::endl; } } From 4b1997e2f3a23abc0ee61a7b19b612b50548c5a5 Mon Sep 17 00:00:00 2001 From: James Richings Date: Mon, 16 May 2022 15:58:33 +0100 Subject: [PATCH 37/93] wilson sweep test --- benchmarks/Benchmark_wilson_sweep.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index d88e2d39..50ad1d03 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -44,6 +44,13 @@ void bench_wilson ( double const volume, int const dag ); +void bench_wilson_eo ( + LatticeFermion & src, + LatticeFermion & result, + WilsonFermionR & Dw, + double const volume, + int const dag ); + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -110,8 +117,8 @@ int main (int argc, char ** argv) bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << "\t"; // EO - bench_wilson_eo(src,result,Dw,volume,DaggerNo); - bench_wilson_eo(src,result,Dw,volume,DaggerYes); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerNo); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerYes); std::cout << std::endl; } } From 79e34b3eb44b0c62a316736ffa5b0a7c0e2249f7 Mon Sep 17 00:00:00 2001 From: JPRichings Date: Thu, 19 May 2022 14:53:17 +0100 Subject: [PATCH 38/93] Local Coherence batch deflation --- Grid/algorithms/iterative/Deflation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h index a69d1549..1a8f97c9 100644 --- a/Grid/algorithms/iterative/Deflation.h +++ b/Grid/algorithms/iterative/Deflation.h @@ -128,7 +128,7 @@ public: std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl; blockProject(src_coarse[j],src[j],subspace); } - //deflation set up for eigen vector batchsize 1 and source batch size equal number of batches + //deflation set up for eigen vector batchsize 1 and source batch size equal number of sources std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl; for (int i=0;i Date: Tue, 24 May 2022 11:37:33 +0100 Subject: [PATCH 39/93] fix: fixed warning: missing return statement at end of non-void function in CloverHelpers --- Grid/qcd/action/fermion/CloverHelpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h index c72a79e8..57e71998 100644 --- a/Grid/qcd/action/fermion/CloverHelpers.h +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -181,6 +181,7 @@ public: static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); + return lambda; } }; @@ -425,6 +426,7 @@ public: static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { assert(0); + return lambda; } }; From c7205d2a73ea9033c3f4cc5b6f01ec3431fba348 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 24 May 2022 14:30:26 +0100 Subject: [PATCH 40/93] Removed nvcc guards for json --- Grid/json/json.hpp | 4 +--- Grid/serialisation/JSON_IO.cc | 2 +- Grid/serialisation/Serialisation.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Grid/json/json.hpp b/Grid/json/json.hpp index 618aa7a1..5b5d2215 100644 --- a/Grid/json/json.hpp +++ b/Grid/json/json.hpp @@ -1,4 +1,3 @@ -#ifndef __NVCC__ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ @@ -18918,5 +18917,4 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #undef NLOHMANN_BASIC_JSON_TPL -#endif -#endif +#endif \ No newline at end of file diff --git a/Grid/serialisation/JSON_IO.cc b/Grid/serialisation/JSON_IO.cc index f2282099..e47a2969 100644 --- a/Grid/serialisation/JSON_IO.cc +++ b/Grid/serialisation/JSON_IO.cc @@ -26,7 +26,7 @@ *************************************************************************************/ /* END LEGAL */ #include -#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) +#ifndef GRID_HIP NAMESPACE_BEGIN(Grid); diff --git a/Grid/serialisation/Serialisation.h b/Grid/serialisation/Serialisation.h index e14120af..4f395be2 100644 --- a/Grid/serialisation/Serialisation.h +++ b/Grid/serialisation/Serialisation.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" -#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) +#ifndef GRID_HIP #include "JSON_IO.h" #endif From 3ca0de1c409eb6c96ef95293d926915b8173346b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 24 May 2022 14:37:33 +0100 Subject: [PATCH 41/93] Fix json write for vector --- Grid/serialisation/JSON_IO.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/serialisation/JSON_IO.cc b/Grid/serialisation/JSON_IO.cc index e47a2969..c92203ba 100644 --- a/Grid/serialisation/JSON_IO.cc +++ b/Grid/serialisation/JSON_IO.cc @@ -82,7 +82,7 @@ void JSONWriter::writeDefault(const std::string &s, const std::string &x) if (s.size()) ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ; else - ss_ << os.str() << " ," ; + ss_ << "\""<< os.str() << "\" ," ; } // Reader implementation /////////////////////////////////////////////////////// From bab8aa8eb049749402f1fbb0b33f1e086ce44403 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:27:40 +0100 Subject: [PATCH 42/93] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in DisableWarnings.h --- Grid/DisableWarnings.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Grid/DisableWarnings.h b/Grid/DisableWarnings.h index 4bd1edd0..64e4faf4 100644 --- a/Grid/DisableWarnings.h +++ b/Grid/DisableWarnings.h @@ -44,14 +44,22 @@ directory #ifdef __NVCC__ //disables nvcc specific warning in json.hpp #pragma clang diagnostic ignored "-Wdeprecated-register" + +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) + //disables nvcc specific warning in json.hpp +#pragma nv_diag_suppress unsigned_compare_with_zero +#pragma nv_diag_suppress cast_to_qualified_type + //disables nvcc specific warning in many files +#pragma nv_diag_suppress esa_on_defaulted_function_ignored +#pragma nv_diag_suppress extra_semicolon +#else + //disables nvcc specific warning in json.hpp #pragma diag_suppress unsigned_compare_with_zero #pragma diag_suppress cast_to_qualified_type - //disables nvcc specific warning in many files #pragma diag_suppress esa_on_defaulted_function_ignored #pragma diag_suppress extra_semicolon - -//Eigen only +#endif #endif // Disable vectorisation in Eigen on the Power8/9 and PowerPC From e909aeedf082f0aaba98b31cccd8119b6c38d86f Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:29:42 +0100 Subject: [PATCH 43/93] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in Grid_Eigen_Dense.h --- Grid/Grid_Eigen_Dense.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index c62d9cdb..5aee81de 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -14,7 +14,11 @@ /* NVCC save and restore compile environment*/ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress code_is_unreachable +#else #pragma diag_suppress code_is_unreachable +#endif #pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") From 7937ac2bab3eaef32222e6b28c6d4dda9f5af6d9 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 24 May 2022 15:31:03 +0100 Subject: [PATCH 44/93] fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in pugixml/pugixml.cc --- Grid/pugixml/pugixml.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index 45e6496a..81973964 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -16,8 +16,12 @@ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" +#else #pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" #endif +#endif #include "pugixml.h" From e346154c5dfe8ba6ada0b11bcfc8cfba0cc57f6b Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 24 May 2022 15:47:01 +0100 Subject: [PATCH 45/93] Updated json CUDA compile guards --- Grid/serialisation/JSON_IO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Grid/serialisation/JSON_IO.h b/Grid/serialisation/JSON_IO.h index b5255de3..090a36e1 100644 --- a/Grid/serialisation/JSON_IO.h +++ b/Grid/serialisation/JSON_IO.h @@ -54,7 +54,7 @@ namespace Grid void pop(void); template void writeDefault(const std::string &s, const U &x); -#ifdef __NVCC__ +#if defined(GRID_CUDA) || defined(GRID_HIP) void writeDefault(const std::string &s, const Grid::ComplexD &x) { std::complex z(real(x),imag(x)); @@ -101,7 +101,7 @@ namespace Grid void readDefault(const std::string &s, std::vector &output); template void readDefault(const std::string &s, std::pair &output); -#ifdef __NVCC__ +#if defined(GRID_CUDA) || defined(GRID_HIP) void readDefault(const std::string &s, ComplexD &output) { std::complex z; From da4daea57ab0afd2865f9282f13870e661e97be9 Mon Sep 17 00:00:00 2001 From: Raoul Hodgson Date: Tue, 24 May 2022 16:16:06 +0100 Subject: [PATCH 46/93] Updated json to latest release 3.10.5 --- Grid/json/json.hpp | 24163 ++++++++++++++++++++++++------------------- 1 file changed, 13667 insertions(+), 10496 deletions(-) diff --git a/Grid/json/json.hpp b/Grid/json/json.hpp index 5b5d2215..cb27e058 100644 --- a/Grid/json/json.hpp +++ b/Grid/json/json.hpp @@ -1,12 +1,12 @@ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 3.2.0 +| | |__ | | | | | | version 3.10.5 |_____|_____|_____|_|___| https://github.com/nlohmann/json Licensed under the MIT License . SPDX-License-Identifier: MIT -Copyright (c) 2013-2018 Niels Lohmann . +Copyright (c) 2013-2022 Niels Lohmann . Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,588 +27,56 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NLOHMANN_JSON_HPP -#define NLOHMANN_JSON_HPP +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file doc/README.md. * +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ #define NLOHMANN_JSON_VERSION_MAJOR 3 -#define NLOHMANN_JSON_VERSION_MINOR 2 -#define NLOHMANN_JSON_VERSION_PATCH 0 +#define NLOHMANN_JSON_VERSION_MINOR 10 +#define NLOHMANN_JSON_VERSION_PATCH 5 #include // all_of, find, for_each -#include // assert -#include // and, not, or #include // nullptr_t, ptrdiff_t, size_t #include // hash, less #include // initializer_list -#include // istream, ostream -#include // iterator_traits, random_access_iterator_tag +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr #include // accumulate #include // string, stoi, to_string #include // declval, forward, move, pair, swap - -// #include -#ifndef NLOHMANN_JSON_FWD_HPP -#define NLOHMANN_JSON_FWD_HPP - -#include // int64_t, uint64_t -#include // map -#include // allocator -#include // string #include // vector -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ -/*! -@brief default JSONSerializer template argument - -This serializer ignores the template arguments and uses ADL -([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) -for serialization. -*/ -template -struct adl_serializer; - -template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer> -class basic_json; - -/*! -@brief JSON Pointer - -A JSON pointer defines a string syntax for identifying a specific value -within a JSON document. It can be used with functions `at` and -`operator[]`. Furthermore, JSON pointers are the base for JSON patches. - -@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) - -@since version 2.0.0 -*/ -template -class json_pointer; - -/*! -@brief default JSON class - -This type is the default specialization of the @ref basic_json class which -uses the standard template types. - -@since version 1.0.0 -*/ -using json = basic_json<>; -} - -#endif - -// #include - - -// This file contains all internal macro definitions -// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them - -// exclude unsupported compilers -#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) - #if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #endif -#endif - -// disable float-equal warnings on GCC/clang -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wdocumentation" -#endif - -// allow for portable deprecation warnings -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #define JSON_DEPRECATED __attribute__((deprecated)) -#elif defined(_MSC_VER) - #define JSON_DEPRECATED __declspec(deprecated) -#else - #define JSON_DEPRECATED -#endif - -// allow to disable exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) - #define JSON_INTERNAL_CATCH(exception) catch(exception) -#else - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) - #define JSON_INTERNAL_CATCH(exception) if(false) -#endif - -// override exception macros -#if defined(JSON_THROW_USER) - #undef JSON_THROW - #define JSON_THROW JSON_THROW_USER -#endif -#if defined(JSON_TRY_USER) - #undef JSON_TRY - #define JSON_TRY JSON_TRY_USER -#endif -#if defined(JSON_CATCH_USER) - #undef JSON_CATCH - #define JSON_CATCH JSON_CATCH_USER - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_CATCH_USER -#endif -#if defined(JSON_INTERNAL_CATCH_USER) - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER -#endif - -// manual branch prediction -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #define JSON_LIKELY(x) __builtin_expect(!!(x), 1) - #define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) -#else - #define JSON_LIKELY(x) x - #define JSON_UNLIKELY(x) x -#endif - -// C++ language standard detection -#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 -#elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) - #define JSON_HAS_CPP_14 -#endif - -// Ugly macros to avoid uglier copy-paste when specializing basic_json. They -// may be removed in the future once the class is split. - -#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ - template class ObjectType, \ - template class ArrayType, \ - class StringType, class BooleanType, class NumberIntegerType, \ - class NumberUnsignedType, class NumberFloatType, \ - template class AllocatorType, \ - template class JSONSerializer> - -#define NLOHMANN_BASIC_JSON_TPL \ - basic_json - -// #include - - -#include // not -#include // size_t -#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type - -namespace nlohmann -{ -namespace detail -{ -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -template -using uncvref_t = typename std::remove_cv::type>::type; - -// implementation of C++14 index_sequence and affiliates -// source: https://stackoverflow.com/a/32223343 -template -struct index_sequence -{ - using type = index_sequence; - using value_type = std::size_t; - static constexpr std::size_t size() noexcept - { - return sizeof...(Ints); - } -}; - -template -struct merge_and_renumber; - -template -struct merge_and_renumber, index_sequence> - : index_sequence < I1..., (sizeof...(I1) + I2)... > {}; - -template -struct make_index_sequence - : merge_and_renumber < typename make_index_sequence < N / 2 >::type, - typename make_index_sequence < N - N / 2 >::type > {}; - -template<> struct make_index_sequence<0> : index_sequence<> {}; -template<> struct make_index_sequence<1> : index_sequence<0> {}; - -template -using index_sequence_for = make_index_sequence; - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - -// taken from ranges-v3 -template -struct static_const -{ - static constexpr T value{}; -}; - -template -constexpr T static_const::value; -} -} - -// #include - - -#include // not -#include // numeric_limits -#include // false_type, is_constructible, is_integral, is_same, true_type -#include // declval - -// #include - -// #include - -// #include +// #include #include - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template struct make_void -{ - using type = void; -}; -template using void_t = typename make_void::type; -} -} - - -// http://en.cppreference.com/w/cpp/experimental/is_detected -namespace nlohmann -{ -namespace detail -{ -struct nonesuch -{ - nonesuch() = delete; - ~nonesuch() = delete; - nonesuch(nonesuch const&) = delete; - void operator=(nonesuch const&) = delete; -}; - -template class Op, - class... Args> -struct detector -{ - using value_t = std::false_type; - using type = Default; -}; - -template class Op, class... Args> -struct detector>, Op, Args...> -{ - using value_t = std::true_type; - using type = Op; -}; - -template