diff --git a/Grid/DisableWarnings.h b/Grid/DisableWarnings.h index 4bd1edd0..64e4faf4 100644 --- a/Grid/DisableWarnings.h +++ b/Grid/DisableWarnings.h @@ -44,14 +44,22 @@ directory #ifdef __NVCC__ //disables nvcc specific warning in json.hpp #pragma clang diagnostic ignored "-Wdeprecated-register" + +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) + //disables nvcc specific warning in json.hpp +#pragma nv_diag_suppress unsigned_compare_with_zero +#pragma nv_diag_suppress cast_to_qualified_type + //disables nvcc specific warning in many files +#pragma nv_diag_suppress esa_on_defaulted_function_ignored +#pragma nv_diag_suppress extra_semicolon +#else + //disables nvcc specific warning in json.hpp #pragma diag_suppress unsigned_compare_with_zero #pragma diag_suppress cast_to_qualified_type - //disables nvcc specific warning in many files #pragma diag_suppress esa_on_defaulted_function_ignored #pragma diag_suppress extra_semicolon - -//Eigen only +#endif #endif // Disable vectorisation in Eigen on the Power8/9 and PowerPC diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index c62d9cdb..5aee81de 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -14,7 +14,11 @@ /* NVCC save and restore compile environment*/ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress code_is_unreachable +#else #pragma diag_suppress code_is_unreachable +#endif #pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index 14f3d306..b65eea46 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -120,6 +120,9 @@ public: SolverTimer.Start(); int k; for (k = 1; k <= MaxIterations; k++) { + + GridStopWatch IterationTimer; + IterationTimer.Start(); c = cp; MatrixTimer.Start(); @@ -152,8 +155,14 @@ public: LinearCombTimer.Stop(); LinalgTimer.Stop(); - std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k + IterationTimer.Stop(); + if ( (k % 500) == 0 ) { + std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; + } else { + std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k + << " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl; + } // Stopping condition if (cp <= rsq) { @@ -170,13 +179,13 @@ public: << "\tTrue residual " << true_residual << "\tTarget " << Tolerance << std::endl; - std::cout << GridLogIterative << "Time breakdown "<::operator(); - RealD Tolerance; + // RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion std::vector IterationsToCompleteShift; // Iterations for this shift @@ -324,8 +324,8 @@ public: std::cout << GridLogMessage << "Time Breakdown "< &src,std::vector &guess) { + int Nevec = (int)evec_coarse.size(); + int Nsrc = (int)src.size(); + // make temp variables + std::vector src_coarse(Nsrc,evec_coarse[0].Grid()); + std::vector guess_coarse(Nsrc,evec_coarse[0].Grid()); + //Preporcessing + std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl; + for (int j=0;jShmBufferTranslate(dest,recv); assert(shm!=NULL); - // std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl; acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); } } if ( CommunicatorPolicy == CommunicatorPolicySequential ) { this->StencilSendToRecvFromComplete(list,dir); + list.resize(0); } return off_node_bytes; diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 5caab214..f3b485a4 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -125,6 +125,12 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// +template +typename vobj::scalar_object peekSite(const Lattice &l,const Coordinate &site){ + typename vobj::scalar_object s; + peekSite(s,l,site); + return s; +} template void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 0ddac437..16feb856 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -232,6 +232,7 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & const uint64_t sites = grid->oSites(); // Might make all code paths go this way. +#if 0 typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; @@ -241,15 +242,31 @@ inline ComplexD rankInnerProduct(const Lattice &left,const Lattice & autoView( right_v,right, AcceleratorRead); // GPU - SIMT lane compliance... - accelerator_for( ss, sites, 1,{ - auto x_l = left_v[ss]; - auto y_l = right_v[ss]; - inner_tmp_v[ss]=innerProductD(x_l,y_l); + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l)); }); } +#else + typedef decltype(innerProduct(vobj(),vobj())) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + { + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); + // GPU - SIMT lane compliance... + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + }); + } +#endif // This is in single precision and fails some tests - auto anrm = sum(inner_tmp_v,sites); + auto anrm = sumD(inner_tmp_v,sites); nrm = anrm; return nrm; } @@ -283,7 +300,7 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt conformable(x,y); typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_typeD vector_type; + // typedef typename vobj::vector_typeD vector_type; RealD nrm; GridBase *grid = x.Grid(); @@ -295,17 +312,29 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt autoView( x_v, x, AcceleratorRead); autoView( y_v, y, AcceleratorRead); autoView( z_v, z, AcceleratorWrite); - +#if 0 typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - accelerator_for( ss, sites, 1,{ - auto tmp = a*x_v[ss]+b*y_v[ss]; - inner_tmp_v[ss]=innerProductD(tmp,tmp); - z_v[ss]=tmp; + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProductD(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); }); nrm = real(TensorRemove(sum(inner_tmp_v,sites))); +#else + typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); + }); + nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); +#endif grid->GlobalSum(nrm); return nrm; } diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index e5e63716..6857dc84 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -424,9 +424,32 @@ public: // MT implementation does not implement fast discard even though // in principle this is possible //////////////////////////////////////////////// +#if 1 + thread_for( lidx, _grid->lSites(), { + int gidx; + int o_idx; + int i_idx; + int rank; + Coordinate pcoor; + Coordinate lcoor; + Coordinate gcoor; + _grid->LocalIndexToLocalCoor(lidx,lcoor); + pcoor=_grid->ThisProcessorCoor(); + _grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor); + _grid->GlobalCoorToGlobalIndex(gcoor,gidx); + + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + assert(rank == _grid->ThisRank() ); + + int l_idx=generator_idx(o_idx,i_idx); + _generators[l_idx] = master_engine; + Skip(_generators[l_idx],gidx); // Skip to next RNG sequence + }); +#else // Everybody loops over global volume. thread_for( gidx, _grid->_gsites, { + // Where is it? int rank; int o_idx; @@ -443,6 +466,7 @@ public: Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } }); +#endif #else //////////////////////////////////////////////////////////////// // Machine and thread decomposition dependent seeding is efficient diff --git a/Grid/log/Log.cc b/Grid/log/Log.cc index 31f9a3f3..63bc454f 100644 --- a/Grid/log/Log.cc +++ b/Grid/log/Log.cc @@ -65,6 +65,7 @@ GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL"); GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); +GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL"); GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE"); @@ -72,9 +73,10 @@ GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); void GridLogConfigure(std::vector &logstreams) { - GridLogError.Active(0); + GridLogError.Active(1); GridLogWarning.Active(0); GridLogMessage.Active(1); // at least the messages should be always on + GridLogMemory.Active(0); // at least the messages should be always on GridLogIterative.Active(0); GridLogDebug.Active(0); GridLogPerformance.Active(0); @@ -83,7 +85,7 @@ void GridLogConfigure(std::vector &logstreams) { GridLogHMC.Active(1); for (int i = 0; i < logstreams.size(); i++) { - if (logstreams[i] == std::string("Error")) GridLogError.Active(1); + if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1); if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); diff --git a/Grid/log/Log.h b/Grid/log/Log.h index b1696fee..4d512ff6 100644 --- a/Grid/log/Log.h +++ b/Grid/log/Log.h @@ -183,6 +183,7 @@ extern GridLogger GridLogPerformance; extern GridLogger GridLogIterative ; extern GridLogger GridLogIntegrator ; extern GridLogger GridLogHMC; +extern GridLogger GridLogMemory; extern Colours GridLogColours; std::string demangle(const char* name) ; diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h index 576f08bd..80d135d2 100644 --- a/Grid/parallelIO/IldgIO.h +++ b/Grid/parallelIO/IldgIO.h @@ -31,6 +31,7 @@ directory #include #include #include +#include #include #include @@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter { // Fill ILDG header data struct ////////////////////////////////////////////////////// ildgFormat ildgfmt ; - ildgfmt.field = std::string("su3gauge"); + const std::string stNC = std::to_string( Nc ) ; + ildgfmt.field = std::string("su"+stNC+"gauge"); if ( format == std::string("IEEE32BIG") ) { ildgfmt.precision = 32; @@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader { } else { assert(found_ildgFormat); - assert ( ildgFormat_.field == std::string("su3gauge") ); + const std::string stNC = std::to_string( Nc ) ; + assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") ); /////////////////////////////////////////////////////////////////////////////////////// // Populate our Grid metadata as best we can @@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader { std::ostringstream vers; vers << ildgFormat_.version; FieldMetaData_.hdr_version = vers.str(); - FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); + FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC); FieldMetaData_.nd=4; FieldMetaData_.dimension.resize(4); diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index af8b3f76..6b9d8708 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -6,8 +6,8 @@ Copyright (C) 2015 - Author: Peter Boyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -182,8 +182,8 @@ class GaugeStatistics public: void operator()(Lattice & data,FieldMetaData &header) { - header.link_trace=WilsonLoops::linkTrace(data); - header.plaquette =WilsonLoops::avgPlaquette(data); + header.link_trace = WilsonLoops::linkTrace(data); + header.plaquette = WilsonLoops::avgPlaquette(data); } }; typedef GaugeStatistics PeriodicGaugeStatistics; @@ -203,20 +203,24 @@ template<> inline void PrepareMetaData(Lattice 1 ) ; for(int mu=0;mu using iLorentzColour2x3 = iVector, 2>, Nd >; +template using iLorentzColour2x3 = iVector, Nc-1>, Nd >; typedef iLorentzColour2x3 LorentzColour2x3; typedef iLorentzColour2x3 LorentzColour2x3F; @@ -278,7 +282,6 @@ struct GaugeSimpleMunger{ template struct GaugeSimpleUnmunger { - void operator()(sobj &in, fobj &out) { for (int mu = 0; mu < Nd; mu++) { for (int i = 0; i < Nc; i++) { @@ -317,8 +320,8 @@ template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ for(int mu=0;mu struct Gauge3x2unmunger{ void operator() (sobj &in,fobj &out){ for(int mu=0;mu Author: Peter Boyle Author: paboyle + Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +31,8 @@ #ifndef GRID_NERSC_IO_H #define GRID_NERSC_IO_H +#include + NAMESPACE_BEGIN(Grid); using namespace Grid; @@ -147,15 +150,17 @@ public: std::string format(header.floating_point); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); + const int ieee32big = (format == std::string("IEEE32BIG")); + const int ieee32 = (format == std::string("IEEE32")); + const int ieee64big = (format == std::string("IEEE64BIG")); + const int ieee64 = (format == std::string("IEEE64") || \ + format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; // munger is a function of - if ( header.data_type == std::string("4D_SU3_GAUGE") ) { + const std::string stNC = std::to_string( Nc ) ; + if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,Gauge3x2munger(), offset,format, @@ -166,7 +171,7 @@ public: (Umu,file,Gauge3x2munger(),offset,format, nersc_csum,scidac_csuma,scidac_csumb); } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + } else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) { if ( ieee32 || ieee32big ) { BinaryIO::readLatticeObject (Umu,file,GaugeSimpleMunger(),offset,format, @@ -211,27 +216,29 @@ public: template static inline void writeConfiguration(Lattice &Umu, std::string file, - std::string ens_label = std::string("DWF")) + std::string ens_label = std::string("DWF"), + std::string ens_id = std::string("UKQCD"), + unsigned int sequence_number = 1) { - writeConfiguration(Umu,file,0,1,ens_label); + writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number); } template static inline void writeConfiguration(Lattice &Umu, std::string file, int two_row, int bits32, - std::string ens_label = std::string("DWF")) + std::string ens_label = std::string("DWF"), + std::string ens_id = std::string("UKQCD"), + unsigned int sequence_number = 1) { typedef vLorentzColourMatrixD vobj; typedef typename vobj::scalar_object sobj; FieldMetaData header; - /////////////////////////////////////////// - // Following should become arguments - /////////////////////////////////////////// - header.sequence_number = 1; - header.ensemble_id = std::string("UKQCD"); + header.sequence_number = sequence_number; + header.ensemble_id = ens_id; header.ensemble_label = ens_label; + header.hdr_version = "1.0" ; typedef LorentzColourMatrixD fobj3D; typedef LorentzColour2x3D fobj2D; @@ -245,10 +252,14 @@ public: uint64_t offset; - // Sod it -- always write 3x3 double - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - GaugeSimpleUnmunger munge; + // Sod it -- always write NcxNc double + header.floating_point = std::string("IEEE64BIG"); + const std::string stNC = std::to_string( Nc ) ; + if( two_row ) { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE" ); + } else { + header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC ); + } if ( grid->IsBoss() ) { truncate(file); offset = writeHeader(header,file); @@ -256,8 +267,15 @@ public: grid->Broadcast(0,(void *)&offset,sizeof(offset)); uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, - nersc_csum,scidac_csuma,scidac_csumb); + if( two_row ) { + Gauge3x2unmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } else { + GaugeSimpleUnmunger munge; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + } header.checksum = nersc_csum; if ( grid->IsBoss() ) { writeHeader(header,file); @@ -289,8 +307,7 @@ public: header.plaquette=0.0; MachineCharacteristics(header); - uint64_t offset; - + uint64_t offset; #ifdef RNG_RANLUX header.floating_point = std::string("UINT64"); header.data_type = std::string("RANLUX48"); @@ -330,7 +347,7 @@ public: GridBase *grid = parallel.Grid(); - uint64_t offset = readHeader(file,grid,header); + uint64_t offset = readHeader(file,grid,header); FieldMetaData clone(header); diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index dd25b41e..540b75c5 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -72,17 +72,9 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, inline uint64_t cyclecount(void){ return 0; } -#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx") -#define __SSC_STOP __SSC_MARK(0x110) -#define __SSC_START __SSC_MARK(0x111) - #else -#define __SSC_MARK(mark) -#define __SSC_STOP -#define __SSC_START - /* * cycle counters arch dependent */ diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h index 2a44faee..02c23a62 100644 --- a/Grid/perfmon/Timer.h +++ b/Grid/perfmon/Timer.h @@ -39,9 +39,9 @@ NAMESPACE_BEGIN(Grid) // C++11 time facilities better? inline double usecond(void) { struct timeval tv; -#ifdef TIMERS_ON + tv.tv_sec = 0; + tv.tv_usec = 0; gettimeofday(&tv,NULL); -#endif return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec; } diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index 45e6496a..81973964 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -16,8 +16,12 @@ #ifdef __NVCC__ #pragma push +#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5) +#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" +#else #pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" #endif +#endif #include "pugixml.h" diff --git a/Grid/qcd/action/ActionParams.h b/Grid/qcd/action/ActionParams.h index 13f2e594..b203f27a 100644 --- a/Grid/qcd/action/ActionParams.h +++ b/Grid/qcd/action/ActionParams.h @@ -38,24 +38,32 @@ NAMESPACE_BEGIN(Grid); struct GparityWilsonImplParams { Coordinate twists; //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs - GparityWilsonImplParams() : twists(Nd, 0) {}; + Coordinate dirichlet; // Blocksize of dirichlet BCs + GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {}; }; struct WilsonImplParams { bool overlapCommsCompute; + Coordinate dirichlet; // Blocksize of dirichlet BCs AcceleratorVector twist_n_2pi_L; AcceleratorVector boundary_phases; WilsonImplParams() { + dirichlet.resize(Nd,0); boundary_phases.resize(Nd, 1.0); twist_n_2pi_L.resize(Nd, 0.0); }; WilsonImplParams(const AcceleratorVector phi) : boundary_phases(phi), overlapCommsCompute(false) { twist_n_2pi_L.resize(Nd, 0.0); + dirichlet.resize(Nd,0); } }; struct StaggeredImplParams { - StaggeredImplParams() {}; + Coordinate dirichlet; // Blocksize of dirichlet BCs + StaggeredImplParams() + { + dirichlet.resize(Nd,0); + }; }; struct OneFlavourRationalParams : Serializable { diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 489b51ff..1ce4012e 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -68,9 +68,17 @@ public: /////////////////////////////////////////////////////////////// // Support for MADWF tricks /////////////////////////////////////////////////////////////// - virtual RealD Mass(void) { return mass; }; + RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; }; + RealD MassPlus(void) { return mass_plus; }; + RealD MassMinus(void) { return mass_minus; }; + void SetMass(RealD _mass) { - mass=_mass; + mass_plus=mass_minus=_mass; + SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs + } ; + void SetMass(RealD _mass_plus, RealD _mass_minus) { + mass_plus=_mass_plus; + mass_minus=_mass_minus; SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs } ; void P(const FermionField &psi, FermionField &chi); @@ -108,7 +116,7 @@ public: void MeooeDag5D (const FermionField &in, FermionField &out); // protected: - RealD mass; + RealD mass_plus, mass_minus; // Save arguments to SetCoefficientsInternal Vector _gamma; diff --git a/Grid/qcd/action/fermion/CloverHelpers.h b/Grid/qcd/action/fermion/CloverHelpers.h new file mode 100644 index 00000000..57e71998 --- /dev/null +++ b/Grid/qcd/action/fermion/CloverHelpers.h @@ -0,0 +1,435 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h + + Copyright (C) 2017 - 2022 + + Author: paboyle + Author: Daniel Richtmann + Author: Mattia Bruno + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#pragma once + +#include +#include +#include + +//////////////////////////////////////////// +// Standard Clover +// (4+m0) + csw * clover_term +// Exp Clover +// (4+m0) * exp(csw/(4+m0) clover_term) +// = (4+m0) + csw * clover_term + ... +//////////////////////////////////////////// + +NAMESPACE_BEGIN(Grid); + + +////////////////////////////////// +// Generic Standard Clover +////////////////////////////////// + +template +class CloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + + static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) { + GridBase *grid = CloverTerm.Grid(); + CloverTerm += diag_mass; + + int lvol = grid->lSites(); + int DimRep = Impl::Dimension; + { + autoView(CTv,CloverTerm,CpuRead); + autoView(CTIv,CloverTermInv,CpuWrite); + thread_for(site, lvol, { + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); + peekLocalSite(Qx, CTv, lcoor); + + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } + + EigenInvCloverOp = EigenCloverOp.inverse(); + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + pokeLocalSite(Qxinv, CTIv, lcoor); + }); + } + } + + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); + } + +}; + + +////////////////////////////////// +// Generic Exp Clover +////////////////////////////////// + +template +class ExpCloverHelpers: public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef WilsonCloverHelpers Helpers; + + // Can this be avoided? + static void IdentityTimesC(const CloverField& in, RealD c) { + int DimRep = Impl::Dimension; + + autoView(in_v, in, AcceleratorWrite); + + accelerator_for(ss, in.Grid()->oSites(), 1, { + for (int sa=0; saprec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) { + GridBase* grid = Clover.Grid(); + CloverField ExpClover(grid); + + int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass); + + Clover *= (1.0/diag_mass); + + // Taylor expansion, slow but generic + // Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...)) + // qN = cN + // qn = cn + qn+1 X + std::vector cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++) + cn[i] = cn[i-1] / RealD(i); + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * Clover + cn[i]; + + // prepare inverse + CloverInv = (-1.0)*Clover; + + Clover = ExpClover * diag_mass; + + ExpClover = Zero(); + IdentityTimesC(ExpClover, cn[NMAX]); + for (int i=NMAX-1; i>=0; i--) + ExpClover = ExpClover * CloverInv + cn[i]; + + CloverInv = ExpClover * (1.0/diag_mass); + + } + + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + assert(0); + return lambda; + } + +}; + + +////////////////////////////////// +// Compact Standard Clover +////////////////////////////////// + + +template +class CompactCloverHelpers: public CompactWilsonCloverHelpers, + public WilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + typedef WilsonCloverHelpers Helpers; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + Clover += diag_mass; + } + + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, + CloverTriangleField& Triangle, + RealD csw_t, RealD diag_mass) { + + // Do nothing + } + + // TODO: implement Cmunu for better performances with compact layout, but don't do it + // here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + return Helpers::Cmunu(U, lambda, mu, nu); + } +}; + +////////////////////////////////// +// Compact Exp Clover +////////////////////////////////// + +template +class CompactExpCloverHelpers: public CompactWilsonCloverHelpers { +public: + + INHERIT_IMPL_TYPES(Impl); + INHERIT_CLOVER_TYPES(Impl); + INHERIT_COMPACT_CLOVER_TYPES(Impl); + + template using iImplClover = iScalar, Ns>>; + typedef CompactWilsonCloverHelpers CompactHelpers; + + static void MassTerm(CloverField& Clover, RealD diag_mass) { + // do nothing! + // mass term is multiplied to exp(Clover) below + } + + static int getNMAX(RealD prec, RealD R) { + /* compute stop condition for exponential */ + int NMAX=1; + RealD cond=R*R/2.; + + while (cond*std::exp(R)>prec) { + NMAX++; + cond*=R/(double)(NMAX+1); + } + return NMAX; + } + + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-12,R);} + static int getNMAX(Lattice> &t, RealD R) {return getNMAX(1e-6,R);} + + static void ExponentiateHermitean6by6(const iMatrix &arg, const RealD& alpha, const std::vector& cN, const int Niter, iMatrix& dest){ + + typedef iMatrix mat; + + RealD qn[6]; + RealD qnold[6]; + RealD p[5]; + RealD trA2, trA3, trA4; + + mat A2, A3, A4, A5; + A2 = alpha * alpha * arg * arg; + A3 = alpha * arg * A2; + A4 = A2 * A2; + A5 = A2 * A3; + + trA2 = toReal( trace(A2) ); + trA3 = toReal( trace(A3) ); + trA4 = toReal( trace(A4)); + + p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0; + p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0; + p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2; + p[3] = trA3 / 3.0; + p[4] = 0.5 * trA2; + + qnold[0] = cN[Niter]; + qnold[1] = 0.0; + qnold[2] = 0.0; + qnold[3] = 0.0; + qnold[4] = 0.0; + qnold[5] = 0.0; + + for(int i = Niter-1; i >= 0; i--) + { + qn[0] = p[0] * qnold[5] + cN[i]; + qn[1] = p[1] * qnold[5] + qnold[0]; + qn[2] = p[2] * qnold[5] + qnold[1]; + qn[3] = p[3] * qnold[5] + qnold[2]; + qn[4] = p[4] * qnold[5] + qnold[3]; + qn[5] = qnold[4]; + + qnold[0] = qn[0]; + qnold[1] = qn[1]; + qnold[2] = qn[2]; + qnold[3] = qn[3]; + qnold[4] = qn[4]; + qnold[5] = qn[5]; + } + + mat unit(1.0); + + dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5); + + } + + static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) { + + GridBase* grid = Diagonal.Grid(); + int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass); + + // + // Implementation completely in Daniel's layout + // + + // Taylor expansion with Cayley-Hamilton recursion + // underlying Horner scheme as above + std::vector cn(NMAX+1); + cn[0] = 1.0; + for (int i=1; i<=NMAX; i++){ + cn[i] = cn[i-1] / RealD(i); + } + + // Taken over from Daniel's implementation + conformable(Diagonal, Triangle); + + long lsites = grid->lSites(); + { + typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal; + typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle; + typedef iMatrix mat; + + autoView(diagonal_v, Diagonal, CpuRead); + autoView(triangle_v, Triangle, CpuRead); + autoView(diagonalExp_v, Diagonal, CpuWrite); + autoView(triangleExp_v, Triangle, CpuWrite); + + thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite + + mat srcCloverOpUL(0.0); // upper left block + mat srcCloverOpLR(0.0); // lower right block + mat ExpCloverOp; + + scalar_object_diagonal diagonal_tmp = Zero(); + scalar_object_diagonal diagonal_exp_tmp = Zero(); + scalar_object_triangle triangle_tmp = Zero(); + scalar_object_triangle triangle_exp_tmp = Zero(); + + Coordinate lcoor; + grid->LocalIndexToLocalCoor(site, lcoor); + + peekLocalSite(diagonal_tmp, diagonal_v, lcoor); + peekLocalSite(triangle_tmp, triangle_v, lcoor); + + int block; + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpUL(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpUL(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + srcCloverOpLR(i,j) = static_cast(TensorRemove(diagonal_tmp()(block)(i))); + } + else{ + srcCloverOpLR(i,j) = static_cast(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j))); + } + } + } + + // exp(Clover) + + ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 0; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp); + + block = 1; + for(int i = 0; i < 6; i++){ + for(int j = 0; j < 6; j++){ + if (i == j){ + diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j); + } + else if(i < j){ + triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j); + } + } + } + + pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor); + pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor); + }); + } + + Diagonal *= diag_mass; + Triangle *= diag_mass; + } + + + static GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { + assert(0); + return lambda; + } + +}; + + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h index 3a166134..249b20bd 100644 --- a/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/CompactWilsonCloverFermion.h @@ -31,6 +31,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid); // + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site // = 84 complex words per site -template +template class CompactWilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers, public CompactWilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 78cf7851..223ff9dd 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -138,38 +138,52 @@ typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; // Clover fermions -typedef WilsonCloverFermion WilsonCloverFermionR; -typedef WilsonCloverFermion WilsonCloverFermionF; -typedef WilsonCloverFermion WilsonCloverFermionD; +template using WilsonClover = WilsonCloverFermion>; +template using WilsonExpClover = WilsonCloverFermion>; -typedef WilsonCloverFermion WilsonCloverAdjFermionR; -typedef WilsonCloverFermion WilsonCloverAdjFermionF; -typedef WilsonCloverFermion WilsonCloverAdjFermionD; +typedef WilsonClover WilsonCloverFermionR; +typedef WilsonClover WilsonCloverFermionF; +typedef WilsonClover WilsonCloverFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexSymmetricFermionD; +typedef WilsonExpClover WilsonExpCloverFermionR; +typedef WilsonExpClover WilsonExpCloverFermionF; +typedef WilsonExpClover WilsonExpCloverFermionD; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionR; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionF; -typedef WilsonCloverFermion WilsonCloverTwoIndexAntiSymmetricFermionD; +typedef WilsonClover WilsonCloverAdjFermionR; +typedef WilsonClover WilsonCloverAdjFermionF; +typedef WilsonClover WilsonCloverAdjFermionD; + +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexSymmetricFermionD; + +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionR; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionF; +typedef WilsonClover WilsonCloverTwoIndexAntiSymmetricFermionD; // Compact Clover fermions -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverFermionD; +template using CompactWilsonClover = CompactWilsonCloverFermion>; +template using CompactWilsonExpClover = CompactWilsonCloverFermion>; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverAdjFermionD; +typedef CompactWilsonClover CompactWilsonCloverFermionR; +typedef CompactWilsonClover CompactWilsonCloverFermionF; +typedef CompactWilsonClover CompactWilsonCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexSymmetricFermionD; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionR; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionF; +typedef CompactWilsonExpClover CompactWilsonExpCloverFermionD; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionR; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionF; -typedef CompactWilsonCloverFermion CompactWilsonCloverTwoIndexAntiSymmetricFermionD; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionR; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionF; +typedef CompactWilsonClover CompactWilsonCloverAdjFermionD; + +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexSymmetricFermionD; + +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionR; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionF; +typedef CompactWilsonClover CompactWilsonCloverTwoIndexAntiSymmetricFermionD; // Domain Wall fermions typedef DomainWallFermion DomainWallFermionR; diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index 0c159300..66644d7f 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -49,7 +49,7 @@ public: virtual FermionField &tmp(void) = 0; - virtual void DirichletBlock(Coordinate & _Block) { assert(0); }; + virtual void DirichletBlock(const Coordinate & _Block) { assert(0); }; GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index cd19fc10..562f08f4 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -32,6 +32,7 @@ #include #include +#include NAMESPACE_BEGIN(Grid); @@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid); // csw_r = csw_t to recover the isotropic version ////////////////////////////////////////////////////////////////// -template +template class WilsonCloverFermion : public WilsonFermion, public WilsonCloverHelpers { diff --git a/Grid/qcd/action/fermion/WilsonCloverHelpers.h b/Grid/qcd/action/fermion/WilsonCloverHelpers.h index 60f19317..c221d5f0 100644 --- a/Grid/qcd/action/fermion/WilsonCloverHelpers.h +++ b/Grid/qcd/action/fermion/WilsonCloverHelpers.h @@ -209,6 +209,8 @@ public: }; +//////////////////////////////////////////////////////// + template class CompactWilsonCloverHelpers { public: diff --git a/Grid/qcd/action/fermion/WilsonCloverTypes.h b/Grid/qcd/action/fermion/WilsonCloverTypes.h index a5a95d93..5ce3b91b 100644 --- a/Grid/qcd/action/fermion/WilsonCloverTypes.h +++ b/Grid/qcd/action/fermion/WilsonCloverTypes.h @@ -47,8 +47,6 @@ class CompactWilsonCloverTypes { public: INHERIT_IMPL_TYPES(Impl); - static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions"); - static constexpr int Nred = Nc * Nhs; // 6 static constexpr int Nblock = Nhs; // 2 static constexpr int Ndiagonal = Nred; // 6 diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index e0e08c1c..eba04abf 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -297,7 +297,7 @@ public: void ZeroCountersi(void) { } void Reporti(int calls) { } - std::vector surface_list; + // Vector surface_list; WilsonStencil(GridBase *grid, int npoints, @@ -307,10 +307,11 @@ public: : CartesianStencil (grid,npoints,checkerboard,directions,distances,p) { ZeroCountersi(); - surface_list.resize(0); + // surface_list.resize(0); this->same_node.resize(npoints); }; + /* void BuildSurfaceList(int Ls,int vol4){ // find same node for SHM @@ -331,7 +332,8 @@ public: } } } - + */ + template < class compressor> void HaloExchangeOpt(const Lattice &source,compressor &compress) { diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 91abf86a..eced6b81 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -178,16 +178,8 @@ public: GridRedBlackCartesian &FourDimRedBlackGrid, double _M5,const ImplParams &p= ImplParams()); - virtual void DirichletBlock(Coordinate & block) + virtual void DirichletBlock(const Coordinate & block) { - assert(block.size()==Nd+1); - if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ - Dirichlet = 1; - Block = block; - Stencil.DirichletBlock(block); - StencilEven.DirichletBlock(block); - StencilOdd.DirichletBlock(block); - } } // Constructors /* diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 1e2b13e3..51a7990c 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -47,7 +47,7 @@ CayleyFermion5D::CayleyFermion5D(GaugeField &_Umu, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid,_M5,p), - mass(_mass) + mass_plus(_mass), mass_minus(_mass) { } @@ -209,8 +209,8 @@ void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; Vector diag (Ls,1.0); - Vector upper(Ls,-1.0); upper[Ls-1]=mass; - Vector lower(Ls,-1.0); lower[0] =mass; + Vector upper(Ls,-1.0); upper[Ls-1]=mass_minus; + Vector lower(Ls,-1.0); lower[0] =mass_plus; M5D(psi,chi,chi,lower,diag,upper); } template @@ -220,8 +220,8 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D Vector diag = bs; Vector upper= cs; Vector lower= cs; - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,Din,lower,diag,upper); } // FIXME Redunant with the above routine; check this and eliminate @@ -235,8 +235,8 @@ template void CayleyFermion5D::Meo5D (const FermionField & upper[i]=-ceo[i]; lower[i]=-ceo[i]; } - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,chi,lower,diag,upper); } template @@ -250,8 +250,8 @@ void CayleyFermion5D::Mooee (const FermionField &psi, FermionField & upper[i]=-cee[i]; lower[i]=-cee[i]; } - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_minus*upper[Ls-1]; + lower[0] =-mass_plus*lower[0]; M5D(psi,psi,chi,lower,diag,upper); } template @@ -266,9 +266,9 @@ void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField & // Assemble the 5d matrix if ( s==0 ) { upper[s] = -cee[s+1] ; - lower[s] = mass*cee[Ls-1]; + lower[s] = mass_minus*cee[Ls-1]; } else if ( s==(Ls-1)) { - upper[s] = mass*cee[0]; + upper[s] = mass_plus*cee[0]; lower[s] = -cee[s-1]; } else { upper[s]=-cee[s+1]; @@ -291,8 +291,8 @@ void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) Vector diag(Ls,1.0); Vector upper(Ls,-1.0); Vector lower(Ls,-1.0); - upper[Ls-1]=-mass*upper[Ls-1]; - lower[0] =-mass*lower[0]; + upper[Ls-1]=-mass_plus*upper[Ls-1]; + lower[0] =-mass_minus*lower[0]; M5Ddag(psi,chi,chi,lower,diag,upper); } @@ -307,9 +307,9 @@ void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField for (int s=0;s::SetCoefficientsInternal(RealD zolo_hi,Vector::SetCoefficientsInternal(RealD zolo_hi,Vector::SetCoefficientsInternal(RealD zolo_hi,Vector::ContractConservedCurrent( PropagatorField &q_in_1, Current curr_type, unsigned int mu) { + + assert(mass_plus == mass_minus); + RealD mass = mass_plus; + #if (!defined(GRID_HIP)) Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -777,6 +781,8 @@ void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, assert(mu>=0); assert(mu::M5D(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss= sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1, tmp2; - for(int s=0;s::M5Ddag(const FermionField &psi_i, M5Dcalls++; M5Dtime-=usecond(); - uint64_t nloop = grid->oSites()/Ls; + uint64_t nloop = grid->oSites(); accelerator_for(sss,nloop,Simd::Nsimd(),{ - uint64_t ss=sss*Ls; + uint64_t s = sss%Ls; + uint64_t ss= sss-s; typedef decltype(coalescedRead(psi[0])) spinor; spinor tmp1,tmp2; - for(int s=0;s #include + NAMESPACE_BEGIN(Grid); -template -CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, - GridCartesian& Fgrid, - GridRedBlackCartesian& Hgrid, - const RealD _mass, - const RealD _csw_r, - const RealD _csw_t, - const RealD _cF, - const WilsonAnisotropyCoefficients& clover_anisotropy, - const ImplParams& impl_p) +template +CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, + GridCartesian& Fgrid, + GridRedBlackCartesian& Hgrid, + const RealD _mass, + const RealD _csw_r, + const RealD _csw_t, + const RealD _cF, + const WilsonAnisotropyCoefficients& clover_anisotropy, + const ImplParams& impl_p) : WilsonBase(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy) , csw_r(_csw_r) , csw_t(_csw_t) @@ -58,50 +59,55 @@ CompactWilsonCloverFermion::CompactWilsonCloverFermion(GaugeField& _Umu, , BoundaryMask(&Fgrid) , BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid) { + assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3); + csw_r *= 0.5; csw_t *= 0.5; if (clover_anisotropy.isAnisotropic) csw_r /= clover_anisotropy.xi_0; ImportGauge(_Umu); - if (open_boundaries) + if (open_boundaries) { + this->BoundaryMaskEven.Checkerboard() = Even; + this->BoundaryMaskOdd.Checkerboard() = Odd; CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); + } } -template -void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::Dhop(const FermionField& in, FermionField& out, int dag) { WilsonBase::Dhop(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopOE(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopOE(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { +template +void CompactWilsonCloverFermion::DhopEO(const FermionField& in, FermionField& out, int dag) { WilsonBase::DhopEO(in, out, dag); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { WilsonBase::DhopDir(in, out, dir, disp); if(this->open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::DhopDirAll(const FermionField& in, std::vector& out) { WilsonBase::DhopDirAll(in, out); if(this->open_boundaries) { for(auto& o : out) ApplyBoundaryMask(o); } } -template -void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc Mooee(in, Tmp); @@ -109,8 +115,8 @@ void CompactWilsonCloverFermion::M(const FermionField& in, FermionField& o if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField& out) { out.Checkerboard() = in.Checkerboard(); WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc MooeeDag(in, Tmp); @@ -118,20 +124,20 @@ void CompactWilsonCloverFermion::Mdag(const FermionField& in, FermionField if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Meooe(const FermionField& in, FermionField& out) { WilsonBase::Meooe(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MeooeDag(const FermionField& in, FermionField& out) { WilsonBase::MeooeDag(in, out); if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalOdd, TriangleOdd); @@ -144,13 +150,13 @@ void CompactWilsonCloverFermion::Mooee(const FermionField& in, FermionFiel if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeDag(const FermionField& in, FermionField& out) { Mooee(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionField& out) { if(in.Grid()->_isCheckerBoarded) { if(in.Checkerboard() == Odd) { MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd); @@ -163,23 +169,23 @@ void CompactWilsonCloverFermion::MooeeInv(const FermionField& in, FermionF if(open_boundaries) ApplyBoundaryMask(out); } -template -void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { +template +void CompactWilsonCloverFermion::MooeeInvDag(const FermionField& in, FermionField& out) { MooeeInv(in, out); // blocks are hermitian } -template -void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { +template +void CompactWilsonCloverFermion::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { DhopDir(in, out, dir, disp); } -template -void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { +template +void CompactWilsonCloverFermion::MdirAll(const FermionField& in, std::vector& out) { DhopDirAll(in, out); } -template -void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { +template +void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { assert(!open_boundaries); // TODO check for changes required for open bc // NOTE: code copied from original clover term @@ -251,7 +257,7 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -261,18 +267,18 @@ void CompactWilsonCloverFermion::MDeriv(GaugeField& force, const FermionFi force += clover_force; } -template -void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { +template +void CompactWilsonCloverFermion::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { assert(0); } -template -void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, +template +void CompactWilsonCloverFermion::MooeeInternal(const FermionField& in, FermionField& out, const CloverDiagonalField& diagonal, const CloverTriangleField& triangle) { @@ -285,8 +291,8 @@ void CompactWilsonCloverFermion::MooeeInternal(const FermionField& CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle); } -template -void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { +template +void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { // NOTE: parts copied from original implementation // Import gauge into base class @@ -318,22 +324,27 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; - TmpOriginal += this->diag_mass; - + // Handle mass term based on clover policy + CloverHelpers::MassTerm(TmpOriginal, this->diag_mass); + // Convert the data layout of the clover term double t4 = usecond(); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); - // Possible modify the boundary values + // Exponentiate the clover (nothing happens in case of the standard clover) double t5 = usecond(); + CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass); + + // Possible modify the boundary values + double t6 = usecond(); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); - // Invert the clover term in the improved layout - double t6 = usecond(); + // Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions) + double t7 = usecond(); CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); // Fill the remaining clover fields - double t7 = usecond(); + double t8 = usecond(); pickCheckerboard(Even, DiagonalEven, Diagonal); pickCheckerboard(Even, TriangleEven, Triangle); pickCheckerboard(Odd, DiagonalOdd, Diagonal); @@ -344,20 +355,19 @@ void CompactWilsonCloverFermion::ImportGauge(const GaugeField& _Umu) { pickCheckerboard(Odd, TriangleInvOdd, TriangleInv); // Report timings - double t8 = usecond(); -#if 0 - std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", convert = " << (t5 - t4) / 1e6 - << ", boundaries = " << (t6 - t5) / 1e6 - << ", inversions = " << (t7 - t6) / 1e6 - << ", pick cbs = " << (t8 - t7) / 1e6 - << ", total = " << (t8 - t0) / 1e6 - << std::endl; -#endif + double t9 = usecond(); + + std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl; + std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl; } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index e4d2e736..2a7e7535 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -34,8 +34,8 @@ NAMESPACE_BEGIN(Grid); -template -WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, +template +WilsonCloverFermion::WilsonCloverFermion(GaugeField& _Umu, GridCartesian& Fgrid, GridRedBlackCartesian& Hgrid, const RealD _mass, @@ -74,8 +74,8 @@ WilsonCloverFermion::WilsonCloverFermion(GaugeField& } // *NOT* EO -template -void WilsonCloverFermion::M(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::M(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -89,8 +89,8 @@ void WilsonCloverFermion::M(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -104,8 +104,8 @@ void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) out += temp; } -template -void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) +template +void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { double t0 = usecond(); WilsonFermion::ImportGauge(_Umu); @@ -131,47 +131,11 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += Helpers::fillCloverXT(Ex) * csw_t; CloverTerm += Helpers::fillCloverYT(Ey) * csw_t; CloverTerm += Helpers::fillCloverZT(Ez) * csw_t; - CloverTerm += diag_mass; - + double t4 = usecond(); - int lvol = _Umu.Grid()->lSites(); - int DimRep = Impl::Dimension; + CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass); double t5 = usecond(); - { - autoView(CTv,CloverTerm,CpuRead); - autoView(CTIv,CloverTermInv,CpuWrite); - thread_for(site, lvol, { - Coordinate lcoor; - grid->LocalIndexToLocalCoor(site, lcoor); - Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero(); - peekLocalSite(Qx, CTv, lcoor); - //if (csw!=0){ - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++){ - auto zz = Qx()(j, k)(a, b); - EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); - } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } - pokeLocalSite(Qxinv, CTIv, lcoor); - }); - } - - double t6 = usecond(); // Separate the even and odd parts pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); @@ -184,48 +148,44 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); - double t7 = usecond(); + double t6 = usecond(); -#if 0 - std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" - << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 - << ", allocations = " << (t2 - t1) / 1e6 - << ", field strength = " << (t3 - t2) / 1e6 - << ", fill clover = " << (t4 - t3) / 1e6 - << ", misc = " << (t5 - t4) / 1e6 - << ", inversions = " << (t6 - t5) / 1e6 - << ", pick cbs = " << (t7 - t6) / 1e6 - << ", total = " << (t7 - t0) / 1e6 - << std::endl; -#endif + std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl; + std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl; + std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl; + std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl; + std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl; + std::cout << GridLogDebug << "instantiation = " << (t5 - t4) / 1e6 << std::endl; + std::cout << GridLogDebug << "pick cbs = " << (t6 - t5) / 1e6 << std::endl; + std::cout << GridLogDebug << "total = " << (t6 - t0) / 1e6 << std::endl; } -template -void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::Mooee(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseNo); } -template -void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseNo); } -template -void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInv(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerNo, InverseYes); } -template -void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) +template +void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField &out) { this->MooeeInternal(in, out, DaggerYes, InverseYes); } -template -void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) +template +void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) { out.Checkerboard() = in.Checkerboard(); CloverField *Clover; @@ -278,8 +238,8 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } // MooeeInternal // Derivative parts unpreconditioned pseudofermions -template -void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) { conformable(X.Grid(), Y.Grid()); conformable(X.Grid(), force.Grid()); @@ -349,7 +309,7 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok - force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked + force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked count++; } @@ -360,15 +320,15 @@ void WilsonCloverFermion::MDeriv(GaugeField &force, const FermionField &X, } // Derivative parts -template -void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) +template +void WilsonCloverFermion::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) { assert(0); } // Derivative parts -template -void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +template +void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { assert(0); // not implemented yet } diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 7775ad9d..51c7df57 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -92,6 +92,19 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); } + if ( p.dirichlet.size() == Nd+1) { + Coordinate block = p.dirichlet; + if ( block[0] || block[1] || block[2] || block[3] || block[4] ){ + Dirichlet = 1; + Block = block; + } + } else { + Coordinate block(Nd+1,0); + Block = block; + } + + ZeroCounters(); + if (Impl::LsVectorised) { int nsimd = Simd::Nsimd(); diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 84ac25c1..c958019d 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -4,12 +4,13 @@ Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/qcd/action/fermion/WilsonFermion.cc -Copyright (C) 2015 +Copyright (C) 2022 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle +Author: Fabian Joswig This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -599,11 +600,47 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Current curr_type, unsigned int mu) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + Gamma g5(Gamma::Algebra::Gamma5); conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + auto UGrid= this->GaugeGrid(); + + PropagatorField tmp_shifted(UGrid); + PropagatorField g5Lg5(UGrid); + PropagatorField R(UGrid); + PropagatorField gmuR(UGrid); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + g5Lg5=g5*q_in_1*g5; + tmp_shifted=Cshift(q_in_2,mu,1); + Impl::multLinkField(R,this->Umu,tmp_shifted,mu); + gmuR=gmu*R; + + q_out=adj(g5Lg5)*R; + q_out-=adj(g5Lg5)*gmuR; + + tmp_shifted=Cshift(q_in_1,mu,1); + Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu); + g5Lg5=g5*g5Lg5*g5; + R=q_in_2; + gmuR=gmu*R; + + q_out-=adj(g5Lg5)*R; + q_out-=adj(g5Lg5)*gmuR; } @@ -617,9 +654,51 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, unsigned int tmax, ComplexField &lattice_cmplx) { + if(curr_type != Current::Vector) + { + std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl; + exit(1); + } + + int tshift = (mu == Nd-1) ? 1 : 0; + unsigned int LLt = GridDefaultLatt()[Tp]; conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); - assert(0); + auto UGrid= this->GaugeGrid(); + + PropagatorField tmp(UGrid); + PropagatorField Utmp(UGrid); + PropagatorField L(UGrid); + PropagatorField zz (UGrid); + zz=Zero(); + LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + }; + Gamma gmu=Gamma(Gmu[mu]); + + tmp = Cshift(q_in,mu,1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu); + tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop + tmp = where((lcoor>=tmin),tmp,zz); // Mask the time + q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated + + tmp = q_in *lattice_cmplx; + tmp = Cshift(tmp,mu,-1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link + tmp = -( Utmp + gmu*Utmp ); + // Mask the time + if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice + unsigned int t0 = 0; + tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz); + } else { + tmp = where((lcoor>=tmin+tshift),tmp,zz); + } + q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 9228b84c..623da5cf 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -440,6 +440,17 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); +#define KERNEL_CALL_EXT(A) \ + const uint64_t NN = Nsite*Ls; \ + const uint64_t sz = st.surface_list.size(); \ + auto ptr = &st.surface_list[0]; \ + accelerator_forNB( ss, sz, Simd::Nsimd(), { \ + int sF = ptr[ss]; \ + int sU = ss/Ls; \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); \ + accelerator_barrier(); + #define ASM_CALL(A) \ thread_for( ss, Nsite, { \ int sU = ss; \ diff --git a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master index 7c91c252..1ea1549c 100644 --- a/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation.cc.master @@ -9,6 +9,7 @@ Author: paboyle Author: Guido Cossu Author: Daniel Richtmann + Author: Mattia Bruno This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,10 +33,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class CompactWilsonCloverFermion; +template class CompactWilsonCloverFermion>; +template class CompactWilsonCloverFermion>; NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master index af99dfb6..2cea3656 100644 --- a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master +++ b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master @@ -8,7 +8,8 @@ Author: paboyle Author: Guido Cossu - + Author: Mattia Bruno + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -31,10 +32,12 @@ #include #include #include +#include NAMESPACE_BEGIN(Grid); #include "impl.h" -template class WilsonCloverFermion; +template class WilsonCloverFermion>; +template class WilsonCloverFermion>; NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc deleted file mode 100644 index f0b15e3b..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015, 2020 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: Nils Meyer Regensburg University - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -#ifndef AVX512 -#ifndef QPX -#ifndef A64FX -#ifndef A64FXFIXEDSIZE -#include -#endif -#endif -#endif -#endif - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonKernels; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..01c35e7b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index a09fd420..d6845e75 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \ GparityWilsonImplF \ GparityWilsonImplD " +COMPACT_WILSON_IMPL_LIST=" \ + WilsonImplF \ + WilsonImplD " + DWF_IMPL_LIST=" \ WilsonImplF \ WilsonImplD \ @@ -40,13 +44,23 @@ EOF done -CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" +CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" for impl in $WILSON_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc +done +done + +CC_LIST="CompactWilsonCloverFermionInstantiation" + +for impl in $COMPACT_WILSON_IMPL_LIST +do +for f in $CC_LIST +do + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done @@ -63,14 +77,14 @@ for impl in $DWF_IMPL_LIST $GDWF_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done # overwrite the .cc file in Gparity directories for impl in $GDWF_IMPL_LIST do - ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc + ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc done @@ -84,7 +98,7 @@ for impl in $STAG_IMPL_LIST do for f in $CC_LIST do - ln -f -s ../$f.cc.master $impl/$f$impl.cc + ln -f -s ../$f.cc.master $impl/$f$impl.cc done done diff --git a/Grid/qcd/action/filters/DirichletFilter.h b/Grid/qcd/action/filters/DirichletFilter.h index 95353dab..e388891f 100644 --- a/Grid/qcd/action/filters/DirichletFilter.h +++ b/Grid/qcd/action/filters/DirichletFilter.h @@ -53,9 +53,9 @@ struct DirichletFilter: public MomentumFilterBase LatticeInteger coor(grid); LatCM zz(grid); zz = Zero(); for(int mu=0;muGlobalDimensions()[mu] ) ) { + if ( (Block[mu]) && (Block[mu] <= grid->GlobalDimensions()[mu] ) ) { // If costly could provide Grid earlier and precompute masks - std::cout << " Dirichlet in mu="<gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x]) Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR; diff --git a/Grid/qcd/utils/GaugeFix.h b/Grid/qcd/utils/GaugeFix.h index c0bc2c83..d9d03c54 100644 --- a/Grid/qcd/utils/GaugeFix.h +++ b/Grid/qcd/utils/GaugeFix.h @@ -72,14 +72,13 @@ public: //Fix the gauge field Umu //0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf - static void SteepestDescentGaugeFix(GaugeLorentz &Umu, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) { + static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) { GridBase *grid = Umu.Grid(); GaugeMat xform(grid); - SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog); + SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge); } - + static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) { //Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform - static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) { GridBase *grid = Umu.Grid(); @@ -141,7 +140,9 @@ public: } } - assert(0 && "Gauge fixing did not converge within the specified number of iterations"); + std::cout << GridLogError << "Gauge fixing did not converge in " << maxiter << " iterations." << std::endl; + if (err_on_no_converge) + assert(0 && "Gauge fixing did not converge within the specified number of iterations"); }; static Real SteepestDescentStep(std::vector &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) { GridBase *grid = U[0].Grid(); diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index da1f5ac8..d6efbd5d 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -215,7 +215,7 @@ public: double vol = Umu.Grid()->gSites(); - return p.real() / vol / 4.0 / 3.0; + return p.real() / vol / (4.0 * Nc ) ; }; ////////////////////////////////////////////////// diff --git a/Grid/stencil/SimpleCompressor.h b/Grid/stencil/SimpleCompressor.h index 1150b234..b36d954f 100644 --- a/Grid/stencil/SimpleCompressor.h +++ b/Grid/stencil/SimpleCompressor.h @@ -52,6 +52,11 @@ public: return arg; } }; +class SimpleStencilParams{ +public: + Coordinate dirichlet; + SimpleStencilParams() {}; +}; NAMESPACE_END(Grid); diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 3e356a1c..eb73ba5f 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -131,7 +131,6 @@ class CartesianStencilAccelerator { int _checkerboard; int _npoints; // Move to template param? int _osites; - int _dirichlet; StencilVector _directions; StencilVector _distances; StencilVector _comms_send; @@ -503,7 +502,6 @@ public: } void AddCopy(void *from,void * to, Integer bytes) { - // std::cout << "Adding CopyReceiveBuffer "<_dirichlet = 1; for(int ii=0;ii_npoints;ii++){ int dimension = this->_directions[ii]; int displacement = this->_distances[ii]; - int shift = displacement; int gd = _grid->_gdimensions[dimension]; int fd = _grid->_fdimensions[dimension]; int pd = _grid->_processors [dimension]; - int ld = gd/pd; int pc = _grid->_processor_coor[dimension]; + int ld = fd/pd; /////////////////////////////////////////// // Figure out dirichlet send and receive // on this leg of stencil. @@ -668,25 +665,25 @@ public: int block = dirichlet_block[dimension]; this->_comms_send[ii] = comm_dim; this->_comms_recv[ii] = comm_dim; - if ( block ) { + if ( block && comm_dim ) { assert(abs(displacement) < ld ); - + // Quiesce communication across block boundaries if( displacement > 0 ) { // High side, low side // | <--B--->| // | | | // noR // noS - if ( (ld*(pc+1) ) % block == 0 ) this->_comms_recv[ii] = 0; - if ( ( ld*pc ) % block == 0 ) this->_comms_send[ii] = 0; + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_recv[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_send[ii] = 0; } else { // High side, low side // | <--B--->| // | | | // noS // noR - if ( (ld*(pc+1) ) % block == 0 ) this->_comms_send[ii] = 0; - if ( ( ld*pc ) % block == 0 ) this->_comms_recv[ii] = 0; + if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0; + if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0; } } } @@ -698,7 +695,6 @@ public: const std::vector &distances, Parameters p) { - this->_dirichlet = 0; face_table_computed=0; _grid = grid; this->parameters=p; @@ -715,6 +711,8 @@ public: this->_comms_recv.resize(npoints); this->same_node.resize(npoints); + if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up + _unified_buffer_size=0; surface_list.resize(0); @@ -734,7 +732,7 @@ public: int gd = _grid->_gdimensions[dimension]; int fd = _grid->_fdimensions[dimension]; int pd = _grid->_processors [dimension]; - int ld = gd/pd; + // int ld = gd/pd; int rd = _grid->_rdimensions[dimension]; int pc = _grid->_processor_coor[dimension]; this->_permute_type[point]=_grid->PermuteType(dimension); @@ -746,9 +744,6 @@ public: int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); int rotate_dim = _grid->_simd_layout[dimension]>2; - this->_comms_send[ii] = comm_dim; - this->_comms_recv[ii] = comm_dim; - assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported int sshift[2]; @@ -878,12 +873,14 @@ public: for(int x=0;xPermuteType(dimension); + int permute_slice; int sx = (x+sshift)%rd; int offnode = 0; if ( simd_layout > 1 ) { + permute_slice=1; for(int i=0;i>(permute_type+1)); @@ -900,6 +897,7 @@ public: } else { int comm_proc = ((x+sshift)/rd)%pd; offnode = (comm_proc!= 0); + permute_slice=0; } int wraparound=0; @@ -911,25 +909,29 @@ public: } // Wrap locally dirichlet support case OR node local - if ( (offnode==0) || (comms_recv==0) ) { + if ( offnode==0 ) { - int permute_slice=0; + permute_slice=0; CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); - + } else { + if ( comms_recv ) { + + ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase + + } else { + + CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); + + } + + } + + if ( offnode ) { int words = buffer_size; if (cbmask != 0x3) words=words>>1; - - // int rank = grid->_processor; - // int recv_from_rank; - // int xmit_to_rank; - - int unified_buffer_offset = _unified_buffer_size; _unified_buffer_size += words; - - ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase - } } } @@ -1060,8 +1062,6 @@ public: int comm_proc = ((x+sshift)/rd)%pd; if (comm_proc) { - - int words = buffer_size; if (cbmask != 0x3) words=words>>1; @@ -1069,64 +1069,70 @@ public: int bytes = words * compress.CommDatumSize(); int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - if ( !face_table_computed ) { - face_table.resize(face_idx+1); - std::vector > face_table_host ; - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); - face_table[face_idx].resize(face_table_host.size()); - acceleratorCopyToDevice(&face_table_host[0], - &face_table[face_idx][0], - face_table[face_idx].size()*sizeof(face_table_host[0])); - } + int comm_off = u_comm_offset; - // int rank = _grid->_processor; int recv_from_rank; int xmit_to_rank; + cobj *recv_buf; + cobj *send_buf; _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); assert (xmit_to_rank != _grid->ThisRank()); assert (recv_from_rank != _grid->ThisRank()); - cobj *recv_buf; - if ( compress.DecompressionStep() ) { - recv_buf=u_simd_recv_buf[0]; - } else { - recv_buf=this->u_recv_buf_p; + if( comms_send ) { + + if ( !face_table_computed ) { + face_table.resize(face_idx+1); + std::vector > face_table_host ; + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); + face_table[face_idx].resize(face_table_host.size()); + acceleratorCopyToDevice(&face_table_host[0], + &face_table[face_idx][0], + face_table[face_idx].size()*sizeof(face_table_host[0])); + } + + + if ( compress.DecompressionStep() ) { + recv_buf=u_simd_recv_buf[0]; + } else { + recv_buf=this->u_recv_buf_p; + } + + send_buf = this->u_send_buf_p; // Gather locally, must send + + //////////////////////////////////////////////////////// + // Gather locally + //////////////////////////////////////////////////////// + assert(send_buf!=NULL); + + Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so); } - - cobj *send_buf; - send_buf = this->u_send_buf_p; // Gather locally, must send - - //////////////////////////////////////////////////////// - // Gather locally - //////////////////////////////////////////////////////// - assert(send_buf!=NULL); - if ( comms_send ) - Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so); - face_idx++; - - int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes,cbmask); + int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask); if ( (!duplicate) ) { // Force comms for now /////////////////////////////////////////////////////////// // Build a list of things to do after we synchronise GPUs // Start comms now??? /////////////////////////////////////////////////////////// - AddPacket((void *)&send_buf[u_comm_offset], - (void *)&recv_buf[u_comm_offset], + AddPacket((void *)&send_buf[comm_off], + (void *)&recv_buf[comm_off], xmit_to_rank, comms_send, recv_from_rank, comms_recv, bytes); } - if ( compress.DecompressionStep() ) { - AddDecompress(&this->u_recv_buf_p[u_comm_offset], - &recv_buf[u_comm_offset], + if ( compress.DecompressionStep() && comms_recv ) { + AddDecompress(&this->u_recv_buf_p[comm_off], + &recv_buf[comm_off], words,Decompressions); } + u_comm_offset+=words; - } + face_idx++; + + } } return 0; } @@ -1155,7 +1161,6 @@ public: int permute_type=_grid->PermuteType(dimension); - // std::cout << "SimdNew permute type "< > face_table_host ; - Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); + Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host); face_table[face_idx].resize(face_table_host.size()); acceleratorCopyToDevice(&face_table_host[0], &face_table[face_idx][0], face_table[face_idx].size()*sizeof(face_table_host[0])); } - // if ( comms_send ) - Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); + if ( comms_send || comms_recv ) + Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); face_idx++; //spointers[0] -- low @@ -1226,8 +1232,8 @@ public: int nbr_plane = nbr_ic; assert (sx == nbr_ox); - auto rp = &u_simd_recv_buf[i ][u_comm_offset]; - auto sp = &u_simd_send_buf[nbr_plane][u_comm_offset]; + auto rp = &u_simd_recv_buf[i ][comm_off]; + auto sp = &u_simd_send_buf[nbr_plane][comm_off]; if(nbr_proc){ @@ -1253,9 +1259,12 @@ public: } } - AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers); + if ( comms_recv ) { + AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers); + } u_comm_offset +=buffer_size; + } } return 0; diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc index cbc798a9..092d46b3 100644 --- a/Grid/threads/Accelerator.cc +++ b/Grid/threads/Accelerator.cc @@ -16,6 +16,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; #ifdef GRID_CUDA cudaDeviceProp *gpu_props; cudaStream_t copyStream; +cudaStream_t cpuStream; void acceleratorInit(void) { int nDevices = 1; @@ -98,6 +99,7 @@ void acceleratorInit(void) cudaSetDevice(device); cudaStreamCreate(©Stream); + cudaStreamCreate(&cpuStream); const int len=64; char busid[len]; if( rank == world_rank ) { @@ -112,6 +114,7 @@ void acceleratorInit(void) #ifdef GRID_HIP hipDeviceProp_t *gpu_props; hipStream_t copyStream; +hipStream_t cpuStream; void acceleratorInit(void) { int nDevices = 1; @@ -180,6 +183,7 @@ void acceleratorInit(void) #endif hipSetDevice(device); hipStreamCreate(©Stream); + hipStreamCreate(&cpuStream); const int len=64; char busid[len]; if( rank == world_rank ) { diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h index a5fb7aa8..d88c08b4 100644 --- a/Grid/threads/Accelerator.h +++ b/Grid/threads/Accelerator.h @@ -107,6 +107,7 @@ void acceleratorInit(void); extern int acceleratorAbortOnGpuError; extern cudaStream_t copyStream; +extern cudaStream_t cpuStream; accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -134,7 +135,7 @@ inline void cuda_mem(void) }; \ dim3 cu_threads(nsimd,acceleratorThreads(),1); \ dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ - LambdaApply<<>>(num1,num2,nsimd,lambda); \ + LambdaApply<<>>(num1,num2,nsimd,lambda); \ } #define accelerator_for6dNB(iter1, num1, \ @@ -153,7 +154,7 @@ inline void cuda_mem(void) }; \ dim3 cu_blocks (num1,num2,num3); \ dim3 cu_threads(num4,num5,num6); \ - Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ + Lambda6Apply<<>>(num1,num2,num3,num4,num5,num6,lambda); \ } template __global__ @@ -189,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3, #define accelerator_barrier(dummy) \ { \ - cudaDeviceSynchronize(); \ + cudaStreamSynchronize(cpuStream); \ cudaError err = cudaGetLastError(); \ if ( cudaSuccess != err ) { \ printf("accelerator_barrier(): Cuda error %s \n", \ @@ -339,6 +340,7 @@ NAMESPACE_BEGIN(Grid); #define accelerator_inline __host__ __device__ inline extern hipStream_t copyStream; +extern hipStream_t cpuStream; /*These routines define mapping from thread grid to loop & vector lane indexing */ accelerator_inline int acceleratorSIMTlane(int Nsimd) { #ifdef GRID_SIMT @@ -360,12 +362,12 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \ hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,cpuStream, \ + num1,num2,nsimd, lambda); \ } else { \ hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ - 0,0, \ - num1,num2,nsimd, lambda); \ + 0,cpuStream, \ + num1,num2,nsimd, lambda); \ } \ } @@ -398,7 +400,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) #define accelerator_barrier(dummy) \ { \ - hipDeviceSynchronize(); \ + hipStreamSynchronize(cpuStream); \ auto err = hipGetLastError(); \ if ( err != hipSuccess ) { \ printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \ diff --git a/HMC/Mobius2p1f_DD_RHMC_96I.cc b/HMC/Mobius2p1f_DD_RHMC_96I.cc new file mode 100644 index 00000000..a6a2f26c --- /dev/null +++ b/HMC/Mobius2p1f_DD_RHMC_96I.cc @@ -0,0 +1,419 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_EODWFRatio.cc + +Copyright (C) 2015-2016 + +Author: Peter Boyle +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) { + using namespace Grid; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + + // Typedefs to simplify notation + typedef WilsonImplR FermionImplPolicy; + typedef MobiusFermionR FermionAction; + typedef typename FermionAction::FermionField FermionField; + + typedef Grid::XmlReader Serialiser; + + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + IntegratorParameters MD; + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 6; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + // Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real beta = 2.13; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + // std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + std::vector hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-4; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-7; + OFRp.mdtolerance= 1.0e-4; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + // Block4[0] = Dirichlet[1]; + // Block4[1] = Dirichlet[2]; + // Block4[2] = Dirichlet[3]; + Block4[0] = 0; + Block4[1] = 0; + Block4[2] = 0; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grid + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-8; + double MDStoppingCondition = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector *> Quotients; + std::vector *> Bdys; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h SdagS(StrangeOp); + HighBoundCheck(SdagS,vec,SFRp.hi); + ChebyBoundsCheck(SdagS,vec,SFRp.lo,SFRp.hi); + std::cout << "Strange inversion"<Mass() < UdagU(*Denominators[0]); + HighBoundCheck(UdagU,vec,OFRp.hi); + ChebyBoundsCheck(UdagU,vec,OFRp.lo,OFRp.hi); + std::cout << "light inversion"< SddagSd(StrangeOpDir); + HighBoundCheck(SddagSd,vec,OFRp.hi); + ChebyBoundsCheck(SddagSd,vec,OFRp.lo,OFRp.hi); + std::cout << "strange dirichlet inversion"<Mass()< UddagUd(*Numerators[0]); + HighBoundCheck(UddagUd,vec,OFRp.hi); + ChebyBoundsCheck(UddagUd,vec,OFRp.lo,OFRp.hi); + std::cout << "light dirichlet inversion"< Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UddagUd); + PlainHermOp Op (UddagUd); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid);src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int ev=0;ev Cheby(bound,90.,order); + FunctionHermOp OpCheby(Cheby,UdagU); + PlainHermOp Op (UdagU); + ImplicitlyRestartedLanczos IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt); + std::vector eval(Nm); + std::vector evec(Nm,rbgrid); + FermionField src(rbgrid); src = 1.0; + IRL.calc(eval,evec,src,Nconv); + + FermionField tmp(rbgrid); + FermionField ftmp(grid); + FermionField ftmp4(grid4); + for(int e=0;e +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +template + class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction { + public: + typedef typename FermionOperatorD::FermionField FieldD; + typedef typename FermionOperatorF::FermionField FieldF; + + using OperatorFunction::operator(); + + RealD Tolerance; + RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid4; //Grid for single-precision fields + GridBase* SinglePrecGrid5; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + + FermionOperatorF &FermOpF; + FermionOperatorD &FermOpD;; + SchurOperatorF &LinOpF; + SchurOperatorD &LinOpD; + + Integer TotalInnerIterations; //Number of inner CG iterations + Integer TotalOuterIterations; //Number of restarts + Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step + + MixedPrecisionConjugateGradientOperatorFunction(RealD tol, + Integer maxinnerit, + Integer maxouterit, + GridBase* _sp_grid4, + GridBase* _sp_grid5, + FermionOperatorF &_FermOpF, + FermionOperatorD &_FermOpD, + SchurOperatorF &_LinOpF, + SchurOperatorD &_LinOpD): + LinOpF(_LinOpF), + LinOpD(_LinOpD), + FermOpF(_FermOpF), + FermOpD(_FermOpD), + Tolerance(tol), + InnerTolerance(tol), + MaxInnerIterations(maxinnerit), + MaxOuterIterations(maxouterit), + SinglePrecGrid4(_sp_grid4), + SinglePrecGrid5(_sp_grid5), + OuterLoopNormMult(100.) + { + /* Debugging instances of objects; references are stored + std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " < &LinOpU, const FieldD &src, FieldD &psi) { + + std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<(&LinOpU); + + // std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <_Mat)<_Mat)==&(LinOpD._Mat)); + + //////////////////////////////////////////////////////////////////////////////////// + // Must snarf a single precision copy of the gauge field in Linop_d argument + //////////////////////////////////////////////////////////////////////////////////// + typedef typename FermionOperatorF::GaugeField GaugeFieldF; + typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF; + typedef typename FermionOperatorD::GaugeField GaugeFieldD; + typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD; + + GridBase * GridPtrF = SinglePrecGrid4; + GridBase * GridPtrD = FermOpD.Umu.Grid(); + GaugeFieldF U_f (GridPtrF); + GaugeLinkFieldF Umu_f(GridPtrF); + // std::cout << " Dim gauge field "<Nd()<Nd()<(FermOpD.Umu, mu); + precisionChange(Umu_f,Umu_d); + PokeIndex(FermOpF.Umu, Umu_f, mu); + } + pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu); + pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu); + + //////////////////////////////////////////////////////////////////////////////////// + // Make a mixed precision conjugate gradient + //////////////////////////////////////////////////////////////////////////////////// + MixedPrecisionConjugateGradient MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD); + std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" < HMCWrapper; + // MD.name = std::string("Leap Frog"); + // typedef GenericHMCRunner HMCWrapper; + // MD.name = std::string("Force Gradient"); + typedef GenericHMCRunner HMCWrapper; + MD.name = std::string("MinimumNorm2"); + MD.MDsteps = 4; + MD.trajL = 1.0; + + HMCparameters HMCparams; + HMCparams.StartTrajectory = 1077; + HMCparams.Trajectories = 1; + HMCparams.NoMetropolisUntil= 0; + // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + // HMCparams.StartingType =std::string("ColdStart"); + HMCparams.StartingType =std::string("CheckpointStart"); + HMCparams.MD = MD; + HMCWrapper TheHMC(HMCparams); + + // Grid from the command line arguments --grid and --mpi + TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition + + CheckpointerParameters CPparams; + CPparams.config_prefix = "ckpoint_DDHMC_lat"; + CPparams.rng_prefix = "ckpoint_DDHMC_rng"; + CPparams.saveInterval = 1; + CPparams.format = "IEEE64BIG"; + TheHMC.Resources.LoadNerscCheckpointer(CPparams); + + RNGModuleParameters RNGpar; + RNGpar.serial_seeds = "1 2 3 4 5"; + RNGpar.parallel_seeds = "6 7 8 9 10"; + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + // here there is too much indirection + typedef PlaquetteMod PlaqObs; + TheHMC.Resources.AddObservable(); + ////////////////////////////////////////////// + + const int Ls = 12; + RealD M5 = 1.8; + RealD b = 1.5; + RealD c = 0.5; + Real beta = 2.31; + // Real light_mass = 5.4e-4; + Real light_mass = 7.8e-4; + Real strange_mass = 0.02132; + Real pv_mass = 1.0; + std::vector hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); + + // FIXME: + // Same in MC and MD + // Need to mix precision too + OneFlavourRationalParams SFRp; // Strange + SFRp.lo = 4.0e-3; + SFRp.hi = 90.0; + SFRp.MaxIter = 60000; + SFRp.tolerance= 1.0e-8; + SFRp.mdtolerance= 1.0e-6; + SFRp.degree = 12; + SFRp.precision= 50; + SFRp.BoundsCheckFreq=0; + + OneFlavourRationalParams OFRp; // Up/down + OFRp.lo = 2.0e-5; + OFRp.hi = 90.0; + OFRp.MaxIter = 60000; + OFRp.tolerance= 1.0e-8; + OFRp.mdtolerance= 1.0e-6; + // OFRp.degree = 20; converges + // OFRp.degree = 16; + OFRp.degree = 12; + OFRp.precision= 80; + OFRp.BoundsCheckFreq=0; + + auto GridPtr = TheHMC.Resources.GetCartesian(); + auto GridRBPtr = TheHMC.Resources.GetRBCartesian(); + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + typedef MixedPrecisionConjugateGradientOperatorFunction MxPCG; + + //////////////////////////////////////////////////////////////// + // Domain decomposed + //////////////////////////////////////////////////////////////// + Coordinate latt4 = GridPtr->GlobalDimensions(); + Coordinate mpi = GridPtr->ProcessorGrid(); + Coordinate shm; + + GlobalSharedMemory::GetShmDims(mpi,shm); + + Coordinate CommDim(Nd); + for(int d=0;d1 ? 1 : 0; + + Coordinate NonDirichlet(Nd+1,0); + Coordinate Dirichlet(Nd+1,0); + Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0]; + Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1]; + Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2]; + Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3]; + + Coordinate Block4(Nd); + Block4[0] = Dirichlet[1]; + Block4[1] = Dirichlet[2]; + Block4[2] = Dirichlet[3]; + Block4[3] = Dirichlet[4]; + + int Width=3; + TheHMC.Resources.SetMomentumFilter(new DDHMCFilter(Block4,Width)); + + ////////////////////////// + // Fermion Grids + ////////////////////////// + auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr); + auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr); + + Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd()); + auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi); + auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF); + auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF); + auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF); + + IwasakiGaugeActionR GaugeAction(beta); + + // temporarily need a gauge field + LatticeGaugeField U(GridPtr); + LatticeGaugeFieldF UF(GridPtrF); + + std::cout << GridLogMessage << " Running the HMC "<< std::endl; + TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file + TheHMC.initializeGaugeFieldAndRNGs(U); + + + // These lines are unecessary if BC are all periodic + std::vector boundary = {1,1,1,-1}; + FermionAction::ImplParams Params(boundary); + Params.dirichlet=NonDirichlet; + FermionAction::ImplParams ParamsDir(boundary); + ParamsDir.dirichlet=Dirichlet; + + // double StoppingCondition = 1e-14; + // double MDStoppingCondition = 1e-9; + double StoppingCondition = 1e-10; + double MDStoppingCondition = 1e-7; + double MDStoppingConditionLoose = 1e-6; + double MaxCGIterations = 300000; + ConjugateGradient CG(StoppingCondition,MaxCGIterations); + ConjugateGradient MDCG(MDStoppingCondition,MaxCGIterations); + + //////////////////////////////////// + // Collect actions + //////////////////////////////////// + ActionLevel Level1(1); + ActionLevel Level2(4); + ActionLevel Level3(8); + + //////////////////////////////////// + // Strange action + //////////////////////////////////// + FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params); + FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params); + + FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir); + FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir); + + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp); + OneFlavourEvenOddRatioRationalPseudoFermionAction StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp); + Level1.push_back(&StrangePseudoFermionBdy); + Level2.push_back(&StrangePseudoFermionLocal); + Level1.push_back(&StrangePseudoFermionPVBdy); + + //////////////////////////////////// + // up down action + //////////////////////////////////// + std::vector light_den; + std::vector light_num; + std::vector dirichlet_den; + std::vector dirichlet_num; + + int n_hasenbusch = hasenbusch.size(); + light_den.push_back(light_mass); dirichlet_den.push_back(0); + for(int h=0;h Numerators; + std::vector Denominators; + std::vector DenominatorsF; + std::vector *> Quotients; + std::vector *> Bdys; + std::vector ActionMPCG; + std::vector MPCG; + + typedef SchurDiagMooeeOperator LinearOperatorF; + typedef SchurDiagMooeeOperator LinearOperatorD; + std::vector LinOpD; + std::vector LinOpF; + + for(int h=0;h(*Numerators[h],*Denominators[h],MDCG,CG)); + Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG)); + } else { + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction(*Numerators[h],*Denominators[h],OFRp)); + } + } + + int nquo=Quotients.size(); + Level1.push_back(Bdys[0]); + Level1.push_back(Bdys[1]); + for(int h=0;h + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +int main(int argc, char **argv) +{ + using namespace Grid; + + Grid_init(&argc, &argv); + + Coordinate latt4 = GridDefaultLatt(); + Coordinate mpi = GridDefaultMpi(); + Coordinate simd = GridDefaultSimd(Nd,vComplexD::Nsimd()); + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd,mpi); + + GridSerialRNG sRNG; sRNG.SeedUniqueString(std::string("The Serial RNG")); + GridParallelRNG pRNG(UGrid); pRNG.SeedUniqueString(std::string("The 4D RNG")); + + std::string rngfile("ckpoint_rng.0"); + NerscIO::writeRNGState(sRNG, pRNG, rngfile); + + Grid_finalize(); +} + + + diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index 707f330c..c6814cdc 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -191,9 +191,7 @@ int main (int argc, char ** argv) std::cout<Barrier(); diff --git a/benchmarks/Benchmark_dwf_fp32.cc b/benchmarks/Benchmark_dwf_fp32.cc index f79797fa..5ee764c4 100644 --- a/benchmarks/Benchmark_dwf_fp32.cc +++ b/benchmarks/Benchmark_dwf_fp32.cc @@ -249,8 +249,9 @@ void Benchmark(int Ls, Coordinate Dirichlet) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <Barrier(); diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index 0aaccecc..289af41d 100644 --- a/benchmarks/Benchmark_mooee.cc +++ b/benchmarks/Benchmark_mooee.cc @@ -81,8 +81,8 @@ int main (int argc, char ** argv) Vector diag = Dw.bs; Vector upper= Dw.cs; Vector lower= Dw.cs; - upper[Ls-1]=-Dw.mass*upper[Ls-1]; - lower[0] =-Dw.mass*lower[0]; + upper[Ls-1]=-Dw.mass_minus*upper[Ls-1]; + lower[0] =-Dw.mass_plus*lower[0]; LatticeFermion r_eo(FGrid); LatticeFermion src_e (FrbGrid); diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc index 986a1831..50ad1d03 100644 --- a/benchmarks/Benchmark_wilson_sweep.cc +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -44,6 +44,13 @@ void bench_wilson ( double const volume, int const dag ); +void bench_wilson_eo ( + LatticeFermion & src, + LatticeFermion & result, + WilsonFermionR & Dw, + double const volume, + int const dag ); + int main (int argc, char ** argv) { Grid_init(&argc,&argv); @@ -110,8 +117,8 @@ int main (int argc, char ** argv) bench_wilson(src,result,Dw,volume,DaggerYes); std::cout << "\t"; // EO - bench_wilson(src,result,Dw,volume,DaggerNo); - bench_wilson(src,result,Dw,volume,DaggerYes); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerNo); + bench_wilson_eo(src_o,result_e,Dw,volume,DaggerYes); std::cout << std::endl; } } diff --git a/configure.ac b/configure.ac index 406b0b74..528f0125 100644 --- a/configure.ac +++ b/configure.ac @@ -159,7 +159,7 @@ case ${ac_ZMOBIUS} in esac ############### Nc AC_ARG_ENABLE([Nc], - [AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])], + [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])], [ac_Nc=${enable_Nc}], [ac_Nc=3]) case ${ac_Nc} in @@ -394,11 +394,10 @@ case ${CXXTEST} in fi ;; hipcc) -# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" CXXLD=${CXX} if test $ac_openmp = yes; then - CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" + CXXFLAGS="$CXXFLAGS -fopenmp" fi ;; dpcpp) @@ -557,16 +556,19 @@ esac AC_ARG_ENABLE([setdevice],[AC_HELP_STRING([--enable-setdevice | --disable-setdevice], [Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no]) case ${ac_SETDEVICE} in - yes);; - no) + yes) + echo ENABLE SET DEVICE + ;; + *) AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU] ) + echo DISABLE SET DEVICE ;; esac ######################################################### ###################### Shared memory intranode ######### ######################################################### -AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no], +AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no|none], [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no]) case ${ac_SHM} in @@ -586,7 +588,7 @@ case ${ac_SHM} in AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) ;; - shmnone | no) + shmnone | no | none) AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) ;; diff --git a/examples/Example_Laplacian.cc b/examples/Example_Laplacian.cc index 402f7235..587bc66b 100644 --- a/examples/Example_Laplacian.cc +++ b/examples/Example_Laplacian.cc @@ -93,14 +93,14 @@ template class FreeLaplacianStencil : public SparseMatrixBase StencilImpl; + typedef CartesianStencil StencilImpl; GridBase *grid; StencilImpl Stencil; SimpleCompressor Compressor; FreeLaplacianStencil(GridBase *_grid) - : Stencil (_grid,6,Even,directions,displacements,0), grid(_grid) + : Stencil (_grid,6,Even,directions,displacements,SimpleStencilParams()), grid(_grid) { }; virtual GridBase *Grid(void) { return grid; }; @@ -168,7 +168,8 @@ public: typedef iImplDoubledGaugeField SiteDoubledGaugeField; typedef Lattice DoubledGaugeField; - typedef CartesianStencil StencilImpl; + typedef CartesianStencil StencilImpl; + SimpleStencilParams p; GridBase *grid; StencilImpl Stencil; @@ -177,7 +178,7 @@ public: CovariantLaplacianStencil(GaugeField &Umu) : grid(Umu.Grid()), - Stencil (grid,6,Even,directions,displacements,0), + Stencil (grid,6,Even,directions,displacements,p), Uds(grid) { for (int mu = 0; mu < Nd; mu++) { diff --git a/systems/Crusher/dwf4.slurm b/systems/Crusher/dwf4.slurm index 6bb953c4..8e2a6e4c 100644 --- a/systems/Crusher/dwf4.slurm +++ b/systems/Crusher/dwf4.slurm @@ -7,21 +7,19 @@ #SBATCH -o DWF.%J #SBATCH -e DWF.%J #SBATCH -N 1 -#SBATCH -n 4 -#SBATCH --exclusive +#SBATCH -n 2 +#SBATCH --gpu-bind=map_gpu:0,1 DIR=. -module list +source setup.sh + +export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM -export MPICH_SMP_SINGLE_COPY_MODE=NONE -#export MPICH_SMP_SINGLE_COPY_MODE=CMA -export OMP_NUM_THREADS=4 +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -PARAMS=" --accelerator-threads 8 --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0" -srun --gpus-per-task 1 -n4 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS +srun --gpus-per-task 1 -N1 -n2 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.16.32.64 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 diff --git a/systems/Crusher/dwf8.slurm b/systems/Crusher/dwf8.slurm index 2113dcd1..4bc1917a 100644 --- a/systems/Crusher/dwf8.slurm +++ b/systems/Crusher/dwf8.slurm @@ -6,10 +6,10 @@ #SBATCH -J DWF #SBATCH -o DWF.%J #SBATCH -e DWF.%J -#SBATCH -N 8 -#SBATCH -n 64 -#SBATCH --exclusive -#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#SBATCH -N 1 +#SBATCH -n 8 +##SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 +#SBATCH --gpu-bind=map_gpu:0,1,2,3,6,7,4,5 DIR=. source setup.sh @@ -17,25 +17,12 @@ source setup.sh export MPICH_OFI_NIC_POLICY=GPU export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPICH_GPU_SUPPORT_ENABLED=1 -export MPICH_SMP_SINGLE_COPY_MODE=XPMEM +#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM #export MPICH_SMP_SINGLE_COPY_MODE=CMA #export MPICH_SMP_SINGLE_COPY_MODE=NONE -export OMP_NUM_THREADS=1 +export OMP_NUM_THREADS=16 echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -for vol in 64.64.64.256 64.64.64.128 32.32.32.256 32.32.32.128 -do -PARAMS=" --accelerator-threads 8 --grid $vol --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.${vol}.8node.shm-mpi1 -done - -PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node - -PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0" -echo $PARAMS -srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0 +srun --gpus-per-task 1 -N1 -n8 ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.1 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8 diff --git a/systems/Crusher/sourceme.sh b/systems/Crusher/sourceme.sh index 051014dc..ad0d6582 100644 --- a/systems/Crusher/sourceme.sh +++ b/systems/Crusher/sourceme.sh @@ -1,6 +1,6 @@ module load PrgEnv-gnu module load rocm/5.1.0 -module load cray-mpich/8.1.15 +module load cray-mpich/8.1.16 module load gmp #module load cray-fftw module load craype-accel-amd-gfx90a diff --git a/systems/Perlmutter/config-command b/systems/Perlmutter/config-command index b399c535..4f7ecee3 100644 --- a/systems/Perlmutter/config-command +++ b/systems/Perlmutter/config-command @@ -1,9 +1,14 @@ +DIR=`pwd` +PREFIX=$DIR/../Prequisites/install/ ../../configure \ --enable-comms=mpi \ --enable-simd=GPU \ --enable-shm=nvlink \ --enable-gen-simd-width=64 \ --enable-accelerator=cuda \ + --enable-setdevice \ + --disable-accelerator-cshift \ + --with-gmp=$PREFIX \ --disable-fermion-reps \ --disable-unified \ --disable-gparity \ diff --git a/systems/Perlmutter/dwf4.slurm b/systems/Perlmutter/dwf4.slurm index ba198595..426573d9 100644 --- a/systems/Perlmutter/dwf4.slurm +++ b/systems/Perlmutter/dwf4.slurm @@ -1,24 +1,27 @@ #!/bin/bash -#SBATCH -A mp13 +#SBATCH -A m3886_g #SBATCH -C gpu -#SBATCH -q regular +#SBATCH -q debug #SBATCH -t 0:20:00 -#SBATCH -n 16 -#SBATCH --ntasks-per-node=4 #SBATCH -c 32 -#SBATCH --exclusive +#SBATCH -N 1 +#SBATCH -n 4 +#SBATCH --ntasks-per-node=4 #SBATCH --gpus-per-task=1 -#SBATCH --gpu-bind=map_gpu:0,1,2,3 +#SBATCH --exclusive +#SBATCH --gpu-bind=none export SLURM_CPU_BIND="cores" -export MPICH_RDMA_ENABLED_CUDA=1 export MPICH_GPU_SUPPORT_ENABLED=1 -srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node +export MPICH_RDMA_ENABLED_CUDA=1 +export MPICH_GPU_IPC_ENABLED=1 +export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0 +export MPICH_GPU_NO_ASYNC_MEMCPY=0 +#export MPICH_SMP_SINGLE_COPY_MODE=CMA -OPT="--comms-overlap --comms-concurrent --shm-mpi 0" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0 +OPT="--comms-sequential --shm-mpi 1" +VOL=64.64.64.64 +srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT +#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT -OPT="--comms-overlap --comms-concurrent --shm-mpi 1" -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1 -srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1 diff --git a/systems/Perlmutter/sourceme.sh b/systems/Perlmutter/sourceme.sh index 9359dea9..6d09b1c9 100644 --- a/systems/Perlmutter/sourceme.sh +++ b/systems/Perlmutter/sourceme.sh @@ -1,4 +1,4 @@ export CRAY_ACCEL_TARGET=nvidia80 -module load PrgEnv-gnu cpe-cuda cuda +module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4 diff --git a/systems/Summit/config-command b/systems/Summit/config-command index b565addc..4caf652e 100644 --- a/systems/Summit/config-command +++ b/systems/Summit/config-command @@ -2,11 +2,12 @@ --enable-simd=GPU \ --enable-gen-simd-width=32 \ --enable-unified=no \ - --enable-shm=nvlink \ + --enable-shm=no \ --disable-gparity \ - --enable-setdevice \ + --disable-setdevice \ --disable-fermion-reps \ --enable-accelerator=cuda \ + --enable-accelerator-cshift \ --prefix /ccs/home/paboyle/prefix \ CXX=nvcc \ LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \ diff --git a/systems/Summit/dwf16.lsf b/systems/Summit/dwf16.lsf index ef8c21a5..3242fc86 100644 --- a/systems/Summit/dwf16.lsf +++ b/systems/Summit/dwf16.lsf @@ -1,25 +1,39 @@ #!/bin/bash #BSUB -P LGT104 -#BSUB -W 2:00 +#BSUB -W 0:20 #BSUB -nnodes 16 #BSUB -J DWF + export OMP_NUM_THREADS=6 export PAMI_IBV_ADAPTER_AFFINITY=1 export PAMI_ENABLE_STRIPING=1 -export OPT="--comms-concurrent --comms-overlap " -APP="./benchmarks/Benchmark_comms_host_device --mpi 4.4.4.3 " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log +DIR=. +source sourceme.sh -APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log +echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE -APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " -jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log +VOLS=( 32.32.32.16 32.32.32.64 64.32.32.64 64.32.64.64 64.64.64.64 64.64.64.128 64.64.64.256 64.64.64.512 128.64.64.64.512) +MPI=( 1.1.1.1 1.1.1.4 2.1.1.4 2.1.2.4 2.2.2.4 2.2.2.8 2.2.2.16 2.2.2.32 4.4.2.32 ) +RANKS=( 1 4 8 16 32 64 128 256 1024) +NODES=( 1 1 2 4 8 16 32 64 128) +INTS=( 0 1 2 3 4 5 6 7 8) +for i in 5 +do + vol=${VOLS[$i]} + nodes=${NODES[$i]} + mpi=${MPI[$i]} + ranks=${RANKS[$i]} + JSRUN="jsrun --nrs $nodes -a4 -g4 -c42 -dpacked -b packed:10 --latency_priority gpu-cpu --smpiargs=-gpu" + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-sequential --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.seq.ker + PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-overlap --shm 2048 --shm-mpi 0" + $JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.over.ker +done diff --git a/tests/IO/Test_nersc_io.cc b/tests/IO/Test_nersc_io.cc index c15c320e..ae5b9c0d 100644 --- a/tests/IO/Test_nersc_io.cc +++ b/tests/IO/Test_nersc_io.cc @@ -147,7 +147,7 @@ int main (int argc, char ** argv) Complex p = TensorRemove(Tp); std::cout< mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); - mCG(src_o,result_o); - + double t1,t2,flops; + int iters; + for(int i=0;i<100;i++){ + result_o = Zero(); + t1=usecond(); + mCG(src_o,result_o); + t2=usecond(); + iters = mCG.TotalInnerIterations; //Number of inner CG iterations + flops = 1320.0*2*FGrid->gSites()*iters; + std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< CG(1.0e-8,10000); - CG(HermOpEO,src_o,result_o_2); - - MemoryManager::Print(); + for(int i=0;i<100;i++){ + result_o_2 = Zero(); + t1=usecond(); + CG(HermOpEO,src_o,result_o_2); + t2=usecond(); + iters = CG.IterationsToComplete; + flops = 1320.0*2*FGrid->gSites()*iters; + std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.< using namespace std; using namespace Grid; - ; int main(int argc, char ** argv) { Grid_init(&argc, &argv); @@ -80,7 +79,8 @@ int main(int argc, char ** argv) { Foo=lex; } - typedef CartesianStencil Stencil; + typedef CartesianStencil Stencil; + SimpleStencilParams p; for(int dir=0;dir<4;dir++){ for(int disp=0;disp directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil myStencil(&Fine,npoint,0,directions,displacements,0); + Stencil myStencil(&Fine,npoint,0,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o directions(npoint,dir); std::vector displacements(npoint,disp); - Stencil EStencil(&rbFine,npoint,Even,directions,displacements,0); - Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,0); + Stencil EStencil(&rbFine,npoint,Even,directions,displacements,p); + Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,p); Coordinate ocoor(4); for(int o=0;o WImpl; - typedef WilsonCloverFermion WilsonCloverOperator; - typedef CompactWilsonCloverFermion CompactWilsonCloverOperator; + typedef WilsonCloverFermion> WilsonCloverOperator; + typedef CompactWilsonCloverFermion> CompactWilsonCloverOperator; typedef typename WilsonCloverOperator::FermionField Fermion; typedef typename WilsonCloverOperator::GaugeField Gauge; diff --git a/tests/core/Test_lie_generators.cc b/tests/core/Test_lie_generators.cc index e044378c..8f18ed04 100644 --- a/tests/core/Test_lie_generators.cc +++ b/tests/core/Test_lie_generators.cc @@ -9,6 +9,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: Guido Cossu +Author: Jamie Hudspith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,14 +43,14 @@ directory using namespace std; using namespace Grid; - ; +; int main(int argc, char** argv) { Grid_init(&argc, &argv); std::vector latt({4, 4, 4, 8}); GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid( - latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); + latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); @@ -60,15 +61,19 @@ int main(int argc, char** argv) { << std::endl; SU2::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; + + // guard as this code fails to compile for Nc != 3 +#if (Nc == 3) + SU2Adjoint::printGenerators(); SU2::testGenerators(); SU2Adjoint::testGenerators(); - + std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; SU3::printGenerators(); std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; SU3Adjoint::printGenerators(); @@ -111,12 +116,10 @@ int main(int argc, char** argv) { // AdjointRepresentation has the predefined number of colours Nc // Representations RepresentationTypes(grid); - - LatticeGaugeField U(grid), V(grid); SU3::HotConfiguration(gridRNG, U); SU3::HotConfiguration(gridRNG, V); - + // Adjoint representation // Test group structure // (U_f * V_f)_r = U_r * V_r @@ -127,17 +130,17 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu = peekLorentz(V,mu); pokeLorentz(UV,Umu*Vmu, mu); } - + AdjRep.update_representation(UV); typename AdjointRep::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r - - + + AdjRep.update_representation(U); typename AdjointRep::LatticeField Ur = AdjRep.U; // U_r - + AdjRep.update_representation(V); typename AdjointRep::LatticeField Vr = AdjRep.U; // V_r - + typename AdjointRep::LatticeField UrVr(grid); UrVr = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -145,10 +148,10 @@ int main(int argc, char** argv) { typename AdjointRep::LatticeMatrix Vrmu = peekLorentz(Vr,mu); pokeLorentz(UrVr,Urmu*Vrmu, mu); } - + typename AdjointRep::LatticeField Diff_check = UVr - UrVr; std::cout << GridLogMessage << "Group structure SU("<::AdjointLieAlgebraMatrix(h_adj,Ar); - + // Re-extract h_adj SU3::LatticeAlgebraVector h_adj2(grid); SU_Adjoint::projectOnAlgebra(h_adj2, Ar); SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2; std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl; - + // Exponentiate typename AdjointRep::LatticeMatrix Uadj(grid); Uadj = expMat(Ar, 1.0, 16); - + typename AdjointRep::LatticeMatrix uno(grid); uno = 1.0; // Check matrix Uadj, must be real orthogonal typename AdjointRep::LatticeMatrix Ucheck = Uadj - conjugate(Uadj); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) - << std::endl; - + << std::endl; + Ucheck = Uadj * adj(Uadj) - uno; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck) - << std::endl; + << std::endl; Ucheck = adj(Uadj) * Uadj - uno; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck) - << std::endl; - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af(grid); SU3::FundamentalLieAlgebraMatrix(h_adj,Af); @@ -193,72 +195,65 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck(grid); UnitCheck = Ufund * adj(Ufund) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck) - << std::endl; + << std::endl; UnitCheck = adj(Ufund) * Ufund - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck) - << std::endl; - + << std::endl; // Tranform to the adjoint representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields - + AdjRep.update_representation(U); Ur = AdjRep.U; // U_r typename AdjointRep::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj - + typename AdjointRep::LatticeMatrix Diff_check_mat = Ur0 - Uadj; std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl; - - - // TwoIndexRep tests - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::printGenerators(); - std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; + std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; SU2TwoIndexSymm::testGenerators(); std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printBase(); - std::cout << GridLogMessage << "*********************************************" - << std::endl; - std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; + std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::printGenerators(); std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; SU2TwoIndexAntiSymm::testGenerators(); - - std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); random(gridRNG,Gauss2); @@ -276,13 +271,13 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2 = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" - << std::endl; + << std::endl; // Projectors SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); random(gridRNG,Gauss2a); @@ -300,11 +295,11 @@ int main(int argc, char** argv) { SU3::LatticeAlgebraVector diff2a = ha - hb; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "*********************************************" - << std::endl; + << std::endl; std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure" - << std::endl; + << std::endl; // Testing HMC representation classes TwoIndexRep< Nc, Symmetric > TIndexRep(grid); @@ -313,7 +308,7 @@ int main(int argc, char** argv) { LatticeGaugeField U2(grid), V2(grid); SU3::HotConfiguration(gridRNG, U2); SU3::HotConfiguration(gridRNG, V2); - + LatticeGaugeField UV2(grid); UV2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -321,16 +316,16 @@ int main(int argc, char** argv) { SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu); pokeLorentz(UV2,Umu2*Vmu2, mu); } - + TIndexRep.update_representation(UV2); typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r - + TIndexRep.update_representation(U2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r - + TIndexRep.update_representation(V2); typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid); Ur2Vr2 = Zero(); for (int mu = 0; mu < Nd; mu++) { @@ -338,11 +333,11 @@ int main(int argc, char** argv) { typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu); pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu); } - + typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2; std::cout << GridLogMessage << "Group structure SU("<::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym); - + // Re-extract h_sym SU3::LatticeAlgebraVector h_sym2(grid); SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym); SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; - - + // Exponentiate typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid); U2iS = expMat(Ar_sym, 1.0, 16); - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid); uno2iS = 1.0; // Check matrix U2iS, must be real orthogonal typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS); std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS) - << std::endl; - + << std::endl; + Ucheck2iS = U2iS * adj(U2iS) - uno2iS; std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS) - << std::endl; + << std::endl; Ucheck2iS = adj(U2iS) * U2iS - uno2iS; std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS) - << std::endl; - - - + << std::endl; + // Construct the fundamental matrix in the group SU3::LatticeMatrix Af_sym(grid); SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); @@ -386,147 +378,137 @@ int main(int argc, char** argv) { SU3::LatticeMatrix UnitCheck2(grid); UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f; std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2) - << std::endl; + << std::endl; UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f; std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2) - << std::endl; - - + << std::endl; + + // Tranform to the 2Index Sym representation U = Zero(); // fill this with only one direction pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields - + TIndexRep.update_representation(U); Ur2 = TIndexRep.U; // U_r typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS - + typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS; std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl; - - - - + if (TwoIndexRep::Dimension != 1){ - std::cout << GridLogMessage << "*********************************************" - << std::endl; + std::cout << GridLogMessage << "*********************************************" + << std::endl; - std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" - << std::endl; - // Testing HMC representation classes - TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); + std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" + << std::endl; + // Testing HMC representation classes + TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); - // Test group structure - // (U_f * V_f)_r = U_r * V_r - LatticeGaugeField U2A(grid), V2A(grid); - SU3::HotConfiguration(gridRNG, U2A); - SU3::HotConfiguration(gridRNG, V2A); + // Test group structure + // (U_f * V_f)_r = U_r * V_r + LatticeGaugeField U2A(grid), V2A(grid); + SU3::HotConfiguration(gridRNG, U2A); + SU3::HotConfiguration(gridRNG, V2A); - LatticeGaugeField UV2A(grid); - UV2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); - SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); - pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + LatticeGaugeField UV2A(grid); + UV2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); + SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); + pokeLorentz(UV2A,Umu2A*Vmu2A, mu); + } + + TIndexRep.update_representation(UV2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r + + TIndexRep.update_representation(U2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r + + TIndexRep.update_representation(V2A); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); + Ur2Vr2A = Zero(); + for (int mu = 0; mu < Nd; mu++) { + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); + pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); + } + + typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; + std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); + random(gridRNG,h_Asym); + h_Asym = real(h_Asym); + SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); + + // Re-extract h_sym + SU3::LatticeAlgebraVector h_Asym2(grid); + SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); + SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; + std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; + + + // Exponentiate + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); + U2iAS = expMat(Ar_Asym, 1.0, 16); + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); + uno2iAS = 1.0; + // Check matrix U2iS, must be real orthogonal + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); + std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) + << std::endl; + + Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) + << std::endl; + Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; + std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) + << std::endl; + + + + // Construct the fundamental matrix in the group + SU3::LatticeMatrix Af_Asym(grid); + SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); + SU3::LatticeMatrix Ufund2A(grid); + Ufund2A = expMat(Af_Asym, 1.0, 16); + SU3::LatticeMatrix UnitCheck2A(grid); + UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; + std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) + << std::endl; + UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; + std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) + << std::endl; + + + // Tranform to the 2Index Sym representation + U = Zero(); // fill this with only one direction + pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields + + TIndexRepA.update_representation(U); + Ur2A = TIndexRepA.U; // U_r + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS + + typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; + std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; + + } else { + std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " + "because representation is trivial (dim = 1)" + << std::endl; } - - TIndexRep.update_representation(UV2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r - - TIndexRep.update_representation(U2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r - - TIndexRep.update_representation(V2A); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid); - Ur2Vr2A = Zero(); - for (int mu = 0; mu < Nd; mu++) { - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu); - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu); - pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu); - } - - typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A; - std::cout << GridLogMessage << "Group structure SU("<::LatticeMatrix Ar_Asym(grid); - random(gridRNG,h_Asym); - h_Asym = real(h_Asym); - SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym); - - // Re-extract h_sym - SU3::LatticeAlgebraVector h_Asym2(grid); - SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym); - SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2; - std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl; - - - // Exponentiate - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid); - U2iAS = expMat(Ar_Asym, 1.0, 16); - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid); - uno2iAS = 1.0; - // Check matrix U2iS, must be real orthogonal - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS); - std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS) - << std::endl; - - Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS) - << std::endl; - Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS; - std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS) - << std::endl; - - - - // Construct the fundamental matrix in the group - SU3::LatticeMatrix Af_Asym(grid); - SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym); - SU3::LatticeMatrix Ufund2A(grid); - Ufund2A = expMat(Af_Asym, 1.0, 16); - SU3::LatticeMatrix UnitCheck2A(grid); - UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f; - std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A) - << std::endl; - UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f; - std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A) - << std::endl; - - - // Tranform to the 2Index Sym representation - U = Zero(); // fill this with only one direction - pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields - - TIndexRepA.update_representation(U); - Ur2A = TIndexRepA.U; // U_r - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS - - typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; - std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; - -} else { - std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " - "because representation is trivial (dim = 1)" - << std::endl; -} - - - - - - - - +#endif Grid_finalize(); } diff --git a/tests/core/Test_reunitarise.cc b/tests/core/Test_reunitarise.cc index 6644be1a..10423159 100644 --- a/tests/core/Test_reunitarise.cc +++ b/tests/core/Test_reunitarise.cc @@ -122,14 +122,15 @@ int main (int argc, char ** argv) std::cout << "Determinant defect before projection " < + Fabian Joswig This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -67,8 +68,6 @@ int main(int argc, char **argv) tmp = Zero(); FermionField err(&Grid); err = Zero(); - FermionField err2(&Grid); - err2 = Zero(); FermionField phi(&Grid); random(pRNG, phi); FermionField chi(&Grid); @@ -77,6 +76,8 @@ int main(int argc, char **argv) SU::HotConfiguration(pRNG, Umu); std::vector U(4, &Grid); + double tolerance = 1e-4; + double volume = 1; for (int mu = 0; mu < Nd; mu++) { @@ -88,7 +89,7 @@ int main(int argc, char **argv) RealD csw_t = 1.0; WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - //Dwc.ImportGauge(Umu); // not necessary, included in the constructor + CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; @@ -112,7 +113,24 @@ int main(int argc, char **argv) setCheckerboard(r_eo, r_e); err = ref - r_eo; - std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; @@ -152,6 +170,22 @@ int main(int argc, char **argv) std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl; @@ -169,7 +203,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; @@ -188,7 +236,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; @@ -207,7 +269,21 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = phi - chi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; @@ -249,7 +325,7 @@ int main(int argc, char **argv) ///////////////// WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); - Dwc_prime.ImportGauge(U_prime); + CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); tmp = Omega * src; pickCheckerboard(Even, src_e, tmp); @@ -262,7 +338,37 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = chi - adj(Omega) * phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "=================================================================" << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; @@ -272,7 +378,6 @@ int main(int argc, char **argv) phi = Zero(); WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); - Dw.ImportGauge(Umu); Dw.M(src, result); Dwc.M(src, chi); @@ -280,13 +385,24 @@ int main(int argc, char **argv) Dwc_prime.M(Omega * src, phi); WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); - Dw_prime.ImportGauge(U_prime); Dw_prime.M(Omega * src, result2); + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); err = chi - adj(Omega) * phi; - err2 = result - adj(Omega) * result2; - std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; - std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err2) << std::endl; + std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; @@ -296,7 +412,6 @@ int main(int argc, char **argv) phi = Zero(); err = Zero(); WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 - Dwc_csw0.ImportGauge(Umu); pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Odd, phi_o, phi); @@ -316,7 +431,34 @@ int main(int argc, char **argv) setCheckerboard(src, src_o); err = chi - phi; - std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; @@ -348,9 +490,41 @@ int main(int argc, char **argv) setCheckerboard(phi, phi_o); err = ref - phi; - std::cout << GridLogMessage << "ref (unpreconditioned operator) diff :" << norm2(ref) << std::endl; - std::cout << GridLogMessage << "phi (EO decomposition) diff :" << norm2(phi) << std::endl; - std::cout << GridLogMessage << "norm diff :" << norm2(err) << std::endl; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); Grid_finalize(); } diff --git a/tests/core/Test_wilson_conserved_current.cc b/tests/core/Test_wilson_conserved_current.cc new file mode 100644 index 00000000..3ee1a271 --- /dev/null +++ b/tests/core/Test_wilson_conserved_current.cc @@ -0,0 +1,253 @@ +/************************************************************************************* +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_cayley_cg.cc + +Copyright (C) 2022 + +Author: Peter Boyle +Author: Fabian Joswig + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + + +template +void TestConserved(What & Dw, + LatticeGaugeField &Umu, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + GridParallelRNG *RNG4); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout< seeds5({5,6,7,8}); + GridParallelRNG RNG4(UGrid); + std::vector seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + if( argc > 1 && argv[1][0] != '-' ) + { + std::cout<::HotConfiguration(RNG4,Umu); + } + + typename WilsonCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + + std::cout<(Dw,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dwc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dwcc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewc,Umu,UGrid,UrbGrid,&RNG4); + + std::cout<(Dewcc,Umu,UGrid,UrbGrid,&RNG4); + + Grid_finalize(); +} + + + +template +void TestConserved(Action & Dw, + LatticeGaugeField &Umu, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + GridParallelRNG *RNG4) +{ + LatticePropagator phys_src(UGrid); + LatticePropagator seqsrc(UGrid); + LatticePropagator prop4(UGrid); + LatticePropagator Vector_mu(UGrid); + LatticeComplex SV (UGrid); + LatticeComplex VV (UGrid); + LatticePropagator seqprop(UGrid); + + SpinColourMatrix kronecker; kronecker=1.0; + Coordinate coor({0,0,0,0}); + phys_src=Zero(); + pokeSite(kronecker,phys_src,coor); + + ConjugateGradient CG(1.0e-16,100000); + SchurRedBlackDiagTwoSolve schur(CG); + ZeroGuesser zpg; + for(int s=0;s(src4,phys_src,s,c); + + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); + std::cout<(prop4,result4,s,c); + } + } + + auto curr = Current::Vector; + const int mu_J=0; + const int t_J=0; + + LatticeComplex ph (UGrid); ph=1.0; + + Dw.SeqConservedCurrent(prop4, + seqsrc, + phys_src, + curr, + mu_J, + t_J, + t_J,// whole lattice + ph); + + for(int s=0;s(src4,seqsrc,s,c); + + LatticeFermion result4(UGrid); result4=Zero(); + schur(Dw,src4,result4,zpg); + + FermToProp(seqprop,result4,s,c); + } + } + + Gamma g5(Gamma::Algebra::Gamma5); + Gamma gT(Gamma::Algebra::GammaT); + + std::vector sumSV; + std::vector sumVV; + + Dw.ContractConservedCurrent(prop4,prop4,Vector_mu,phys_src,Current::Vector,Tdir); + + SV = trace(Vector_mu); // Scalar-Vector conserved current + VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current + + // Spatial sum + sliceSum(SV,sumSV,Tdir); + sliceSum(VV,sumVV,Tdir); + + const int Nt{static_cast(sumSV.size())}; + + std::cout< check_buf; + + test_S = trace(qSite*g); + test_V = trace(qSite*g*Gamma::gmu[mu_J]); + + Dw.ContractConservedCurrent(prop4,prop4,cur,phys_src,curr,mu_J); + + c = trace(cur*g); + sliceSum(c, check_buf, Tp); + check_S = TensorRemove(check_buf[t_J]); + + auto gmu=Gamma::gmu[mu_J]; + c = trace(cur*g*gmu); + sliceSum(c, check_buf, Tp); + check_V = TensorRemove(check_buf[t_J]); + + + std::cout< + +using namespace std; +using namespace Grid; + +int main(int argc, char **argv) +{ + Grid_init(&argc, &argv); + + auto latt_size = GridDefaultLatt(); + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size, simd_layout, mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALF" << sizeof(RealF) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REALD" << sizeof(RealD) << std::endl; + std::cout << GridLogMessage << "Grid floating point word size is REAL" << sizeof(Real) << std::endl; + + std::vector seeds({1, 2, 3, 4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename WilsonExpCloverFermionR::FermionField FermionField; + typename WilsonExpCloverFermionR::ImplParams params; + WilsonAnisotropyCoefficients anis; + + FermionField src(&Grid); + random(pRNG, src); + FermionField result(&Grid); + result = Zero(); + FermionField result2(&Grid); + result2 = Zero(); + FermionField ref(&Grid); + ref = Zero(); + FermionField tmp(&Grid); + tmp = Zero(); + FermionField err(&Grid); + err = Zero(); + FermionField phi(&Grid); + random(pRNG, phi); + FermionField chi(&Grid); + random(pRNG, chi); + LatticeGaugeField Umu(&Grid); + SU::HotConfiguration(pRNG, Umu); + std::vector U(4, &Grid); + + double tolerance = 1e-4; + + double volume = 1; + for (int mu = 0; mu < Nd; mu++) + { + volume = volume * latt_size[mu]; + } + + RealD mass = 0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + + WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + FermionField src_e(&RBGrid); + FermionField src_o(&RBGrid); + FermionField r_e(&RBGrid); + FermionField r_o(&RBGrid); + FermionField r_eo(&Grid); + pickCheckerboard(Even, src_e, src); + pickCheckerboard(Odd, src_o, src); + + Dwc.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + + Dwc_compact.Meooe(src_e, r_o); + std::cout << GridLogMessage << "Applied Meo" << std::endl; + Dwc_compact.Meooe(src_o, r_e); + std::cout << GridLogMessage << "Applied Moe" << std::endl; + Dwc_compact.Dhop(src, ref, DaggerNo); + + setCheckerboard(r_eo, r_o); + setCheckerboard(r_eo, r_e); + + err = ref - r_eo; + std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl; + assert(fabs(norm2(err)) < tolerance); + + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; + std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + FermionField chi_e(&RBGrid); + FermionField chi_o(&RBGrid); + + FermionField dchi_e(&RBGrid); + FermionField dchi_o(&RBGrid); + + FermionField phi_e(&RBGrid); + FermionField phi_o(&RBGrid); + + FermionField dphi_e(&RBGrid); + FermionField dphi_o(&RBGrid); + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Meooe(chi_e, dchi_o); + Dwc.Meooe(chi_o, dchi_e); + Dwc.MeooeDag(phi_e, dphi_o); + Dwc.MeooeDag(phi_o, dphi_e); + + ComplexD pDce = innerProduct(phi_e, dchi_e); + ComplexD pDco = innerProduct(phi_o, dchi_o); + ComplexD cDpe = innerProduct(chi_e, dphi_e); + ComplexD cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; + + Dwc_compact.Meooe(chi_e, dchi_o); + Dwc_compact.Meooe(chi_o, dchi_e); + Dwc_compact.MeooeDag(phi_e, dphi_o); + Dwc_compact.MeooeDag(phi_o, dphi_e); + + pDce = innerProduct(phi_e, dchi_e); + pDco = innerProduct(phi_o, dchi_o); + cDpe = innerProduct(chi_e, dphi_e); + cDpo = innerProduct(chi_o, dphi_o); + + std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl; + std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl; + + std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl; + std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl; + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.Mooee(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.Mooee(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.Mooee(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.Mooee(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInvDag(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInvDag(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInvDag(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==============================================================" << std::endl; + std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; + std::cout << GridLogMessage << "==============================================================" << std::endl; + + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dwc.MooeeDag(chi_e, src_e); + Dwc.MooeeInv(src_e, phi_e); + + Dwc.MooeeDag(chi_o, src_o); + Dwc.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Dwc_compact.MooeeDag(chi_e, src_e); + Dwc_compact.MooeeInv(src_e, phi_e); + + Dwc_compact.MooeeDag(chi_o, src_o); + Dwc_compact.MooeeInv(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = phi - chi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + ////////////////////// Gauge Transformation + std::vector seeds2({5, 6, 7, 8}); + GridParallelRNG pRNG2(&Grid); + pRNG2.SeedFixedIntegers(seeds2); + LatticeColourMatrix Omega(&Grid); + LatticeColourMatrix ShiftedOmega(&Grid); + LatticeGaugeField U_prime(&Grid); + U_prime = Zero(); + LatticeColourMatrix U_prime_mu(&Grid); + U_prime_mu = Zero(); + SU::LieRandomize(pRNG2, Omega, 1.0); + for (int mu = 0; mu < Nd; mu++) + { + U[mu] = peekLorentz(Umu, mu); + ShiftedOmega = Cshift(Omega, mu, 1); + U_prime_mu = Omega * U[mu] * adj(ShiftedOmega); + pokeLorentz(U_prime, U_prime_mu, mu); + } + ///////////////// + + WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); + CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_prime.Mooee(src_e, phi_e); + Dwc_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + tmp = Zero(); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + tmp = Omega * src; + pickCheckerboard(Even, src_e, tmp); + pickCheckerboard(Odd, src_o, tmp); + + Dwc_compact_prime.Mooee(src_e, phi_e); + Dwc_compact_prime.Mooee(src_o, phi_o); + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "=================================================================" << std::endl; + std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; + std::cout << GridLogMessage << "================================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + + WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); + + Dw.M(src, result); + Dwc.M(src, chi); + + Dwc_prime.M(Omega * src, phi); + + WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); + Dw_prime.M(Omega * src, result2); + + err = result - adj(Omega) * result2; + std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff WilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + + Dwc_compact.M(src, chi); + Dwc_compact_prime.M(Omega * src, phi); + + err = chi - adj(Omega) * phi; + std::cout << GridLogMessage << "norm diff CompactWilsonExpClover " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_csw0.Mooee(src_e, phi_e); + Dwc_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0 + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + Dw.Mooee(src_e, chi_e); + Dw.Mooee(src_o, chi_o); + Dwc_compact_csw0.Mooee(src_e, phi_e); + Dwc_compact_csw0.Mooee(src_o, phi_o); + + setCheckerboard(chi, chi_e); + setCheckerboard(chi, chi_o); + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + setCheckerboard(src, src_e); + setCheckerboard(src, src_o); + + err = chi - phi; + std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + std::cout << GridLogMessage << "==========================================================" << std::endl; + std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; + std::cout << GridLogMessage << "==========================================================" << std::endl; + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc.Mooee(src_e, chi_e); + Dwc.Mooee(src_o, chi_o); + Dwc.Meooe(src_o, phi_e); + Dwc.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + chi = Zero(); + phi = Zero(); + err = Zero(); + + pickCheckerboard(Even, phi_e, phi); + pickCheckerboard(Odd, phi_o, phi); + pickCheckerboard(Even, chi_e, chi); + pickCheckerboard(Odd, chi_o, chi); + + // M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o) + + Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator + + // EO matrix + Dwc_compact.Mooee(src_e, chi_e); + Dwc_compact.Mooee(src_o, chi_o); + Dwc_compact.Meooe(src_o, phi_e); + Dwc_compact.Meooe(src_e, phi_o); + + phi_o += chi_o; + phi_e += chi_e; + + setCheckerboard(phi, phi_e); + setCheckerboard(phi, phi_o); + + err = ref - phi; + std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl; + std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl; + std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl; + assert(fabs(norm2(err)) < tolerance); + + Grid_finalize(); +} diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc index 0c398306..726ecd4a 100644 --- a/tests/hmc/Test_hmc_ScalarActionNxN.cc +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -132,8 +132,8 @@ int main(int argc, char **argv) { // Checkpointer definition CheckpointerParameters CPparams(Reader); - //TheHMC.Resources.LoadBinaryCheckpointer(CPparams); - TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); + TheHMC.Resources.LoadBinaryCheckpointer(CPparams); + //TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); this breaks for compilation without lime RNGModuleParameters RNGpar(Reader); TheHMC.Resources.SetRNGSeeds(RNGpar); diff --git a/tests/hmc/Test_hmc_WG_Production.cc b/tests/hmc/Test_hmc_WG_Production.cc index b16c073b..8cd74791 100644 --- a/tests/hmc/Test_hmc_WG_Production.cc +++ b/tests/hmc/Test_hmc_WG_Production.cc @@ -74,10 +74,10 @@ int main(int argc, char **argv) { // Checkpointer definition CheckpointerParameters CPparams(Reader); - //TheHMC.Resources.LoadNerscCheckpointer(CPparams); + TheHMC.Resources.LoadNerscCheckpointer(CPparams); - // Store metadata in the Scidac checkpointer - TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar); + // Store metadata in the Scidac checkpointer - obviously breaks without LIME + //TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar); RNGModuleParameters RNGpar(Reader); TheHMC.Resources.SetRNGSeeds(RNGpar); diff --git a/tests/lanczos/Test_compressed_lanczos.cc b/tests/lanczos/Test_compressed_lanczos.cc index 3fa3c490..d7d0d52d 100644 --- a/tests/lanczos/Test_compressed_lanczos.cc +++ b/tests/lanczos/Test_compressed_lanczos.cc @@ -37,6 +37,8 @@ Author: Peter Boyle using namespace std; using namespace Grid; +#ifdef HAVE_LIME + template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos { @@ -249,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif // HAVE_LIME_H diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc index ecd0e629..669a7b6d 100644 --- a/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg.cc @@ -36,7 +36,8 @@ Author: Peter Boyle using namespace std; using namespace Grid; - ; + +#ifdef HAVE_LIME template class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos @@ -250,3 +251,11 @@ int main (int argc, char ** argv) { Grid_finalize(); } +#else + +int main( void ) +{ + return 0 ; +} + +#endif diff --git a/tests/solver/Test_coarse_even_odd.cc b/tests/solver/Test_coarse_even_odd.cc index dfbab747..c7127121 100644 --- a/tests/solver/Test_coarse_even_odd.cc +++ b/tests/solver/Test_coarse_even_odd.cc @@ -93,8 +93,16 @@ int main(int argc, char** argv) { // Setup of Dirac Matrix and Operator // ///////////////////////////////////////////////////////////////////////////// - LatticeGaugeField Umu(Grid_f); SU3::HotConfiguration(pRNG_f, Umu); - + LatticeGaugeField Umu(Grid_f); +#if (Nc==2) + SU2::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==3) + SU3::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==4) + SU4::HotConfiguration(pRNG_f, Umu); +#elif (defined Nc==5) + SU5::HotConfiguration(pRNG_f, Umu); +#endif RealD checkTolerance = (getPrecision::value == 1) ? 1e-7 : 1e-15; RealD mass = -0.30; diff --git a/tests/solver/Test_dwf_mrhs_cg.cc b/tests/solver/Test_dwf_mrhs_cg.cc index b912ba4f..4242f64b 100644 --- a/tests/solver/Test_dwf_mrhs_cg.cc +++ b/tests/solver/Test_dwf_mrhs_cg.cc @@ -34,6 +34,7 @@ using namespace Grid; int main (int argc, char ** argv) { +#ifdef HAVE_LIME typedef typename DomainWallFermionR::FermionField FermionField; typedef typename DomainWallFermionR::ComplexField ComplexField; typename DomainWallFermionR::ImplParams params; @@ -237,4 +238,5 @@ int main (int argc, char ** argv) } Grid_finalize(); +#endif // HAVE_LIME } diff --git a/tests/solver/Test_wilsonclover_cg_prec.cc b/tests/solver/Test_wilsonclover_cg_prec.cc index 5ef2dc09..abf64a1f 100644 --- a/tests/solver/Test_wilsonclover_cg_prec.cc +++ b/tests/solver/Test_wilsonclover_cg_prec.cc @@ -71,7 +71,12 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + // HermitianOperator HermOp(Dw); // ConjugateGradient CG(1.0e-8,10000); @@ -80,12 +85,28 @@ int main (int argc, char ** argv) LatticeFermion src_o(&RBGrid); LatticeFermion result_o(&RBGrid); pickCheckerboard(Odd,src_o,src); - result_o=Zero(); - SchurDiagMooeeOperator HermOpEO(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO(Dw); + result_o=Zero(); CG(HermOpEO,src_o,result_o); - + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_compact(Dw_compact); + result_o=Zero(); + CG(HermOpEO_compact,src_o,result_o); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp(Dwe); + result_o=Zero(); + CG(HermOpEO_exp,src_o,result_o); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + SchurDiagMooeeOperator HermOpEO_exp_compact(Dwe_compact); + result_o=Zero(); + CG(HermOpEO_exp_compact,src_o,result_o); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_schur.cc b/tests/solver/Test_wilsonclover_cg_schur.cc index 567a8283..50d06af7 100644 --- a/tests/solver/Test_wilsonclover_cg_schur.cc +++ b/tests/solver/Test_wilsonclover_cg_schur.cc @@ -60,18 +60,36 @@ int main (int argc, char ** argv) LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); LatticeFermion src(&Grid); random(pRNG,src); - LatticeFermion result(&Grid); result=Zero(); - LatticeFermion resid(&Grid); - - RealD mass = -0.1; - RealD csw_r = 1.0; - RealD csw_t = 1.0; - WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + LatticeFermion result(&Grid); + LatticeFermion resid(&Grid); ConjugateGradient CG(1.0e-8,10000); SchurRedBlackDiagMooeeSolve SchurSolver(CG); + RealD mass = -0.1; + RealD csw_r = 1.0; + RealD csw_t = 1.0; + RealD cF = 1.0; + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); SchurSolver(Dw,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dw_compact,src,result); + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + result=Zero(); + SchurSolver(Dwe,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + result=Zero(); + SchurSolver(Dwe_compact,src,result); Grid_finalize(); } diff --git a/tests/solver/Test_wilsonclover_cg_unprec.cc b/tests/solver/Test_wilsonclover_cg_unprec.cc index 755d80e1..2a859f11 100644 --- a/tests/solver/Test_wilsonclover_cg_unprec.cc +++ b/tests/solver/Test_wilsonclover_cg_unprec.cc @@ -59,7 +59,7 @@ int main (int argc, char ** argv) LatticeFermion src(&Grid); random(pRNG,src); RealD nrm = norm2(src); - LatticeFermion result(&Grid); result=Zero(); + LatticeFermion result(&Grid); LatticeGaugeField Umu(&Grid); SU::HotConfiguration(pRNG,Umu); double volume=1; @@ -70,11 +70,34 @@ int main (int argc, char ** argv) RealD mass = -0.1; RealD csw_r = 1.0; RealD csw_t = 1.0; + RealD cF = 1.0; WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); + WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t); + CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0); - MdagMLinearOperator HermOp(Dw); ConjugateGradient CG(1.0e-8,10000); + + std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl; + MdagMLinearOperator HermOp(Dw); + result=Zero(); CG(HermOp,src,result); + std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl; + MdagMLinearOperator HermOp_compact(Dw_compact); + result=Zero(); + CG(HermOp_compact,src,result); + + + std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp(Dwe); + result=Zero(); + CG(HermOp_exp,src,result); + + std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl; + MdagMLinearOperator HermOp_exp_compact(Dwe_compact); + result=Zero(); + CG(HermOp_exp_compact,src,result); + Grid_finalize(); }