mirror of https://github.com/paboyle/Grid.git synced 2025-06-23 10:12:02 +01:00

Compare commits


121 Commits

Author SHA1 Message Date
8b91b61b61 First cut at faster GPU slice sum via atomics 2022-12-22 15:13:45 -05:00
4ca1bf7cca Added gauge invariance test 2022-12-21 07:23:16 -05:00
2ff868f7a5 CPU open doesn't need to free space 2022-12-20 05:10:23 -05:00
ede02b6883 Memory manager debug Felix case 2022-12-20 05:10:23 -05:00
1822ced302 Bug fix 2022-12-20 05:10:23 -05:00
37ba32776f More logging 2022-12-20 05:10:23 -05:00
99b3697b03 More loggin 2022-12-20 05:10:23 -05:00
43a45ec97b SSC_START 2022-12-20 05:10:23 -05:00
b00a4142e5 A=A fix 2022-12-20 05:10:23 -05:00
3791bc527b Logging pulled in from dirichlet branch 2022-12-20 05:10:23 -05:00
d8c29f5fcf Updated FFT test for PETSc 2022-12-18 12:05:00 -05:00
281f8101fe Matt FFT test 2022-12-17 20:35:33 -05:00
07acfe89f2 Merge pull request #417 from rrhodgson/feature/fermtoprop
Feature/fermtoprop
2022-12-06 12:45:03 -05:00
40234f531f FermToProp accelerator_for -> thread_for 2022-12-06 17:34:51 +00:00
d49694f38f PropToFerm fix 2022-12-06 15:48:54 +00:00
97a098636d FermToProp 2022-11-30 15:36:35 -05:00
e13930c8b2 Faster fermtoprop case 2022-11-30 15:11:29 -05:00
0655dab466 Open MP on host enabled 2022-11-08 13:38:54 -08:00
7f097bcc28 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-11-08 13:23:40 -08:00
5c75aa5008 Device mem 2022-11-08 13:22:57 -08:00
1873101362 PVC 2022-11-08 13:22:45 -08:00
63fd1dfa62 Config on PVC 2022-11-08 13:22:09 -08:00
bd68861b28 SYCL sum 2022-11-08 12:49:26 -08:00
82e959f66c SYCL reduction 2022-11-08 12:45:25 -08:00
62e52de06d Merge pull request #414 from fjosw/feat/eCloverGPU
Compact Exponential Cloverterm on GPU
2022-11-01 09:15:44 -04:00
184adeedb8 feat: renamed open_boundaries to fixedBoundaries 2022-10-26 12:53:46 +01:00
5fa6a8b96d docs: CompactClover debug info generalized. 2022-10-26 12:41:14 +01:00
a2a879b668 docs: CompactClover Debug Info improved. 2022-10-25 17:20:42 +01:00
9317d893b2 docs: details about inversion of CompactClover term added. 2022-10-25 17:10:06 +01:00
86075fdd45 feat: MassTerm and ExponentiateClover merged into InstantiateClover 2022-10-25 17:05:34 +01:00
b36442e263 feat: CloverHelpers::InvertClover implemented which handles the
inversion of the Clover term depending on clover type and the boundary
conditions.
2022-10-25 16:57:01 +01:00
513d797ea6 fix: signature of CompactWilsonCloverHelpers::Exponentiate fixed. 2022-10-25 16:17:22 +01:00
9e4835a3e3 feat: changed CompactWilsonExpClover exponentiation to Taylor expansion
with Horner scheme.
2022-10-25 15:19:43 +01:00
477ebf24f4 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2022-10-04 11:19:43 -07:00
0d5639f707 Run script update 2022-10-04 11:13:41 -07:00
413312f9a9 Benchmark the halo construction.
THe bye counts are out and should be doubled for SIMD directions
2022-10-04 11:12:59 -07:00
03508448f8 Remove verbose 2022-10-04 11:12:15 -07:00
e1e5c75023 Stencil gather improvements - SVM was running slow and used for a pointer array that wasn't needed to be in SVM 2022-10-04 11:11:10 -07:00
9296299b61 Better commenting 2022-10-04 11:10:34 -07:00
913fbca74a Merge pull request #410 from gkanwar/photon_and_sha_patches
Photon.h and SHA256 patches
2022-08-31 18:01:45 -04:00
60dfb49afa Remove FP16 tests when FP16 is disabled 2022-08-21 17:29:55 +02:00
554c238359 Update OpenSSL digest to use high-level methods
This avoids deprecation warnings when compiling against OpenSSL 3.0
but should still be backwards compatible. It is the recommended way
to use the digest API going forward.
2022-08-21 17:28:57 +02:00
f922adf05e Fix Photon ComplexField type 2022-08-21 16:16:18 +02:00
188d2c7a4d PVC default, ignore ATS 2022-08-02 08:38:53 -07:00
17d7177105 Files for SYCL 2022-08-02 08:33:39 -07:00
bb0a0da47a inon blocking caution due to SYCL 2022-08-02 08:09:43 -07:00
84110166e4 Fix the fence 2022-08-02 08:00:43 -07:00
d32b923b6c Fencing on a stream in SYCL is needed. Didn't know that ... gulp 2022-08-02 07:58:04 -07:00
2ab1af5754 Ensure no synchronize and not optoin dependent 2022-07-19 09:51:06 -07:00
5f8892bf03 Mistake pointed out by Camilo 2022-07-19 09:31:51 -07:00
f14e7e51e7 Grid accelerator 2022-07-12 10:56:22 -07:00
042ab1a052 Update GridStd.h 2022-06-27 13:21:39 -04:00
2df98a99bc Merge pull request #406 from giordano/patch-1
Update default value of gen-simd-width in README
2022-06-14 17:46:25 -04:00
315ea18be2 Update default value of gen-simd-width in README 2022-06-14 22:41:05 +01:00
a9c2e1df03 Merge pull request #404 from rrhodgson/feature/json_nvcc
Feature/json nvcc
2022-05-25 13:30:11 -04:00
da4daea57a Updated json to latest release 3.10.5 2022-05-24 16:16:06 +01:00
af3b065add Merge pull request #403 from fjosw/fix/cuda_11_5_warnings
Fixed nvcc 11.5+ warnings
2022-05-24 11:10:02 -04:00
e346154c5d Updated json CUDA compile guards 2022-05-24 15:48:01 +01:00
7937ac2bab fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in pugixml/pugixml.cc 2022-05-24 15:31:03 +01:00
e909aeedf0 fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT standard in Grid_Eigen_Dense.h 2022-05-24 15:29:42 +01:00
bab8aa8eb0 fix: conditional pragmas according to new NVCC_DIAG_PRAGMA_SUPPORT
standard in DisableWarnings.h
2022-05-24 15:27:40 +01:00
38b22f05be Merge pull request #402 from fjosw/fix/clover_warnings
fixed clover warnings
2022-05-24 10:05:27 -04:00
3ca0de1c40 Fix json write for vector<string> 2022-05-24 14:37:33 +01:00
c7205d2a73 Removed nvcc guards for json 2022-05-24 14:30:26 +01:00
617c5362c1 fix: fixed warning: missing return statement at end of non-void function
in CloverHelpers
2022-05-24 11:37:33 +01:00
083b58e66d Merge pull request #401 from JPRichings/LocalCoheranceDeflation
Local coherance batch deflation
2022-05-20 11:44:22 -04:00
633427a2df Merge pull request #400 from JPRichings/wilson_sweep
bench wilson sweep fix
2022-05-20 11:43:40 -04:00
2031d6910a Merge branch 'paboyle:develop' into wilson_sweep 2022-05-20 16:20:23 +01:00
79e34b3eb4 Local Coherence batch deflation 2022-05-19 14:53:17 +01:00
4f3d581ab4 Merge branch 'paboyle:develop' into LocalCoheranceDeflation 2022-05-19 14:46:17 +01:00
d16427b837 Merge pull request #399 from fjosw/fix/Nc_neq_3
fix: assert for dimensions of compact Wilson clover moved to constructor
2022-05-17 09:03:42 -04:00
4b1997e2f3 wilson sweep test 2022-05-16 15:58:33 +01:00
8939d5dc73 bugfix: eo operator called in correct location 2022-05-16 00:28:28 +01:00
b051e00de0 Additional Local Coherance Deflation operator() 2022-05-16 00:25:13 +01:00
8aa75b492f Merge branch 'develop' into fix/Nc_neq_3 2022-05-10 14:22:03 +01:00
0274f40686 Merge pull request #389 from mbruno46/mbruno-eclover
Feature/expClover
2022-05-10 09:18:19 -04:00
77aa147ce5 Merge branch 'develop' into mbruno-eclover 2022-05-10 09:16:53 -04:00
32facbd02a fix: assert for dimensions of compact Wilson clover moved to
constructor.
2022-05-10 10:53:22 +01:00
4de50ab146 Merge pull request #396 from fjosw/fix/readd_config.h
fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am
2022-05-09 08:26:48 -04:00
8b12a61097 fix: readded Config.h and Version.h to HFILEs in Grid/Makefile.am 2022-05-09 11:53:22 +01:00
79ea027c0b Merge pull request #377 from RJHudspith/develop
NERSC and ILDG for non-SU(3) configuration checkpoints
2022-05-03 08:55:48 -04:00
62339d437f Merge pull request #387 from lehner/feature/gpt
Parity mass terms for domain wall fermions to enable 4d eofa
2022-05-03 08:52:18 -04:00
698e745276 Merge pull request #390 from fjosw/feature/conserved_current_wilson
Conserved current for wilson fermions
2022-05-03 08:51:10 -04:00
9a6e2c315d Merge pull request #394 from fjosw/fix/gauge_fix_ErrorOnNoConverge
SteepestDescentGaugeFix now exits when the algorithm does not converge.
2022-05-03 08:49:26 -04:00
e61fed87db SteepestDescentGaugeFix now exits when the algorithm does not converge.
This behaviour can be altered by setting err_on_no_converge to false.
2022-04-20 15:41:55 +01:00
b8bc560b51 Test_wilson_conserved_current implemented, all 5d references removed. 2022-04-05 17:33:45 +01:00
6bc2483d57 Merge branch 'feature/eclover' into feature/conserved_current_wilson 2022-04-05 15:26:49 +01:00
82aecbf4cf Test_wilson_conserved_current added 2022-04-05 15:26:39 +01:00
ee23a76aa0 Merge pull request #2 from fjosw/feature/eclover
Feature/eclover
2022-04-05 13:30:13 +02:00
d7191e5a02 SeqConservedCurrent implemented for Wilson fermions 2022-04-05 11:48:56 +01:00
c8a824425b Error message added if another conserved current than vector is requested for
Wilson type fermions.
2022-04-05 10:58:22 +01:00
f23626a6b8 End scope by additional block in CloverHelpers.h 2022-04-02 16:08:15 +01:00
6577a03d16 Explcitly closed views in Exponentiate_Clover 2022-04-01 18:39:12 +01:00
427c8695fe Change signs and prefactors for conserved current to mimic the 5d
version.
2022-04-01 16:20:21 +01:00
9e82c468ab Multiplication of diagonal mass in exponentiate fixed for gpus 2022-04-01 15:54:43 +01:00
603fd96747 Missing link multiplication added. 2022-04-01 10:58:56 +01:00
fe993c0836 /=2 replaced by *=0.5 2022-03-31 17:08:17 +01:00
cdf31d52c1 GaugeGrid and typo fixed 2022-03-31 17:04:35 +01:00
0542eaf1da First version of conserved current contraction for Wilson type quarks 2022-03-31 17:02:09 +01:00
317bdcf158 nerscio parametrization 2022-03-24 13:10:47 +01:00
9ca2c98882 Merge branch 'develop' of https://github.com/paboyle/Grid into mbruno-eclover 2022-03-22 15:31:37 +01:00
53ae01a34a Merge pull request #1 from fjosw/feature/eclover
Feature/eclover
2022-03-15 15:23:35 +01:00
76c294a7ba open bc fix 2022-03-08 13:55:16 +01:00
0c0c2b1e20 Unnecessary arguments of CloverHelpers::Exponentiate_Clover removed. 2022-03-08 09:44:51 +00:00
e2fc3a0f04 Merge pull request #28 from paboyle/develop
Sync with Upstream
2022-03-08 09:58:51 +01:00
451e7972fd Reintroduced explicit inversion of the Clover term in case of the
CompactExpClover because of the open boundary O(a) improvement. Changed
the timing output to GridLogDebug
2022-03-07 17:43:33 +00:00
56c089d347 Removed leftover comments 2022-03-07 16:40:20 +00:00
acf740e44d Merge pull request #1 from FelixPGZiegler/feature/eclover
Feature/eclover
2022-03-07 16:25:11 +00:00
182f513404 Merge remote-tracking branch 'fjosw/feature/eclover' into feature/eclover 2022-03-07 15:22:04 +00:00
d5b2323a57 included Cayley-Hamilton exponentiation for the compact Wilson exp clover, bug fix for inverse of exp clover 2022-03-07 14:44:24 +00:00
bad18d4417 Merge branch 'paboyle:develop' into feature/eclover 2022-03-07 13:54:10 +00:00
438caab25f generate_instantiations.sh now correctly produces instantiations for CompactClover variant, redundant instantiations removed. 2022-02-27 18:27:18 +00:00
239e2c1ee6 tests: wilson clover cg tests now include compact variant as well as
exponential wilson clover operators
2022-02-27 18:26:34 +00:00
013dc2ef33 tests: core tests for wilson clover and wilson exp clover including
compact version extended/added
2022-02-27 18:13:47 +00:00
9616811c3d Merge branch 'feature/gpt' of https://github.com/lehner/Grid into feature/gpt 2022-02-24 22:03:05 +01:00
8a3002c03b separate left and right masses for CayleyFermion5D 2022-02-24 22:02:56 +01:00
71034f828e attempt to fix broken WilsonExpClover; Compact version still broken will be replaced by F.Joswig 2022-02-23 01:02:27 +01:00
11437930c5 cleaned up definitions of wilsonclover fermions 2022-02-22 10:45:16 +01:00
3d44aa9cb9 cleaned up cloverhelpers; fixed test compact_clover which runs 2022-02-22 01:10:19 +01:00
2851870d70 expClover support via helpers template class 2022-02-22 00:05:43 +01:00
0bd83cdbda Fixes for Nc!=3 Nersc IO, Gauge and Gauge_NCxNC compatible with GLU. Trace normalisation changed in places removing explicit threes. Guards against non-su3 tests and tests failing when LIME is not compiled. 2021-11-28 21:51:03 +01:00
96 changed files with 17159 additions and 11669 deletions

View File

@@ -44,14 +44,22 @@ directory
 #ifdef __NVCC__
 //disables nvcc specific warning in json.hpp
 #pragma clang diagnostic ignored "-Wdeprecated-register"
+#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
+//disables nvcc specific warning in json.hpp
+#pragma nv_diag_suppress unsigned_compare_with_zero
+#pragma nv_diag_suppress cast_to_qualified_type
+//disables nvcc specific warning in many files
+#pragma nv_diag_suppress esa_on_defaulted_function_ignored
+#pragma nv_diag_suppress extra_semicolon
+#else
+//disables nvcc specific warning in json.hpp
 #pragma diag_suppress unsigned_compare_with_zero
 #pragma diag_suppress cast_to_qualified_type
 //disables nvcc specific warning in many files
 #pragma diag_suppress esa_on_defaulted_function_ignored
 #pragma diag_suppress extra_semicolon
+#endif
+//Eigen only
 #endif
 // Disable vectorisation in Eigen on the Power8/9 and PowerPC

View File

@@ -16,6 +16,7 @@
 #include <functional>
 #include <stdio.h>
 #include <stdlib.h>
+#include <strings.h>
 #include <stdio.h>
 #include <signal.h>
 #include <ctime>

View File

@@ -14,7 +14,11 @@
 /* NVCC save and restore compile environment*/
 #ifdef __NVCC__
 #pragma push
+#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
+#pragma nv_diag_suppress code_is_unreachable
+#else
 #pragma diag_suppress code_is_unreachable
+#endif
 #pragma push_macro("__CUDA_ARCH__")
 #pragma push_macro("__NVCC__")
 #pragma push_macro("__CUDACC__")

View File

@@ -262,7 +262,7 @@ public:
 autoView( Tnp_v , (*Tnp), AcceleratorWrite);
 autoView( Tnm_v , (*Tnm), AcceleratorWrite);
 const int Nsimd = CComplex::Nsimd();
-accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
+accelerator_for(ss, FineGrid->oSites(), Nsimd, {
 coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
 coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
 });

View File

@@ -264,7 +264,7 @@ public:
 auto Tnp_v = Tnp->View();
 auto Tnm_v = Tnm->View();
 constexpr int Nsimd = vector_type::Nsimd();
-accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
+accelerator_for(ss, in.Grid()->oSites(), Nsimd, {
 coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
 coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
 });

View File

@@ -113,7 +113,43 @@ public:
 blockPromote(guess_coarse,guess,subspace);
 guess.Checkerboard() = src.Checkerboard();
 };
-};
+void operator()(const std::vector<FineField> &src,std::vector<FineField> &guess) {
+int Nevec = (int)evec_coarse.size();
+int Nsrc = (int)src.size();
+// make temp variables
+std::vector<CoarseField> src_coarse(Nsrc,evec_coarse[0].Grid());
+std::vector<CoarseField> guess_coarse(Nsrc,evec_coarse[0].Grid());
+//Preporcessing
+std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl;
+for (int j=0;j<Nsrc;j++)
+{
+guess_coarse[j] = Zero();
+std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
+blockProject(src_coarse[j],src[j],subspace);
+}
+//deflation set up for eigen vector batchsize 1 and source batch size equal number of sources
+std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl;
+for (int i=0;i<Nevec;i++)
+{
+std::cout << GridLogMessage << "ProjectAccum Nvec: " << i << std::endl;
+const CoarseField & tmp = evec_coarse[i];
+for (int j=0;j<Nsrc;j++)
+{
+axpy(guess_coarse[j],TensorRemove(innerProduct(tmp,src_coarse[j])) / eval_coarse[i],tmp,guess_coarse[j]);
+}
+}
+//postprocessing
+std::cout << GridLogMessage << "Start BlockPromote for loop" << std::endl;
+for (int j=0;j<Nsrc;j++)
+{
+std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
+blockPromote(guess_coarse[j],guess[j],subspace);
+guess[j].Checkerboard() = src[j].Checkerboard();
+}
+};
+};
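The batched operator() added above applies the standard deflation projection, guess_j = sum_i evec_i * <evec_i, src_j> / eval_i, accumulated in the coarse basis so that each coarse eigenvector is read once for all sources. As a point of reference only, here is a minimal self-contained sketch of that accumulation in plain C++; the std::vector "fields" are hypothetical stand-ins for Grid's coarse fields and all names are illustrative, not Grid API.

#include <complex>
#include <vector>

using Cplx = std::complex<double>;
using Vec  = std::vector<Cplx>;            // stand-in for a coarse-grid field

// <a,b> with the usual conjugate-linear first argument
Cplx dot(const Vec &a, const Vec &b) {
  Cplx s = 0;
  for (size_t k = 0; k < a.size(); k++) s += std::conj(a[k]) * b[k];
  return s;
}

// guess[j] = sum_i evec[i] * <evec[i], src[j]> / eval[i]
void deflate(const std::vector<Vec> &evec, const std::vector<double> &eval,
             const std::vector<Vec> &src, std::vector<Vec> &guess) {
  for (size_t j = 0; j < src.size(); j++) guess[j].assign(src[j].size(), 0.0);
  for (size_t i = 0; i < evec.size(); i++) {        // one pass over the eigenvectors
    for (size_t j = 0; j < src.size(); j++) {       // accumulate into every source's guess
      Cplx c = dot(evec[i], src[j]) / eval[i];
      for (size_t k = 0; k < guess[j].size(); k++) guess[j][k] += c * evec[i][k];
    }
  }
}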

View File

@@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
 //////////////////////////////////////////////////////////////////////
 MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
 int MemoryManager::Victim[MemoryManager::NallocType];
-int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
+int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
 uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
 //////////////////////////////////////////////////////////////////////
 // Actual allocation and deallocation utils

View File

@@ -36,6 +36,11 @@ NAMESPACE_BEGIN(Grid);
 #define GRID_ALLOC_SMALL_LIMIT (4096)
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define FILE_LINE __FILE__ ":" TOSTRING(__LINE__)
+#define AUDIT(a) MemoryManager::Audit(FILE_LINE)
 /*Pinning pages is costly*/
 ////////////////////////////////////////////////////////////////////////////
 // Advise the LatticeAccelerator class
@@ -92,8 +97,9 @@ private:
 static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim,uint64_t &cbytes) ;
 static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t &cbytes) ;
-static void PrintBytes(void);
 public:
+static void PrintBytes(void);
+static void Audit(std::string s);
 static void Init(void);
 static void InitMessage(void);
 static void *AcceleratorAllocate(size_t bytes);
@@ -113,6 +119,8 @@ private:
 static uint64_t DeviceToHostBytes;
 static uint64_t HostToDeviceXfer;
 static uint64_t DeviceToHostXfer;
+static uint64_t DeviceEvictions;
+static uint64_t DeviceDestroy;
 private:
 #ifndef GRID_UVM
@@ -170,6 +178,7 @@ private:
 public:
 static void Print(void);
+static void PrintAll(void);
 static void PrintState( void* CpuPtr);
 static int isOpen (void* CpuPtr);
 static void ViewClose(void* CpuPtr,ViewMode mode);
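The AUDIT macro introduced above simply calls MemoryManager::Audit with the file and line of the call site. A hedged sketch of how it might be used while chasing the memory-manager bug discussed in the commit log (the surrounding code is hypothetical, and the macro argument is currently ignored by the expansion):

// Sketch only: bracket a suspect region with audits; Audit() checks that the LRU queue,
// byte counters and lock counts of the accelerator cache are mutually consistent.
AUDIT("before suspect region");   // expands to MemoryManager::Audit("<file>:<line>")
// ... lattice operations under suspicion ...
AUDIT("after suspect region");
MemoryManager::PrintAll();        // dump CpuPtr/AccPtr/state/locks for every cached entry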

View File

@@ -3,8 +3,13 @@
 #warning "Using explicit device memory copies"
 NAMESPACE_BEGIN(Grid);
-//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
-#define dprintf(...)
+#define MAXLINE 512
+static char print_buffer [ MAXLINE ];
+#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
+#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
+//#define dprintf(...)
 ////////////////////////////////////////////////////////////
@@ -23,6 +28,8 @@ uint64_t MemoryManager::HostToDeviceBytes;
 uint64_t MemoryManager::DeviceToHostBytes;
 uint64_t MemoryManager::HostToDeviceXfer;
 uint64_t MemoryManager::DeviceToHostXfer;
+uint64_t MemoryManager::DeviceEvictions;
+uint64_t MemoryManager::DeviceDestroy;
 ////////////////////////////////////
 // Priority ordering for unlocked entries
@@ -104,15 +111,17 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
 ///////////////////////////////////////////////////////////
 assert(AccCache.state!=Empty);
-dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
+mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
 assert(AccCache.accLock==0);
 assert(AccCache.cpuLock==0);
 assert(AccCache.CpuPtr!=(uint64_t)NULL);
 if(AccCache.AccPtr) {
 AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
+DeviceDestroy++;
 DeviceBytes -=AccCache.bytes;
 LRUremove(AccCache);
-dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
+AccCache.AccPtr=(uint64_t) NULL;
+dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
 }
 uint64_t CpuPtr = AccCache.CpuPtr;
 EntryErase(CpuPtr);
@@ -121,26 +130,36 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
 void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
 {
 ///////////////////////////////////////////////////////////////////////////
-// Make CPU consistent, remove from Accelerator, remove entry
-// Cannot be locked. If allocated must be in LRU pool.
+// Make CPU consistent, remove from Accelerator, remove from LRU, LEAVE CPU only entry
+// Cannot be acclocked. If allocated must be in LRU pool.
+//
+// Nov 2022... Felix issue: Allocating two CpuPtrs, can have an entry in LRU-q with CPUlock.
+// and require to evict the AccPtr copy. Eviction was a mistake in CpuViewOpen
+// but there is a weakness where CpuLock entries are attempted for erase
+// Take these OUT LRU queue when CPU locked?
+// Cannot take out the table as cpuLock data is important.
 ///////////////////////////////////////////////////////////////////////////
 assert(AccCache.state!=Empty);
-dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
-assert(AccCache.accLock==0);
-assert(AccCache.cpuLock==0);
+mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
+        (uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
+        (uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
+assert(AccCache.accLock==0); // Cannot evict so logic bomb
+assert(AccCache.CpuPtr!=(uint64_t)NULL);
 if(AccCache.state==AccDirty) {
 Flush(AccCache);
 }
-assert(AccCache.CpuPtr!=(uint64_t)NULL);
 if(AccCache.AccPtr) {
 AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
-DeviceBytes -=AccCache.bytes;
 LRUremove(AccCache);
-dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
+AccCache.AccPtr=(uint64_t)NULL;
+AccCache.state=CpuDirty; // CPU primary now
+DeviceBytes -=AccCache.bytes;
+dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
 }
-uint64_t CpuPtr = AccCache.CpuPtr;
-EntryErase(CpuPtr);
+// uint64_t CpuPtr = AccCache.CpuPtr;
+DeviceEvictions++;
+// EntryErase(CpuPtr);
 }
 void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
 {
@@ -150,7 +169,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
 assert(AccCache.AccPtr!=(uint64_t)NULL);
 assert(AccCache.CpuPtr!=(uint64_t)NULL);
 acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
-dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
+mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
 DeviceToHostBytes+=AccCache.bytes;
 DeviceToHostXfer++;
 AccCache.state=Consistent;
@@ -165,7 +184,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
 AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
 DeviceBytes+=AccCache.bytes;
 }
-dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
+mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
 acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
 HostToDeviceBytes+=AccCache.bytes;
 HostToDeviceXfer++;
@@ -191,6 +210,7 @@ void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
 void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
 {
 if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
+dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
 AcceleratorViewClose((uint64_t)Ptr);
 } else if( (mode==CpuRead)||(mode==CpuWrite)){
 CpuViewClose((uint64_t)Ptr);
@@ -202,6 +222,7 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 {
 uint64_t CpuPtr = (uint64_t)_CpuPtr;
 if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
+dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
 return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
 } else if( (mode==CpuRead)||(mode==CpuWrite)){
 return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
@@ -212,13 +233,16 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 }
 void MemoryManager::EvictVictims(uint64_t bytes)
 {
+assert(bytes<DeviceMaxBytes);
 while(bytes+DeviceLRUBytes > DeviceMaxBytes){
 if ( DeviceLRUBytes > 0){
 assert(LRU.size()>0);
-uint64_t victim = LRU.back();
+uint64_t victim = LRU.back(); // From the LRU
 auto AccCacheIterator = EntryLookup(victim);
 auto & AccCache = AccCacheIterator->second;
 Evict(AccCache);
+} else {
+return;
 }
 }
 }
@@ -241,11 +265,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
 assert(AccCache.cpuLock==0); // Programming error
 if(AccCache.state!=Empty) {
-dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
+dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
 (uint64_t)AccCache.CpuPtr,
 (uint64_t)CpuPtr,
 (uint64_t)AccCache.bytes,
-(uint64_t)bytes);
+(uint64_t)bytes,
+(uint64_t)AccCache.accLock);
 assert(AccCache.CpuPtr == CpuPtr);
 assert(AccCache.bytes ==bytes);
 }
@@ -280,6 +305,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
 AccCache.state = Consistent; // Empty + AccRead => Consistent
 }
 AccCache.accLock= 1;
+dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
 } else if(AccCache.state==CpuDirty ){
 if(mode==AcceleratorWriteDiscard) {
 CpuDiscard(AccCache);
@@ -292,28 +318,30 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
 AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
 }
 AccCache.accLock++;
-dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
+dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
 } else if(AccCache.state==Consistent) {
 if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
 AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
 else
 AccCache.state = Consistent; // Consistent + AccRead => Consistent
 AccCache.accLock++;
-dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
+dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
 } else if(AccCache.state==AccDirty) {
 if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
 AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
 else
 AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
 AccCache.accLock++;
-dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
+dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
 } else {
 assert(0);
 }
-// If view is opened on device remove from LRU
+assert(AccCache.accLock>0);
+// If view is opened on device must remove from LRU
 if(AccCache.LRU_valid==1){
 // must possibly remove from LRU as now locked on GPU
+dprintf("AccCache entry removed from LRU \n");
 LRUremove(AccCache);
 }
@@ -334,10 +362,12 @@ void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
 assert(AccCache.accLock>0);
 AccCache.accLock--;
 // Move to LRU queue if not locked and close on device
 if(AccCache.accLock==0) {
+dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
 LRUinsert(AccCache);
+} else {
+dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
 }
 }
 void MemoryManager::CpuViewClose(uint64_t CpuPtr)
@@ -374,9 +404,10 @@ uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,V
 auto AccCacheIterator = EntryLookup(CpuPtr);
 auto & AccCache = AccCacheIterator->second;
-if (!AccCache.AccPtr) {
-EvictVictims(bytes);
-}
+// CPU doesn't need to free space
+// if (!AccCache.AccPtr) {
+//   EvictVictims(bytes);
+// }
 assert((mode==CpuRead)||(mode==CpuWrite));
 assert(AccCache.accLock==0); // Programming error
@@ -430,20 +461,28 @@ void MemoryManager::NotifyDeletion(void *_ptr)
 void MemoryManager::Print(void)
 {
 PrintBytes();
-std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
-std::cout << GridLogDebug << "Memory Manager " << std::endl;
-std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
-std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl;
-std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
-std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl;
-std::cout << GridLogDebug << HostToDeviceXfer << " transfers to device " << std::endl;
-std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl;
-std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
-std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
-std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
-std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
-std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
-std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
+std::cout << GridLogMessage << "Memory Manager " << std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
+std::cout << GridLogMessage << DeviceBytes << " bytes allocated on device " << std::endl;
+std::cout << GridLogMessage << DeviceLRUBytes<< " bytes evictable on device " << std::endl;
+std::cout << GridLogMessage << DeviceMaxBytes<< " bytes max on device " << std::endl;
+std::cout << GridLogMessage << HostToDeviceXfer << " transfers to device " << std::endl;
+std::cout << GridLogMessage << DeviceToHostXfer << " transfers from device " << std::endl;
+std::cout << GridLogMessage << HostToDeviceBytes<< " bytes transfered to device " << std::endl;
+std::cout << GridLogMessage << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
+std::cout << GridLogMessage << DeviceEvictions << " Evictions from device " << std::endl;
+std::cout << GridLogMessage << DeviceDestroy << " Destroyed vectors on device " << std::endl;
+std::cout << GridLogMessage << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
+}
+void MemoryManager::PrintAll(void)
+{
+Print();
+std::cout << GridLogMessage << std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
+std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
 for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
 auto &AccCache = it->second;
@@ -453,13 +492,13 @@ void MemoryManager::Print(void)
 if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
 if ( AccCache.state==Consistent)str = std::string("Consistent");
-std::cout << GridLogDebug << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
+std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
 << "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
 << "\t" << AccCache.cpuLock
 << "\t" << AccCache.accLock
 << "\t" << AccCache.LRU_valid<<std::endl;
 }
-std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
+std::cout << GridLogMessage << "--------------------------------------------" << std::endl;
 };
 int MemoryManager::isOpen (void* _CpuPtr)
@@ -473,6 +512,61 @@ int MemoryManager::isOpen (void* _CpuPtr)
 return 0;
 }
 }
void MemoryManager::Audit(std::string s)
{
uint64_t CpuBytes=0;
uint64_t AccBytes=0;
uint64_t LruBytes1=0;
uint64_t LruBytes2=0;
uint64_t LruCnt=0;
uint64_t LockedBytes=0;
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
for(auto it=LRU.begin();it!=LRU.end();it++){
uint64_t cpuPtr = *it;
assert(EntryPresent(cpuPtr));
auto AccCacheIterator = EntryLookup(cpuPtr);
auto & AccCache = AccCacheIterator->second;
LruBytes2+=AccCache.bytes;
assert(AccCache.LRU_valid==1);
assert(AccCache.LRU_entry==it);
}
std::cout << " Memory Manager::Audit() LRU queue matches table entries "<<std::endl;
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
std::string str;
if ( AccCache.state==Empty ) str = std::string("Empty");
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
CpuBytes+=AccCache.bytes;
if( AccCache.AccPtr ) AccBytes+=AccCache.bytes;
if( AccCache.LRU_valid ) LruBytes1+=AccCache.bytes;
if( AccCache.LRU_valid ) LruCnt++;
if ( AccCache.cpuLock || AccCache.accLock ) {
assert(AccCache.LRU_valid==0);
std::cout << GridLogError << s<< "\n\t 0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t cpuLock " << AccCache.cpuLock
<< "\t accLock " << AccCache.accLock
<< "\t LRUvalid " << AccCache.LRU_valid<<std::endl;
}
assert( AccCache.cpuLock== 0 ) ;
assert( AccCache.accLock== 0 ) ;
}
std::cout << " Memory Manager::Audit() no locked table entries "<<std::endl;
assert(LruBytes1==LruBytes2);
assert(LruBytes1==DeviceLRUBytes);
std::cout << " Memory Manager::Audit() evictable bytes matches sum over table "<<std::endl;
assert(AccBytes==DeviceBytes);
std::cout << " Memory Manager::Audit() device bytes matches sum over table "<<std::endl;
assert(LruCnt == LRU.size());
std::cout << " Memory Manager::Audit() LRU entry count matches "<<std::endl;
}
 void MemoryManager::PrintState(void* _CpuPtr)
 {
@@ -489,8 +583,8 @@ void MemoryManager::PrintState(void* _CpuPtr)
 if ( AccCache.state==EvictNext) str = std::string("EvictNext");
 std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
-std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
-<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
+std::cout << GridLogMessage << "\tx"<<std::hex<<AccCache.CpuPtr<<std::dec
+<< "\tx"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
 << "\t" << AccCache.cpuLock
 << "\t" << AccCache.accLock
 << "\t" << AccCache.LRU_valid<<std::endl;

View File

@@ -12,7 +12,10 @@ uint64_t MemoryManager::HostToDeviceBytes;
 uint64_t MemoryManager::DeviceToHostBytes;
 uint64_t MemoryManager::HostToDeviceXfer;
 uint64_t MemoryManager::DeviceToHostXfer;
+uint64_t MemoryManager::DeviceEvictions;
+uint64_t MemoryManager::DeviceDestroy;
+void MemoryManager::Audit(std::string s){};
 void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
 void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
 int MemoryManager::isOpen (void* CpuPtr) { return 0;}
@@ -21,6 +24,7 @@ void MemoryManager::PrintState(void* CpuPtr)
 std::cout << GridLogMessage << "Host<->Device memory movement not currently managed by Grid." << std::endl;
 };
 void MemoryManager::Print(void){};
+void MemoryManager::PrintAll(void){};
 void MemoryManager::NotifyDeletion(void *ptr){};
 NAMESPACE_END(Grid);

View File

@@ -392,9 +392,9 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
 }
-if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
-this->StencilSendToRecvFromComplete(list,dir);
-}
+// if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
+//   this->StencilSendToRecvFromComplete(list,dir);
+// }
 return off_node_bytes;
 }

File diff suppressed because it is too large.

View File

@@ -46,3 +46,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/lattice/Lattice_unary.h>
 #include <Grid/lattice/Lattice_transfer.h>
 #include <Grid/lattice/Lattice_basis.h>
+#include <Grid/lattice/Lattice_crc.h>

View File

@@ -129,7 +129,7 @@ public:
 auto exprCopy = expr;
 ExpressionViewOpen(exprCopy);
-auto me = View(AcceleratorWriteDiscard);
+auto me = View(AcceleratorWrite);
 accelerator_for(ss,me.size(),vobj::Nsimd(),{
 auto tmp = eval(ss,exprCopy);
 coalescedWrite(me[ss],tmp);
@@ -152,7 +152,7 @@ public:
 auto exprCopy = expr;
 ExpressionViewOpen(exprCopy);
-auto me = View(AcceleratorWriteDiscard);
+auto me = View(AcceleratorWrite);
 accelerator_for(ss,me.size(),vobj::Nsimd(),{
 auto tmp = eval(ss,exprCopy);
 coalescedWrite(me[ss],tmp);
@@ -174,7 +174,7 @@ public:
 this->checkerboard=cb;
 auto exprCopy = expr;
 ExpressionViewOpen(exprCopy);
-auto me = View(AcceleratorWriteDiscard);
+auto me = View(AcceleratorWrite);
 accelerator_for(ss,me.size(),vobj::Nsimd(),{
 auto tmp = eval(ss,exprCopy);
 coalescedWrite(me[ss],tmp);
@@ -245,7 +245,7 @@ public:
 ///////////////////////////////////////////
 // user defined constructor
 ///////////////////////////////////////////
-Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
+Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) {
 this->_grid = grid;
 resize(this->_grid->oSites());
 assert((((uint64_t)&this->_odata[0])&0xF) ==0);
@@ -288,7 +288,7 @@ public:
 typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
 conformable(*this,r);
 this->checkerboard = r.Checkerboard();
-auto me = View(AcceleratorWriteDiscard);
+auto me = View(AcceleratorWrite);
 auto him= r.View(AcceleratorRead);
 accelerator_for(ss,me.size(),vobj::Nsimd(),{
 coalescedWrite(me[ss],him(ss));
@@ -303,7 +303,7 @@ public:
 inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
 this->checkerboard = r.Checkerboard();
 conformable(*this,r);
-auto me = View(AcceleratorWriteDiscard);
+auto me = View(AcceleratorWrite);
 auto him= r.View(AcceleratorRead);
 accelerator_for(ss,me.size(),vobj::Nsimd(),{
 coalescedWrite(me[ss],him(ss));

View File

@@ -0,0 +1,55 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_crc.h
Copyright (C) 2021
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
{
auto ff = localNorm2(f);
if ( mu==-1 ) mu = f.Grid()->Nd()-1;
typedef typename vobj::tensor_reduced normtype;
typedef typename normtype::scalar_object scalar;
std::vector<scalar> sff;
sliceSum(ff,sff,mu);
for(int t=0;t<sff.size();t++){
std::cout << s<<" "<<t<<" "<<sff[t]<<std::endl;
}
}
template<class vobj> uint32_t crc(Lattice<vobj> & buf)
{
autoView( buf_v , buf, CpuRead);
return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
}
#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
NAMESPACE_END(Grid);
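A hedged usage sketch of the two helpers defined in this new header (the field name Umu and its grid are hypothetical; crc() relies on zlib's crc32, so zlib must be available at link time):

LatticeGaugeField Umu(&Grid);      // hypothetical gauge field on a hypothetical grid
// ... load or generate Umu ...
CRC(Umu);                          // prints: FingerPrint <file> <line> Umu <crc32 of the local data>
DumpSliceNorm("Umu",Umu);          // localNorm2 summed per slice, last (time) direction by default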

View File

@@ -28,6 +28,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
 #if defined(GRID_CUDA)||defined(GRID_HIP)
 #include <Grid/lattice/Lattice_reduction_gpu.h>
 #endif
+#if defined(GRID_SYCL)
+#include <Grid/lattice/Lattice_reduction_sycl.h>
+#endif
 NAMESPACE_BEGIN(Grid);
@@ -127,7 +130,7 @@ inline Double max(const Double *arg, Integer osites)
 template<class vobj>
 inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
 {
-#if defined(GRID_CUDA)||defined(GRID_HIP)
+#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
 return sum_gpu(arg,osites);
 #else
 return sum_cpu(arg,osites);
@@ -136,7 +139,7 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites)
 template<class vobj>
 inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
 {
-#if defined(GRID_CUDA)||defined(GRID_HIP)
+#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
 return sumD_gpu(arg,osites);
 #else
 return sumD_cpu(arg,osites);
@@ -145,7 +148,7 @@ inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites)
 template<class vobj>
 inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
 {
-#if defined(GRID_CUDA)||defined(GRID_HIP)
+#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
 return sumD_gpu_large(arg,osites);
 #else
 return sumD_cpu(arg,osites);
@@ -155,13 +158,13 @@ inline typename vobj::scalar_objectD sumD_large(const vobj *arg, Integer osites)
 template<class vobj>
 inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
 {
-#if defined(GRID_CUDA)||defined(GRID_HIP)
-autoView( arg_v, arg, AcceleratorRead);
 Integer osites = arg.Grid()->oSites();
-auto ssum= sum_gpu(&arg_v[0],osites);
+#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
+typename vobj::scalar_object ssum;
+autoView( arg_v, arg, AcceleratorRead);
+ssum= sum_gpu(&arg_v[0],osites);
 #else
 autoView(arg_v, arg, CpuRead);
-Integer osites = arg.Grid()->oSites();
 auto ssum= sum_cpu(&arg_v[0],osites);
 #endif
 arg.Grid()->GlobalSum(ssum);
@@ -171,7 +174,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
 template<class vobj>
 inline typename vobj::scalar_object sum_large(const Lattice<vobj> &arg)
 {
-#if defined(GRID_CUDA)||defined(GRID_HIP)
+#if defined(GRID_CUDA)||defined(GRID_HIP)||defined(GRID_SYCL)
 autoView( arg_v, arg, AcceleratorRead);
 Integer osites = arg.Grid()->oSites();
 auto ssum= sum_gpu_large(&arg_v[0],osites);
@@ -235,11 +238,10 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
 typedef decltype(innerProductD(vobj(),vobj())) inner_t;
 Vector<inner_t> inner_tmp(sites);
 auto inner_tmp_v = &inner_tmp[0];
 {
 autoView( left_v , left, AcceleratorRead);
 autoView( right_v,right, AcceleratorRead);
-// This code could read coalesce
 // GPU - SIMT lane compliance...
 accelerator_for( ss, sites, 1,{
 auto x_l = left_v[ss];

View File

@@ -0,0 +1,125 @@
NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Possibly promote to double and sum
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_tensor(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_objectD sobjD;
sobj *mysum =(sobj *) malloc_shared(sizeof(sobj),*theGridAccelerator);
sobj identity; zeroit(identity);
sobj ret ;
Integer nsimd= vobj::Nsimd();
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(mysum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{osites},
Reduction,
[=] (cl::sycl::id<1> item, auto &sum) {
auto osite = item[0];
sum +=Reduce(lat[osite]);
});
});
theGridAccelerator->wait();
ret = mysum[0];
free(mysum,*theGridAccelerator);
sobjD dret; convertType(dret,ret);
return dret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_large(const vobj *lat, Integer osites)
{
return sumD_gpu_tensor(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_small(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites)
{
return sumD_gpu_large(lat,osites);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Return as same precision as input performing reduction in double precision though
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class vobj>
inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu(lat,osites);
return result;
}
template <class vobj>
inline typename vobj::scalar_object sum_gpu_large(const vobj *lat, Integer osites)
{
typedef typename vobj::scalar_object sobj;
sobj result;
result = sumD_gpu_large(lat,osites);
return result;
}
NAMESPACE_END(Grid);
/*
template<class Double> Double svm_reduce(Double *vec,uint64_t L)
{
Double sumResult; zeroit(sumResult);
Double *d_sum =(Double *)cl::sycl::malloc_shared(sizeof(Double),*theGridAccelerator);
Double identity; zeroit(identity);
theGridAccelerator->submit([&](cl::sycl::handler &cgh) {
auto Reduction = cl::sycl::reduction(d_sum,identity,std::plus<>());
cgh.parallel_for(cl::sycl::range<1>{L},
Reduction,
[=] (cl::sycl::id<1> index, auto &sum) {
sum +=vec[index];
});
});
theGridAccelerator->wait();
Double ret = d_sum[0];
free(d_sum,*theGridAccelerator);
std::cout << " svm_reduce finished "<<L<<" sites sum = " << ret <<std::endl;
return ret;
}
template <class vobj>
inline typename vobj::scalar_objectD sumD_gpu_repack(const vobj *lat, Integer osites)
{
typedef typename vobj::vector_type vector;
typedef typename vobj::scalar_type scalar;
typedef typename vobj::scalar_typeD scalarD;
typedef typename vobj::scalar_objectD sobjD;
sobjD ret;
scalarD *ret_p = (scalarD *)&ret;
const int nsimd = vobj::Nsimd();
const int words = sizeof(vobj)/sizeof(vector);
Vector<scalar> buffer(osites*nsimd);
scalar *buf = &buffer[0];
vector *dat = (vector *)lat;
for(int w=0;w<words;w++) {
accelerator_for(ss,osites,nsimd,{
int lane = acceleratorSIMTlane(nsimd);
buf[ss*nsimd+lane] = dat[ss*words+w].getlane(lane);
});
//Precision change at this point is to late to gain precision
ret_p[w] = svm_reduce(buf,nsimd*osites);
}
return ret;
}
*/

View File

@@ -0,0 +1,126 @@
NAMESPACE_BEGIN(Grid);
// If NOT CUDA or HIP -- we should provide
// -- atomicAdd(float *,float)
// -- atomicAdd(double *,double)
//
// Augment CUDA with complex atomics
#if !defined(GRID_HIP) || !defined(GRID_CUDA)
inline void atomicAdd(float *acc,float elem)
{
*acc += elem;
}
inline void atomicAdd(double *acc,double elem)
{
*acc += elem;
}
#endif
inline void atomicAdd(ComplexD *accum,ComplexD & elem)
{
double *a_p = (double *)accum;
double *e_p = (double *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
inline void atomicAdd(ComplexF *accum,ComplexF & elem)
{
float *a_p = (float *)accum;
float *e_p = (float *)&elem;
for(int w=0;w<2;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Augment CUDA with vobj atomics
template<class vobj> accelerator_inline void atomicAdd(vobj *accum, vobj & elem)
{
typedef typename vobj::scalar_type scalar_type;
scalar_type *a_p= (scalar_type *)accum;
scalar_type *e_p= (scalar_type *)& elem;
 constexpr int words = sizeof(vobj)/sizeof(scalar_type); // every scalar component, not just Nsimd
 for(int w=0;w<words;w++){
atomicAdd(&a_p[w],e_p[w]);
}
}
// Atomics based slice sum
template<class vobj> inline void sliceSumGpu(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object::scalar_type scalar_type;
GridBase *grid = Data.Grid();
assert(grid!=NULL);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
// Move to device memory and copy in / out
Vector<vobj> lvSum(rd); // will locally sum vectors first
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=Zero();
}
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
autoView( Data_v, Data, AcceleratorRead);
auto lvSum_p=&lvSum[0];
int ostride = grid->_ostride[orthogdim];
accelerator_for( ree,rd*e1*e2,1, {
int b = ree%e2;
int re= ree/e2;
int n=re%e1;
int r=re/e1;
int so=r*ostride;
int ss=so+n*stride+b;
atomicAdd(&lvSum_p[r],Data_v[ss]);
});
// Sum across simd lanes in the plane, breaking out orthog dir.
Coordinate icoor(Nd);
for(int rt=0;rt<rd;rt++){
extract(lvSum[rt],extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx];
}
}
// sum over nodes.
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) {
result[t]=lsSum[lt];
} else {
result[t]=Zero();
}
}
scalar_type * ptr = (scalar_type *) &result[0];
int words = fd*sizeof(sobj)/sizeof(scalar_type);
grid->GlobalSumVector(ptr, words);
}
NAMESPACE_END(Grid);
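A hedged usage sketch of the atomics-based slice sum above (the wrapper name is an illustrative assumption); it follows the signature declared in sliceSumGpu, summing every slice orthogonal to the chosen direction:

template<class vobj>
void example_sliceSumGpu(const Lattice<vobj> &field)
{
  // Illustrative only: sum every 3d slice orthogonal to the last (time) direction.
  std::vector<typename vobj::scalar_object> slice_sums;
  const int orthogdim = field.Grid()->_ndimension - 1;
  sliceSumGpu(field, slice_sums, orthogdim);
  // slice_sums[t] now holds the global sum over slice t, identical on every rank.
}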

View File

@ -65,29 +65,40 @@ GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE"); GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE"); GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
void GridLogConfigure(std::vector<std::string> &logstreams) { void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(0); GridLogError.Active(1);
GridLogWarning.Active(0); GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on GridLogMessage.Active(1); // at least the messages should be always on
GridLogMemory.Active(0);
GridLogTracing.Active(0);
GridLogIterative.Active(0); GridLogIterative.Active(0);
GridLogDebug.Active(0); GridLogDebug.Active(0);
GridLogPerformance.Active(0); GridLogPerformance.Active(0);
GridLogDslash.Active(0);
GridLogIntegrator.Active(1); GridLogIntegrator.Active(1);
GridLogColours.Active(0); GridLogColours.Active(0);
GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) { for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Error")) GridLogError.Active(1); if (logstreams[i] == std::string("Tracing")) GridLogTracing.Active(1);
if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1); if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1); if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1); if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1); if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
} }
} }
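A hedged usage sketch of the extended stream list (the wrapper function is an assumption; the strings are exactly the names matched in GridLogConfigure above):

void exampleConfigureStreams(void)
{
  std::vector<std::string> streams = { "Dslash", "Memory", "Tracing", "NoHMC" };
  GridLogConfigure(streams);  // switch on Dslash/Memory/Tracing, silence the HMC stream
  std::cout << GridLogDslash << "Dslash logging now active" << std::endl;
}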

View File

@ -138,7 +138,8 @@ public:
stream << std::setw(log.topWidth); stream << std::setw(log.topWidth);
} }
stream << log.topName << log.background()<< " : "; stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left; // stream << log.colour() << std::left;
stream << std::left;
if (log.chanWidth > 0) if (log.chanWidth > 0)
{ {
stream << std::setw(log.chanWidth); stream << std::setw(log.chanWidth);
@ -153,9 +154,9 @@ public:
stream << log.evidence() stream << log.evidence()
<< now << log.background() << " : " ; << now << log.background() << " : " ;
} }
stream << log.colour(); // stream << log.colour();
stream << std::right;
stream.flags(f); stream.flags(f);
return stream; return stream;
} else { } else {
return devnull; return devnull;
@ -180,8 +181,12 @@ extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage; extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug ; extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance; extern GridLogger GridLogPerformance;
extern GridLogger GridLogDslash;
extern GridLogger GridLogIterative ; extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ; extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;
extern GridLogger GridLogMemory;
extern GridLogger GridLogTracing;
extern Colours GridLogColours; extern Colours GridLogColours;
std::string demangle(const char* name) ; std::string demangle(const char* name) ;

View File

@ -31,6 +31,7 @@ directory
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <string>
#include <map> #include <map>
#include <pwd.h> #include <pwd.h>
@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter {
// Fill ILDG header data struct // Fill ILDG header data struct
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
ildgFormat ildgfmt ; ildgFormat ildgfmt ;
ildgfmt.field = std::string("su3gauge"); const std::string stNC = std::to_string( Nc ) ;
ildgfmt.field = std::string("su"+stNC+"gauge");
if ( format == std::string("IEEE32BIG") ) { if ( format == std::string("IEEE32BIG") ) {
ildgfmt.precision = 32; ildgfmt.precision = 32;
@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader {
} else { } else {
assert(found_ildgFormat); assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") ); const std::string stNC = std::to_string( Nc ) ;
assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") );
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can // Populate our Grid metadata as best we can
@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader {
std::ostringstream vers; vers << ildgFormat_.version; std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str(); FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC);
FieldMetaData_.nd=4; FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4); FieldMetaData_.dimension.resize(4);

View File

@ -6,8 +6,8 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -182,8 +182,8 @@ class GaugeStatistics
public: public:
void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{ {
header.link_trace=WilsonLoops<Impl>::linkTrace(data); header.link_trace = WilsonLoops<Impl>::linkTrace(data);
header.plaquette =WilsonLoops<Impl>::avgPlaquette(data); header.plaquette = WilsonLoops<Impl>::avgPlaquette(data);
} }
}; };
typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics; typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics;
@ -203,20 +203,24 @@ template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzCo
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm) inline void reconstruct3(LorentzColourMatrix & cm)
{ {
const int x=0; assert( Nc < 4 && Nc > 1 ) ;
const int y=1;
const int z=2;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy #if Nc == 2
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz cm(mu)()(1,0) = -adj(cm(mu)()(0,y)) ;
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx cm(mu)()(1,1) = adj(cm(mu)()(0,x)) ;
#else
const int x=0 , y=1 , z=2 ; // a little disingenuous labelling
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
#endif
} }
} }
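For reference (a derivation added here, not part of the patch), the reconstruction above encodes the unitarity and unit-determinant constraints: for Nc=3 the third row is the conjugated cross product of the first two, while for Nc=2 the second row is fixed entirely by the first:

u_{2i} = \epsilon_{ijk}\,\bigl(u_{0j}\,u_{1k}\bigr)^{*} \quad (N_c=3),
\qquad
U = \begin{pmatrix} a & b \\ -b^{*} & a^{*} \end{pmatrix} \quad (N_c=2).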
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage // Some data types for intermediate storage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, Nc-1>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3; typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
@ -278,7 +282,6 @@ struct GaugeSimpleMunger{
template <class fobj, class sobj> template <class fobj, class sobj>
struct GaugeSimpleUnmunger { struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) { void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) { for (int i = 0; i < Nc; i++) {
@ -317,8 +320,8 @@ template<class fobj,class sobj>
struct Gauge3x2munger{ struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){ void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){ for(int i=0;i<Nc-1;i++){
for(int j=0;j<3;j++){ for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)(i)(j); out(mu)()(i,j) = in(mu)(i)(j);
}} }}
} }
@ -330,8 +333,8 @@ template<class fobj,class sobj>
struct Gauge3x2unmunger{ struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){ void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){ for(int i=0;i<Nc-1;i++){
for(int j=0;j<3;j++){ for(int j=0;j<Nc;j++){
out(mu)(i)(j) = in(mu)()(i,j); out(mu)(i)(j) = in(mu)()(i,j);
}} }}
} }

View File

@ -9,6 +9,7 @@
Author: Matt Spraggs <matthew.spraggs@gmail.com> Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -30,6 +31,8 @@
#ifndef GRID_NERSC_IO_H #ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H #define GRID_NERSC_IO_H
#include <string>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
using namespace Grid; using namespace Grid;
@ -145,15 +148,17 @@ public:
std::string format(header.floating_point); std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG")); const int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); const int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); const int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); const int ieee64 = (format == std::string("IEEE64") || \
format == std::string("IEEE64LITTLE"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger; // depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type> // munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) { const std::string stNC = std::to_string( Nc ) ;
if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F> BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F>
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format, (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
@ -164,7 +169,7 @@ public:
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format, (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
} }
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { } else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF> BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format, (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
@ -209,27 +214,29 @@ public:
template<class GaugeStats=PeriodicGaugeStatistics> template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file, std::string file,
std::string ens_label = std::string("DWF")) std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{ {
writeConfiguration(Umu,file,0,1,ens_label); writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number);
} }
template<class GaugeStats=PeriodicGaugeStatistics> template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file, std::string file,
int two_row, int two_row,
int bits32, int bits32,
std::string ens_label = std::string("DWF")) std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{ {
typedef vLorentzColourMatrixD vobj; typedef vLorentzColourMatrixD vobj;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
FieldMetaData header; FieldMetaData header;
/////////////////////////////////////////// header.sequence_number = sequence_number;
// Following should become arguments header.ensemble_id = ens_id;
///////////////////////////////////////////
header.sequence_number = 1;
header.ensemble_id = std::string("UKQCD");
header.ensemble_label = ens_label; header.ensemble_label = ens_label;
header.hdr_version = "1.0" ;
typedef LorentzColourMatrixD fobj3D; typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D; typedef LorentzColour2x3D fobj2D;
@ -243,10 +250,14 @@ public:
uint64_t offset; uint64_t offset;
// Sod it -- always write 3x3 double // Sod it -- always write NcxNc double
header.floating_point = std::string("IEEE64BIG"); header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3"); const std::string stNC = std::to_string( Nc ) ;
GaugeSimpleUnmunger<fobj3D,sobj> munge; if( two_row ) {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE" );
} else {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC );
}
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
truncate(file); truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
@ -254,8 +265,15 @@ public:
grid->Broadcast(0,(void *)&offset,sizeof(offset)); grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, if( two_row ) {
nersc_csum,scidac_csuma,scidac_csumb); Gauge3x2unmunger<fobj2D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
}
header.checksum = nersc_csum; header.checksum = nersc_csum;
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
writeHeader(header,file); writeHeader(header,file);
@ -287,8 +305,7 @@ public:
header.plaquette=0.0; header.plaquette=0.0;
MachineCharacteristics(header); MachineCharacteristics(header);
uint64_t offset; uint64_t offset;
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48"); header.data_type = std::string("RANLUX48");
@ -328,7 +345,7 @@ public:
GridBase *grid = parallel.Grid(); GridBase *grid = parallel.Grid();
uint64_t offset = readHeader(file,grid,header); uint64_t offset = readHeader(file,grid,header);
FieldMetaData clone(header); FieldMetaData clone(header);

View File

@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */ /* END LEGAL */
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
GridTimePoint theProgramStart = GridClock::now();
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16)) #define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
#define RawConfig(A,B) (A<<8|B) #define RawConfig(A,B) (A<<8|B)
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = { const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {

View File

@ -30,6 +30,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_PERFCOUNT_H #ifndef GRID_PERFCOUNT_H
#define GRID_PERFCOUNT_H #define GRID_PERFCOUNT_H
#ifndef __SSC_START
#define __SSC_START
#define __SSC_STOP
#endif
#include <sys/time.h> #include <sys/time.h>
#include <ctime> #include <ctime>
#include <chrono> #include <chrono>
@ -72,17 +78,9 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
inline uint64_t cyclecount(void){ inline uint64_t cyclecount(void){
return 0; return 0;
} }
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
#define __SSC_STOP __SSC_MARK(0x110)
#define __SSC_START __SSC_MARK(0x111)
#else #else
#define __SSC_MARK(mark)
#define __SSC_STOP
#define __SSC_START
/* /*
* cycle counters arch dependent * cycle counters arch dependent
*/ */

View File

@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid) NAMESPACE_BEGIN(Grid)
// Dress the output; use std::chrono //typedef std::chrono::system_clock GridClock;
// C++11 time facilities better? typedef std::chrono::high_resolution_clock GridClock;
inline double usecond(void) {
struct timeval tv;
#ifdef TIMERS_ON
gettimeofday(&tv,NULL);
#endif
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint; typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::seconds GridSecs; typedef std::chrono::seconds GridSecs;
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs; typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime; typedef std::chrono::microseconds GridTime;
extern GridTimePoint theProgramStart;
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
return 1.0*usecs.count();
}
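A hedged timing idiom built on this clock (the wrapper function is illustrative; it is the same pattern the GRID_TRACING_TIMER tracer uses further down):

void exampleTimedRegion(void)
{
  double t0 = usecond();                 // microseconds since theProgramStart
  // ... timed region ...
  double elapsed_us = usecond() - t0;
  std::cout << GridLogMessage << "region took " << elapsed_us << " us" << std::endl;
}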
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time) inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{ {
stream << time.count()<<" s"; stream << time.count()<<" s";

Grid/perfmon/Tracing.h Normal file
View File

@ -0,0 +1,70 @@
#pragma once
NAMESPACE_BEGIN(Grid);
#ifdef GRID_TRACING_NVTX
#include <nvToolsExt.h>
class GridTracer {
public:
GridTracer(const char* name) {
nvtxRangePushA(name);
}
~GridTracer() {
nvtxRangePop();
}
};
inline void tracePush(const char *name) { nvtxRangePushA(name); }
inline void tracePop(const char *name) { nvtxRangePop(); }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_ROCTX
#include <roctracer/roctx.h>
class GridTracer {
public:
GridTracer(const char* name) {
roctxRangePushA(name);
std::cout << "roctxRangePush "<<name<<std::endl;
}
~GridTracer() {
roctxRangePop();
std::cout << "roctxRangePop "<<std::endl;
}
};
inline void tracePush(const char *name) { roctxRangePushA(name); }
inline void tracePop(const char *name) { roctxRangePop(); }
inline int traceStart(const char *name) { return roctxRangeStart(name); }
inline void traceStop(int ID) { roctxRangeStop(ID); }
#endif
#ifdef GRID_TRACING_TIMER
class GridTracer {
public:
const char *name;
double elapsed;
GridTracer(const char* _name) {
name = _name;
elapsed=-usecond();
}
~GridTracer() {
elapsed+=usecond();
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
}
};
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#endif
#ifdef GRID_TRACING_NONE
#define GRID_TRACE(name)
inline void tracePush(const char *name) { }
inline void tracePop(const char *name) { }
inline int traceStart(const char *name) { return 0; }
inline void traceStop(int ID) { }
#else
// Indirect concatenation so that __COUNTER__ expands before token pasting
#define GRID_TRACE_CAT_(a,b) a##b
#define GRID_TRACE_CAT(a,b)  GRID_TRACE_CAT_(a,b)
#define GRID_TRACE(name) GridTracer GRID_TRACE_CAT(uniq_name_using_macros,__COUNTER__)(name);
#endif
NAMESPACE_END(Grid);
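A hedged usage sketch (the function name is an assumption): GRID_TRACE declares a scoped GridTracer, so the range is opened on entry and closed, or timed, when the enclosing scope exits.

void exampleHaloExchange(void)       // illustrative only
{
  GRID_TRACE("HaloExchange");        // one named range per scope is sufficient
  // ... gather / communicate / scatter work ...
}                                    // tracer destructor pops or reports the range here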

View File

@ -16,8 +16,12 @@
#ifdef __NVCC__ #ifdef __NVCC__
#pragma push #pragma push
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#else
#pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" #pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#endif #endif
#endif
#include "pugixml.h" #include "pugixml.h"

View File

@ -451,9 +451,20 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
// Fermion <-> propagator assignements // Fermion <-> propagator assignements
////////////////////////////////////////////// //////////////////////////////////////////////
//template <class Prop, class Ferm> //template <class Prop, class Ferm>
#define FAST_FERM_TO_PROP
template <class Fimpl> template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{ {
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuWrite);
autoView(f_v,f,CpuRead);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
}}
});
#else
for(int j = 0; j < Ns; ++j) for(int j = 0; j < Ns; ++j)
{ {
auto pjs = peekSpin(p, j, s); auto pjs = peekSpin(p, j, s);
@ -465,12 +476,23 @@ void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::Fermio
} }
pokeSpin(p, pjs, j, s); pokeSpin(p, pjs, j, s);
} }
#endif
} }
//template <class Prop, class Ferm> //template <class Prop, class Ferm>
template <class Fimpl> template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{ {
#ifdef FAST_FERM_TO_PROP
autoView(p_v,p,CpuRead);
autoView(f_v,f,CpuWrite);
thread_for(idx,p_v.oSites(),{
for(int ss = 0; ss < Ns; ++ss) {
for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
}}
});
#else
for(int j = 0; j < Ns; ++j) for(int j = 0; j < Ns; ++j)
{ {
auto pjs = peekSpin(p, j, s); auto pjs = peekSpin(p, j, s);
@ -482,6 +504,7 @@ void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::Propagato
} }
pokeSpin(f, fj, j); pokeSpin(f, fj, j);
} }
#endif
} }
////////////////////////////////////////////// //////////////////////////////////////////////
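A hedged sketch of the usual call pattern (the helper and the solver interface are assumptions): each spin-colour source column is extracted, solved, and packed back into the propagator with FermToProp, which the fast path above now parallelises over sites.

template<class Fimpl, class Solver>
void example_point_propagator(Solver &solve,                          // illustrative only
                              typename Fimpl::PropagatorField &sol_prop,
                              const typename Fimpl::PropagatorField &src_prop,
                              typename Fimpl::FermionField &src_ferm,
                              typename Fimpl::FermionField &sol_ferm)
{
  for(int s = 0; s < Ns; ++s) {
    for(int c = 0; c < Fimpl::Dimension; ++c) {
      PropToFerm<Fimpl>(src_ferm, src_prop, s, c);   // extract source column (s,c)
      solve(sol_ferm, src_ferm);                     // assumed inverter interface
      FermToProp<Fimpl>(sol_prop, sol_ferm, s, c);   // pack solution into the sink indices
    }
  }
}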

View File

@ -68,9 +68,16 @@ public:
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Support for MADWF tricks // Support for MADWF tricks
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
RealD Mass(void) { return mass; }; RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; };
RealD MassPlus(void) { return mass_plus; };
RealD MassMinus(void) { return mass_minus; };
void SetMass(RealD _mass) { void SetMass(RealD _mass) {
mass=_mass; mass_plus=mass_minus=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void SetMass(RealD _mass_plus, RealD _mass_minus) {
mass_plus=_mass_plus;
mass_minus=_mass_minus;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ; } ;
void P(const FermionField &psi, FermionField &chi); void P(const FermionField &psi, FermionField &chi);
@ -108,7 +115,7 @@ public:
void MeooeDag5D (const FermionField &in, FermionField &out); void MeooeDag5D (const FermionField &in, FermionField &out);
// protected: // protected:
RealD mass; RealD mass_plus, mass_minus;
// Save arguments to SetCoefficientsInternal // Save arguments to SetCoefficientsInternal
Vector<Coeff_t> _gamma; Vector<Coeff_t> _gamma;
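A hedged sketch of the new interface (the function is illustrative): unequal plus/minus masses can be set after construction, while the single-argument form keeps the old degenerate behaviour.

void example_split_mass(CayleyFermion5D<WilsonImplD> &D5)   // illustrative only
{
  D5.SetMass(0.01);          // degenerate: mass_plus = mass_minus = 0.01
  D5.SetMass(0.01, 0.05);    // unequal masses; coefficients are rebuilt internally
  RealD mbar = D5.Mass();    // now returns (mass_plus + mass_minus)/2
  (void)mbar;
}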

View File

@ -0,0 +1,333 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h
Copyright (C) 2017 - 2022
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
////////////////////////////////////////////
// Standard Clover
// (4+m0) + csw * clover_term
// Exp Clover
// (4+m0) * exp(csw/(4+m0) clover_term)
// = (4+m0) + csw * clover_term + ...
////////////////////////////////////////////
NAMESPACE_BEGIN(Grid);
//////////////////////////////////
// Generic Standard Clover
//////////////////////////////////
template<class Impl>
class CloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) {
GridBase *grid = CloverTerm.Grid();
CloverTerm += diag_mass;
int lvol = grid->lSites();
int DimRep = Impl::Dimension;
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
EigenInvCloverOp = EigenCloverOp.inverse();
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
pokeLocalSite(Qxinv, CTIv, lcoor);
});
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Generic Exp Clover
//////////////////////////////////
template<class Impl>
class ExpCloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef WilsonCloverHelpers<Impl> Helpers;
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
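The loop above tracks the Lagrange remainder of the truncated Taylor series (a reference derivation, not part of the patch): after the loop cond holds R^{N+1}/(N+1)!, so the chosen order N_max satisfies

\Bigl\| e^{X} - \sum_{n=0}^{N_{\max}} \frac{X^{n}}{n!} \Bigr\|
\;\le\; \frac{R^{\,N_{\max}+1}}{(N_{\max}+1)!}\, e^{R} \;\le\; \mathrm{prec},
\qquad \|X\| \le R,

with prec = 10^{-12} in double and 10^{-6} in single precision, matching the two specialisations above.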
static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
Clover *= (1.0/diag_mass);
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
// prepare inverse
CloverInv = (-1.0)*Clover;
Clover = ExpClover * diag_mass;
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
CloverInv = ExpClover * (1.0/diag_mass);
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
//////////////////////////////////
// Compact Standard Clover
//////////////////////////////////
template<class Impl>
class CompactCloverHelpers: public CompactWilsonCloverHelpers<Impl>,
public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
Clover += diag_mass;
}
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
  // TODO: implement Cmunu for better performance with the compact layout; do it
  // in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers rather than here
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Compact Exp Clover
//////////////////////////////////
template<class Impl>
class CompactExpCloverHelpers: public CompactWilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void InstantiateClover(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
Clover *= (1.0/diag_mass);
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
// prepare inverse
CloverInv = (-1.0)*Clover;
Clover = ExpClover * diag_mass;
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
CloverInv = ExpClover * (1.0/diag_mass);
}
static void InvertClover(CloverField& InvClover,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle,
CloverDiagonalField& diagonalInv,
CloverTriangleField& triangleInv,
bool fixedBoundaries) {
if (fixedBoundaries)
{
CompactHelpers::Invert(diagonal, triangle, diagonalInv, triangleInv);
}
else
{
CompactHelpers::ConvertLayout(InvClover, diagonalInv, triangleInv);
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
NAMESPACE_END(Grid);

View File

@ -31,6 +31,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h> #include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h> #include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid);
// + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site // + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site
// = 84 complex words per site // = 84 complex words per site
template<class Impl> template<class Impl, class CloverHelpers>
class CompactWilsonCloverFermion : public WilsonFermion<Impl>, class CompactWilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl>, public WilsonCloverHelpers<Impl>,
public CompactWilsonCloverHelpers<Impl> { public CompactWilsonCloverHelpers<Impl> {
@ -224,7 +225,7 @@ public:
RealD csw_t; RealD csw_t;
RealD cF; RealD cF;
bool open_boundaries; bool fixedBoundaries;
CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd; CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd;
CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd; CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;

View File

@ -138,38 +138,52 @@ typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD; typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
// Clover fermions // Clover fermions
typedef WilsonCloverFermion<WilsonImplR> WilsonCloverFermionR; template <typename WImpl> using WilsonClover = WilsonCloverFermion<WImpl, CloverHelpers<WImpl>>;
typedef WilsonCloverFermion<WilsonImplF> WilsonCloverFermionF; template <typename WImpl> using WilsonExpClover = WilsonCloverFermion<WImpl, ExpCloverHelpers<WImpl>>;
typedef WilsonCloverFermion<WilsonImplD> WilsonCloverFermionD;
typedef WilsonCloverFermion<WilsonAdjImplR> WilsonCloverAdjFermionR; typedef WilsonClover<WilsonImplR> WilsonCloverFermionR;
typedef WilsonCloverFermion<WilsonAdjImplF> WilsonCloverAdjFermionF; typedef WilsonClover<WilsonImplF> WilsonCloverFermionF;
typedef WilsonCloverFermion<WilsonAdjImplD> WilsonCloverAdjFermionD; typedef WilsonClover<WilsonImplD> WilsonCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR; typedef WilsonExpClover<WilsonImplR> WilsonExpCloverFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF; typedef WilsonExpClover<WilsonImplF> WilsonExpCloverFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD; typedef WilsonExpClover<WilsonImplD> WilsonExpCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR; typedef WilsonClover<WilsonAdjImplR> WilsonCloverAdjFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF; typedef WilsonClover<WilsonAdjImplF> WilsonCloverAdjFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD; typedef WilsonClover<WilsonAdjImplD> WilsonCloverAdjFermionD;
typedef WilsonClover<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD;
// Compact Clover fermions // Compact Clover fermions
typedef CompactWilsonCloverFermion<WilsonImplR> CompactWilsonCloverFermionR; template <typename WImpl> using CompactWilsonClover = CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>>;
typedef CompactWilsonCloverFermion<WilsonImplF> CompactWilsonCloverFermionF; template <typename WImpl> using CompactWilsonExpClover = CompactWilsonCloverFermion<WImpl, CompactExpCloverHelpers<WImpl>>;
typedef CompactWilsonCloverFermion<WilsonImplD> CompactWilsonCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonAdjImplR> CompactWilsonCloverAdjFermionR; typedef CompactWilsonClover<WilsonImplR> CompactWilsonCloverFermionR;
typedef CompactWilsonCloverFermion<WilsonAdjImplF> CompactWilsonCloverAdjFermionF; typedef CompactWilsonClover<WilsonImplF> CompactWilsonCloverFermionF;
typedef CompactWilsonCloverFermion<WilsonAdjImplD> CompactWilsonCloverAdjFermionD; typedef CompactWilsonClover<WilsonImplD> CompactWilsonCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR; typedef CompactWilsonExpClover<WilsonImplR> CompactWilsonExpCloverFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF; typedef CompactWilsonExpClover<WilsonImplF> CompactWilsonExpCloverFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD; typedef CompactWilsonExpClover<WilsonImplD> CompactWilsonExpCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR; typedef CompactWilsonClover<WilsonAdjImplR> CompactWilsonCloverAdjFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF; typedef CompactWilsonClover<WilsonAdjImplF> CompactWilsonCloverAdjFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD; typedef CompactWilsonClover<WilsonAdjImplD> CompactWilsonCloverAdjFermionD;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD;
// Domain Wall fermions // Domain Wall fermions
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;

View File

@ -32,6 +32,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h> #include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h> #include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid);
// csw_r = csw_t to recover the isotropic version // csw_r = csw_t to recover the isotropic version
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
template <class Impl> template<class Impl, class CloverHelpers>
class WilsonCloverFermion : public WilsonFermion<Impl>, class WilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl> public WilsonCloverHelpers<Impl>
{ {

View File

@ -209,6 +209,8 @@ public:
}; };
////////////////////////////////////////////////////////
template<class Impl> class CompactWilsonCloverHelpers { template<class Impl> class CompactWilsonCloverHelpers {
public: public:

View File

@ -47,8 +47,6 @@ class CompactWilsonCloverTypes {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions");
static constexpr int Nred = Nc * Nhs; // 6 static constexpr int Nred = Nc * Nhs; // 6
static constexpr int Nblock = Nhs; // 2 static constexpr int Nblock = Nhs; // 2
static constexpr int Ndiagonal = Nred; // 6 static constexpr int Ndiagonal = Nred; // 6

View File

@ -117,19 +117,19 @@ public:
typedef decltype(coalescedRead(*in)) sobj; typedef decltype(coalescedRead(*in)) sobj;
typedef decltype(coalescedRead(*out0)) hsobj; typedef decltype(coalescedRead(*out0)) hsobj;
unsigned int Nsimd = vobj::Nsimd(); constexpr unsigned int Nsimd = vobj::Nsimd();
unsigned int mask = Nsimd >> (type + 1); unsigned int mask = Nsimd >> (type + 1);
int lane = acceleratorSIMTlane(Nsimd); int lane = acceleratorSIMTlane(Nsimd);
int j0 = lane &(~mask); // inner coor zero int j0 = lane &(~mask); // inner coor zero
int j1 = lane |(mask) ; // inner coor one int j1 = lane |(mask) ; // inner coor one
const vobj *vp0 = &in[k]; const vobj *vp0 = &in[k]; // out0[j] = merge low bit of type from in[k] and in[m]
const vobj *vp1 = &in[m]; const vobj *vp1 = &in[m]; // out1[j] = merge hi bit of type from in[k] and in[m]
const vobj *vp = (lane&mask) ? vp1:vp0; const vobj *vp = (lane&mask) ? vp1:vp0;// if my lane has high bit take vp1, low bit take vp0
auto sa = coalescedRead(*vp,j0); auto sa = coalescedRead(*vp,j0); // lane to read for out 0, NB 50% read coalescing
auto sb = coalescedRead(*vp,j1); auto sb = coalescedRead(*vp,j1); // lane to read for out 1
hsobj psa, psb; hsobj psa, psb;
projector::Proj(psa,sa,mu,dag); projector::Proj(psa,sa,mu,dag); // spin project the result0
projector::Proj(psb,sb,mu,dag); projector::Proj(psb,sb,mu,dag); // spin project the result1
coalescedWrite(out0[j],psa); coalescedWrite(out0[j],psa);
coalescedWrite(out1[j],psb); coalescedWrite(out1[j],psb);
#else #else

View File

@ -47,7 +47,7 @@ CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
FiveDimRedBlackGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimGrid,
FourDimRedBlackGrid,_M5,p), FourDimRedBlackGrid,_M5,p),
mass(_mass) mass_plus(_mass), mass_minus(_mass)
{ {
} }
@ -209,8 +209,8 @@ void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
Vector<Coeff_t> diag (Ls,1.0); Vector<Coeff_t> diag (Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass; Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass_minus;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass; Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass_plus;
M5D(psi,chi,chi,lower,diag,upper); M5D(psi,chi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -220,8 +220,8 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
Vector<Coeff_t> diag = bs; Vector<Coeff_t> diag = bs;
Vector<Coeff_t> upper= cs; Vector<Coeff_t> upper= cs;
Vector<Coeff_t> lower= cs; Vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,Din,lower,diag,upper); M5D(psi,psi,Din,lower,diag,upper);
} }
// FIXME Redunant with the above routine; check this and eliminate // FIXME Redunant with the above routine; check this and eliminate
@ -235,8 +235,8 @@ template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &
upper[i]=-ceo[i]; upper[i]=-ceo[i];
lower[i]=-ceo[i]; lower[i]=-ceo[i];
} }
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper); M5D(psi,psi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -250,8 +250,8 @@ void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &
upper[i]=-cee[i]; upper[i]=-cee[i];
lower[i]=-cee[i]; lower[i]=-cee[i];
} }
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper); M5D(psi,psi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -266,9 +266,9 @@ void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &
// Assemble the 5d matrix // Assemble the 5d matrix
if ( s==0 ) { if ( s==0 ) {
upper[s] = -cee[s+1] ; upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1]; lower[s] = mass_minus*cee[Ls-1];
} else if ( s==(Ls-1)) { } else if ( s==(Ls-1)) {
upper[s] = mass*cee[0]; upper[s] = mass_plus*cee[0];
lower[s] = -cee[s-1]; lower[s] = -cee[s-1];
} else { } else {
upper[s]=-cee[s+1]; upper[s]=-cee[s+1];
@ -291,8 +291,8 @@ void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
Vector<Coeff_t> diag(Ls,1.0); Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); Vector<Coeff_t> upper(Ls,-1.0);
Vector<Coeff_t> lower(Ls,-1.0); Vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_plus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_minus*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper); M5Ddag(psi,chi,chi,lower,diag,upper);
} }
@ -307,9 +307,9 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
for (int s=0;s<Ls;s++){ for (int s=0;s<Ls;s++){
if ( s== 0 ) { if ( s== 0 ) {
upper[s] = cs[s+1]; upper[s] = cs[s+1];
lower[s] =-mass*cs[Ls-1]; lower[s] =-mass_minus*cs[Ls-1];
} else if ( s==(Ls-1) ) { } else if ( s==(Ls-1) ) {
upper[s] =-mass*cs[0]; upper[s] =-mass_plus*cs[0];
lower[s] = cs[s-1]; lower[s] = cs[s-1];
} else { } else {
upper[s] = cs[s+1]; upper[s] = cs[s+1];
@ -552,7 +552,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0]; leem[i]=mass_minus*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) { for(int j=0;j<i;j++) {
assert(bee[j+1]!=Coeff_t(0.0)); assert(bee[j+1]!=Coeff_t(0.0));
leem[i]*= aee[j]/bee[j+1]; leem[i]*= aee[j]/bee[j+1];
@ -560,7 +560,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass; ueem[i]=mass_plus;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0]; ueem[i]*= aee[0]/bee[0];
@ -573,7 +573,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
} }
{ {
Coeff_t delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass_minus*cee[Ls-1];
for(int j=0;j<Ls-1;j++) { for(int j=0;j<Ls-1;j++) {
assert(bee[j] != Coeff_t(0.0)); assert(bee[j] != Coeff_t(0.0));
delta_d *= cee[j]/bee[j]; delta_d *= cee[j]/bee[j];
@ -642,6 +642,10 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if (!defined(GRID_HIP)) #if (!defined(GRID_HIP))
Gamma::Algebra Gmu [] = { Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX, Gamma::Algebra::GammaX,
@ -777,6 +781,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
assert(mu>=0); assert(mu>=0);
assert(mu<Nd); assert(mu<Nd);
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if 0 #if 0
int tshift = (mu == Nd-1) ? 1 : 0; int tshift = (mu == Nd-1) ? 1 : 0;

View File

@@ -32,22 +32,23 @@
 #include <Grid/qcd/spin/Dirac.h>
 #include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
 
 NAMESPACE_BEGIN(Grid);
 
-template<class Impl>
-CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu,
+template<class Impl, class CloverHelpers>
+CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(GaugeField& _Umu,
                                      GridCartesian& Fgrid,
                                      GridRedBlackCartesian& Hgrid,
                                      const RealD _mass,
                                      const RealD _csw_r,
                                      const RealD _csw_t,
                                      const RealD _cF,
                                      const WilsonAnisotropyCoefficients& clover_anisotropy,
                                      const ImplParams& impl_p)
   : WilsonBase(_Umu, Fgrid, Hgrid, _mass, impl_p, clover_anisotropy)
   , csw_r(_csw_r)
   , csw_t(_csw_t)
   , cF(_cF)
-  , open_boundaries(impl_p.boundary_phases[Nd-1] == 0.0)
+  , fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0)
   , Diagonal(&Fgrid),     Triangle(&Fgrid)
   , DiagonalEven(&Hgrid), TriangleEven(&Hgrid)
   , DiagonalOdd(&Hgrid),  TriangleOdd(&Hgrid)
@@ -58,80 +59,85 @@ CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu,
   , BoundaryMask(&Fgrid)
   , BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid)
 {
+  assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3);
   csw_r *= 0.5;
   csw_t *= 0.5;
   if (clover_anisotropy.isAnisotropic)
     csw_r /= clover_anisotropy.xi_0;
 
   ImportGauge(_Umu);
-  if (open_boundaries)
+  if (fixedBoundaries) {
+    this->BoundaryMaskEven.Checkerboard() = Even;
+    this->BoundaryMaskOdd.Checkerboard() = Odd;
     CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
+  }
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::Dhop(const FermionField& in, FermionField& out, int dag) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
   WilsonBase::Dhop(in, out, dag);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::DhopOE(const FermionField& in, FermionField& out, int dag) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
   WilsonBase::DhopOE(in, out, dag);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::DhopEO(const FermionField& in, FermionField& out, int dag) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
   WilsonBase::DhopEO(in, out, dag);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
   WilsonBase::DhopDir(in, out, dir, disp);
-  if(this->open_boundaries) ApplyBoundaryMask(out);
+  if(this->fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
   WilsonBase::DhopDirAll(in, out);
-  if(this->open_boundaries) {
+  if(this->fixedBoundaries) {
     for(auto& o : out) ApplyBoundaryMask(o);
   }
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::M(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField& in, FermionField& out) {
   out.Checkerboard() = in.Checkerboard();
   WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
   Mooee(in, Tmp);
   axpy(out, 1.0, out, Tmp);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::Mdag(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField& in, FermionField& out) {
  out.Checkerboard() = in.Checkerboard();
   WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
   MooeeDag(in, Tmp);
   axpy(out, 1.0, out, Tmp);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::Meooe(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
   WilsonBase::Meooe(in, out);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MeooeDag(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
   WilsonBase::MeooeDag(in, out);
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField& in, FermionField& out) {
   if(in.Grid()->_isCheckerBoarded) {
     if(in.Checkerboard() == Odd) {
       MooeeInternal(in, out, DiagonalOdd, TriangleOdd);
@@ -141,16 +147,16 @@ void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionFiel
   } else {
     MooeeInternal(in, out, Diagonal, Triangle);
   }
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MooeeDag(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField& in, FermionField& out) {
   Mooee(in, out); // blocks are hermitian
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField& in, FermionField& out) {
   if(in.Grid()->_isCheckerBoarded) {
     if(in.Checkerboard() == Odd) {
       MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd);
@@ -160,27 +166,27 @@ void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionF
   } else {
     MooeeInternal(in, out, DiagonalInv, TriangleInv);
   }
-  if(open_boundaries) ApplyBoundaryMask(out);
+  if(fixedBoundaries) ApplyBoundaryMask(out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MooeeInvDag(const FermionField& in, FermionField& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField& in, FermionField& out) {
   MooeeInv(in, out); // blocks are hermitian
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
   DhopDir(in, out, dir, disp);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
   DhopDirAll(in, out);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
-  assert(!open_boundaries); // TODO check for changes required for open bc
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
+  assert(!fixedBoundaries); // TODO check for changes required for open bc
 
   // NOTE: code copied from original clover term
   conformable(X.Grid(), Y.Grid());
@@ -251,7 +257,7 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
   }
   PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
   Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
-  force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked
+  force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
   count++;
 }
@@ -261,18 +267,18 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
   force += clover_force;
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
   assert(0);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
   assert(0);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField& in,
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField& in,
                                                      FermionField& out,
                                                      const CloverDiagonalField& diagonal,
                                                      const CloverTriangleField& triangle) {
@@ -285,8 +291,8 @@ void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField&
   CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle);
 }
 
-template<class Impl>
-void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
+template<class Impl, class CloverHelpers>
+void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField& _Umu) {
   // NOTE: parts copied from original implementation
 
   // Import gauge into base class
@@ -299,6 +305,7 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
   GridBase* grid = _Umu.Grid();
   typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
   CloverField TmpOriginal(grid);
+  CloverField TmpInverse(grid);
 
   // Compute the field strength terms mu>nu
   double t2 = usecond();
@@ -318,22 +325,30 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
   TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
   TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
   TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;
-  TmpOriginal += this->diag_mass;
+
+  // Instantiate the clover term
+  // - In case of the standard clover the mass term is added
+  // - In case of the exponential clover the clover term is exponentiated
+  double t4 = usecond();
+  CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, this->diag_mass);
 
   // Convert the data layout of the clover term
-  double t4 = usecond();
+  double t5 = usecond();
   CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);
 
-  // Possible modify the boundary values
-  double t5 = usecond();
-  if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
-
-  // Invert the clover term in the improved layout
+  // Modify the clover term at the temporal boundaries in case of open boundary conditions
   double t6 = usecond();
-  CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv);
+  if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
+
+  // Invert the Clover term
+  // In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved
+  // in TmpInverse can be used. In all other cases the clover term has to be explictly inverted.
+  // TODO: For now this inversion is explictly done on the CPU
+  double t7 = usecond();
+  CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries);
 
   // Fill the remaining clover fields
-  double t7 = usecond();
+  double t8 = usecond();
   pickCheckerboard(Even, DiagonalEven, Diagonal);
   pickCheckerboard(Even, TriangleEven, Triangle);
   pickCheckerboard(Odd,  DiagonalOdd,  Diagonal);
@@ -344,20 +359,19 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
   pickCheckerboard(Odd,  TriangleInvOdd, TriangleInv);
 
   // Report timings
-  double t8 = usecond();
-#if 0
-  std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:"
-            << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6
-            << ", allocations = "    << (t2 - t1) / 1e6
-            << ", field strength = " << (t3 - t2) / 1e6
-            << ", fill clover = "    << (t4 - t3) / 1e6
-            << ", convert = "        << (t5 - t4) / 1e6
-            << ", boundaries = "     << (t6 - t5) / 1e6
-            << ", inversions = "     << (t7 - t6) / 1e6
-            << ", pick cbs = "       << (t8 - t7) / 1e6
-            << ", total = "          << (t8 - t0) / 1e6
-            << std::endl;
-#endif
+  double t9 = usecond();
+  std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl;
+  std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "allocations = "        << (t2 - t1) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "field strength = "     << (t3 - t2) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "fill clover = "        << (t4 - t3) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "convert layout = "     << (t6 - t5) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "modify boundaries = "  << (t7 - t6) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "invert clover = "      << (t8 - t7) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "pick cbs = "           << (t9 - t8) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "total = "              << (t9 - t0) / 1e6 << std::endl;
 }
 
 NAMESPACE_END(Grid);
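The ImportGauge hunk above delegates both steps to the CloverHelpers policy class: InstantiateClover either adds the mass term (standard clover) or exponentiates the field-strength sum (exponential clover), and InvertClover can then reuse exp(-Clover) held in TmpInverse instead of inverting the term site by site. As a point of reference, exponentiating a small per-site matrix with a truncated Taylor series in Horner form looks like the sketch below. This is illustrative only (it uses Eigen directly and a hard-coded order); it is not Grid's CompactExpCloverHelpers implementation.

// Illustrative sketch, not Grid code: exp(A) for a small dense matrix via a
// truncated Taylor series in Horner form,
//   exp(A) ~ I + A*(I + (A/2)*(I + (A/3)*( ... ))).
// The truncation order is an arbitrary choice for this example.
#include <Eigen/Dense>

Eigen::MatrixXcd ExpTaylorHorner(const Eigen::MatrixXcd &A, int order = 12) {
  const long n = A.rows();
  const Eigen::MatrixXcd I = Eigen::MatrixXcd::Identity(n, n);
  Eigen::MatrixXcd acc = I;
  for (int k = order; k >= 1; --k)
    acc = I + (A / double(k)) * acc;  // Horner step: acc <- I + (A/k)*acc
  return acc;
}

For (anti-)periodic boundaries the inverse of the exponential clover is simply the same series evaluated at -A, which is why the hunk can keep exp(-Clover) in TmpInverse and hand it to InvertClover.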

View File

@@ -34,8 +34,8 @@
 NAMESPACE_BEGIN(Grid);
 
-template<class Impl>
-WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField& _Umu,
+template<class Impl, class CloverHelpers>
+WilsonCloverFermion<Impl, CloverHelpers>::WilsonCloverFermion(GaugeField& _Umu,
                                                GridCartesian& Fgrid,
                                                GridRedBlackCartesian& Hgrid,
                                                const RealD _mass,
@@ -74,8 +74,8 @@ WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField&
 }
 
 // *NOT* EO
-template <class Impl>
-void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField &in, FermionField &out)
 {
   FermionField temp(out.Grid());
@@ -89,8 +89,8 @@ void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
   out += temp;
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField &in, FermionField &out)
 {
   FermionField temp(out.Grid());
@@ -104,8 +104,8 @@ void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
   out += temp;
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField &_Umu)
 {
   double t0 = usecond();
   WilsonFermion<Impl>::ImportGauge(_Umu);
@@ -131,47 +131,11 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
   CloverTerm += Helpers::fillCloverXT(Ex) * csw_t;
   CloverTerm += Helpers::fillCloverYT(Ey) * csw_t;
   CloverTerm += Helpers::fillCloverZT(Ez) * csw_t;
-  CloverTerm += diag_mass;
 
   double t4 = usecond();
-  int lvol = _Umu.Grid()->lSites();
-  int DimRep = Impl::Dimension;
+  CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass);
   double t5 = usecond();
-  {
-    autoView(CTv,CloverTerm,CpuRead);
-    autoView(CTIv,CloverTermInv,CpuWrite);
-    thread_for(site, lvol, {
-      Coordinate lcoor;
-      grid->LocalIndexToLocalCoor(site, lcoor);
-      Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
-      Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
-      typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
-      peekLocalSite(Qx, CTv, lcoor);
-      //if (csw!=0){
-      for (int j = 0; j < Ns; j++)
-        for (int k = 0; k < Ns; k++)
-          for (int a = 0; a < DimRep; a++)
-            for (int b = 0; b < DimRep; b++){
-              auto zz = Qx()(j, k)(a, b);
-              EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
-            }
-      // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
-      EigenInvCloverOp = EigenCloverOp.inverse();
-      //std::cout << EigenInvCloverOp << std::endl;
-      for (int j = 0; j < Ns; j++)
-        for (int k = 0; k < Ns; k++)
-          for (int a = 0; a < DimRep; a++)
-            for (int b = 0; b < DimRep; b++)
-              Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
-      // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
-      // }
-      pokeLocalSite(Qxinv, CTIv, lcoor);
-    });
-  }
-  double t6 = usecond();
 
   // Separate the even and odd parts
   pickCheckerboard(Even, CloverTermEven, CloverTerm);
   pickCheckerboard(Odd,  CloverTermOdd,  CloverTerm);
@@ -184,48 +148,44 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
   pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
   pickCheckerboard(Odd,  CloverTermInvDagOdd,  adj(CloverTermInv));
 
-  double t7 = usecond();
-#if 0
-  std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:"
-            << " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6
-            << ", allocations = "    << (t2 - t1) / 1e6
-            << ", field strength = " << (t3 - t2) / 1e6
-            << ", fill clover = "    << (t4 - t3) / 1e6
-            << ", misc = "           << (t5 - t4) / 1e6
-            << ", inversions = "     << (t6 - t5) / 1e6
-            << ", pick cbs = "       << (t7 - t6) / 1e6
-            << ", total = "          << (t7 - t0) / 1e6
-            << std::endl;
-#endif
+  double t6 = usecond();
+  std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl;
+  std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "allocations = "    << (t2 - t1) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "fill clover = "    << (t4 - t3) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "instantiation = "  << (t5 - t4) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "pick cbs = "       << (t6 - t5) / 1e6 << std::endl;
+  std::cout << GridLogDebug << "total = "          << (t6 - t0) / 1e6 << std::endl;
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField &in, FermionField &out)
 {
   this->MooeeInternal(in, out, DaggerNo, InverseNo);
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField &in, FermionField &out)
 {
   this->MooeeInternal(in, out, DaggerYes, InverseNo);
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField &in, FermionField &out)
 {
   this->MooeeInternal(in, out, DaggerNo, InverseYes);
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField &in, FermionField &out)
 {
   this->MooeeInternal(in, out, DaggerYes, InverseYes);
 }
 
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
 {
   out.Checkerboard() = in.Checkerboard();
   CloverField *Clover;
@@ -278,8 +238,8 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
 } // MooeeInternal
 
 // Derivative parts unpreconditioned pseudofermions
-template <class Impl>
-void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
 {
   conformable(X.Grid(), Y.Grid());
   conformable(X.Grid(), force.Grid());
@@ -349,7 +309,7 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
   }
   PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
   Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
-  force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked
+  force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
   count++;
 }
@@ -360,15 +320,15 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
 }
 
 // Derivative parts
-template <class Impl>
-void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
 {
   assert(0);
 }
 
 // Derivative parts
-template <class Impl>
-void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
+template<class Impl, class CloverHelpers>
+void WilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
 {
   assert(0); // not implemented yet
 }
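Both clover flavours are now selected purely through the second template parameter, as the instantiation files further down show. A hedged usage sketch follows: WilsonImplR is Grid's usual real/double Wilson implementation tag, while the local type aliases and the commented constructor call are illustrative names for this example, not Grid-provided typedefs.

// Sketch only: picking the standard vs. exponential clover through the new
// CloverHelpers template parameter. The aliases below are local to this example.
#include <Grid/Grid.h>
using namespace Grid;

typedef WilsonCloverFermion<WilsonImplR, CloverHelpers<WilsonImplR>>    StdCloverFermion;
typedef WilsonCloverFermion<WilsonImplR, ExpCloverHelpers<WilsonImplR>> ExpCloverFermion;

// Construction is unchanged apart from the extra template argument, e.g. (schematic):
//   ExpCloverFermion Dwc(Umu, *FGrid, *FrbGrid, mass, csw_r, csw_t, anis, params);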

View File

@@ -4,12 +4,13 @@ Grid physics library, www.github.com/paboyle/Grid
 Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
 
-Copyright (C) 2015
+Copyright (C) 2022
 
 Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -599,11 +600,47 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
                                                    Current curr_type,
                                                    unsigned int mu)
 {
+  if(curr_type != Current::Vector)
+  {
+    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
+    exit(1);
+  }
+
   Gamma g5(Gamma::Algebra::Gamma5);
   conformable(_grid, q_in_1.Grid());
   conformable(_grid, q_in_2.Grid());
   conformable(_grid, q_out.Grid());
-  assert(0);
+  auto UGrid= this->GaugeGrid();
+
+  PropagatorField tmp_shifted(UGrid);
+  PropagatorField g5Lg5(UGrid);
+  PropagatorField R(UGrid);
+  PropagatorField gmuR(UGrid);
+
+  Gamma::Algebra Gmu [] = {
+    Gamma::Algebra::GammaX,
+    Gamma::Algebra::GammaY,
+    Gamma::Algebra::GammaZ,
+    Gamma::Algebra::GammaT,
+  };
+  Gamma gmu=Gamma(Gmu[mu]);
+
+  g5Lg5=g5*q_in_1*g5;
+  tmp_shifted=Cshift(q_in_2,mu,1);
+  Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
+  gmuR=gmu*R;
+
+  q_out=adj(g5Lg5)*R;
+  q_out-=adj(g5Lg5)*gmuR;
+
+  tmp_shifted=Cshift(q_in_1,mu,1);
+  Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
+  g5Lg5=g5*g5Lg5*g5;
+
+  R=q_in_2;
+  gmuR=gmu*R;
+
+  q_out-=adj(g5Lg5)*R;
+  q_out-=adj(g5Lg5)*gmuR;
 }
 
@@ -617,9 +654,51 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
                                               unsigned int tmax,
                                               ComplexField &lattice_cmplx)
 {
+  if(curr_type != Current::Vector)
+  {
+    std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
+    exit(1);
+  }
+
+  int tshift = (mu == Nd-1) ? 1 : 0;
+  unsigned int LLt = GridDefaultLatt()[Tp];
   conformable(_grid, q_in.Grid());
   conformable(_grid, q_out.Grid());
-  assert(0);
+  auto UGrid= this->GaugeGrid();
+
+  PropagatorField tmp(UGrid);
+  PropagatorField Utmp(UGrid);
+  PropagatorField L(UGrid);
+  PropagatorField zz (UGrid);
+  zz=Zero();
+  LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
+
+  Gamma::Algebra Gmu [] = {
+    Gamma::Algebra::GammaX,
+    Gamma::Algebra::GammaY,
+    Gamma::Algebra::GammaZ,
+    Gamma::Algebra::GammaT,
+  };
+  Gamma gmu=Gamma(Gmu[mu]);
+
+  tmp = Cshift(q_in,mu,1);
+  Impl::multLinkField(Utmp,this->Umu,tmp,mu);
+  tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
+  tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
+  q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
+
+  tmp = q_in *lattice_cmplx;
+  tmp = Cshift(tmp,mu,-1);
+  Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
+  tmp = -( Utmp + gmu*Utmp );
+  // Mask the time
+  if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
+    unsigned int t0 = 0;
+    tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
+  } else {
+    tmp = where((lcoor>=tmin+tshift),tmp,zz);
+  }
+  q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
 }
 
 NAMESPACE_END(Grid);
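Reading the two hunks above, ContractConservedCurrent assembles the point-split vector current from the two propagators. Schematically, with $\overline{q} \equiv \gamma_5\, q^\dagger\, \gamma_5$, and with signs and normalisation exactly as written in the code rather than any particular paper's convention:

\[
q_{\rm out}(x) \;=\; \overline{q_1(x)}\,\bigl(1-\gamma_\mu\bigr)\,U_\mu(x)\,q_2(x+\hat\mu)
\;-\; \overline{U_\mu(x)\,q_1(x+\hat\mu)}\,\bigl(1+\gamma_\mu\bigr)\,q_2(x).
\]

SeqConservedCurrent inserts the analogous forward and backward hops into a single propagator, with the where() masks restricting the insertion to the requested time window [tmin, tmax] (plus the tshift wrap-around fix for the last timeslice).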

View File

@@ -498,6 +498,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
 #ifndef GRID_CUDA
      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) { ASM_CALL(AsmDhopSiteDag); return;}
 #endif
+     acceleratorFenceComputeStream();
    } else if( interior ) {
      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
@@ -505,11 +506,13 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) { ASM_CALL(AsmDhopSiteDagInt); return;}
 #endif
    } else if( exterior ) {
+     acceleratorFenceComputeStream();
      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
 #ifndef GRID_CUDA
      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) { ASM_CALL(AsmDhopSiteDagExt); return;}
 #endif
+     acceleratorFenceComputeStream();
    }
    assert(0 && " Kernel optimisation case not covered ");
 }

View File

@@ -9,6 +9,7 @@
 Author: paboyle <paboyle@ph.ed.ac.uk>
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
 Author: Daniel Richtmann <daniel.richtmann@gmail.com>
+Author: Mattia Bruno <mattia.bruno@cern.ch>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -32,10 +33,12 @@
 #include <Grid/qcd/spin/Dirac.h>
 #include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
 #include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h>
+#include <Grid/qcd/action/fermion/CloverHelpers.h>
 
 NAMESPACE_BEGIN(Grid);
 
 #include "impl.h"
-template class CompactWilsonCloverFermion<IMPLEMENTATION>;
+template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactCloverHelpers<IMPLEMENTATION>>;
+template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactExpCloverHelpers<IMPLEMENTATION>>;
 
 NAMESPACE_END(Grid);

View File

@@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@@ -8,7 +8,8 @@
 Author: paboyle <paboyle@ph.ed.ac.uk>
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
+Author: Mattia Bruno <mattia.bruno@cern.ch>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
@@ -31,10 +32,12 @@
 #include <Grid/qcd/spin/Dirac.h>
 #include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
 #include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
+#include <Grid/qcd/action/fermion/CloverHelpers.h>
 
 NAMESPACE_BEGIN(Grid);
 
 #include "impl.h"
-template class WilsonCloverFermion<IMPLEMENTATION>;
+template class WilsonCloverFermion<IMPLEMENTATION, CloverHelpers<IMPLEMENTATION>>;
+template class WilsonCloverFermion<IMPLEMENTATION, ExpCloverHelpers<IMPLEMENTATION>>;
 
 NAMESPACE_END(Grid);

(The same WilsonKernels.cc deletion shown above, together with its replacement by a one-line symlink to ../WilsonKernelsInstantiation.cc.master, is repeated verbatim for each of the remaining implementation directories and is omitted here.)
View File

@@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \
   GparityWilsonImplF \
   GparityWilsonImplD "
 
+COMPACT_WILSON_IMPL_LIST=" \
+  WilsonImplF \
+  WilsonImplD "
+
 DWF_IMPL_LIST=" \
   WilsonImplF \
   WilsonImplD \
@@ -40,13 +44,23 @@ EOF
 done
 
-CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation"
+CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation"
 
 for impl in $WILSON_IMPL_LIST
 do
   for f in $CC_LIST
   do
     ln -f -s ../$f.cc.master $impl/$f$impl.cc
+  done
+done
+
+CC_LIST="CompactWilsonCloverFermionInstantiation"
+
+for impl in $COMPACT_WILSON_IMPL_LIST
+do
+  for f in $CC_LIST
+  do
+    ln -f -s ../$f.cc.master $impl/$f$impl.cc
   done
 done
 
@@ -63,14 +77,14 @@ for impl in $DWF_IMPL_LIST $GDWF_IMPL_LIST
 do
   for f in $CC_LIST
   do
     ln -f -s ../$f.cc.master $impl/$f$impl.cc
   done
 done
 
 # overwrite the .cc file in Gparity directories
 for impl in $GDWF_IMPL_LIST
 do
   ln -f -s ../WilsonKernelsInstantiationGparity.cc.master $impl/WilsonKernelsInstantiation$impl.cc
 done
@@ -84,7 +98,7 @@ for impl in $STAG_IMPL_LIST
 do
   for f in $CC_LIST
   do
     ln -f -s ../$f.cc.master $impl/$f$impl.cc
   done
 done

View File

@@ -49,7 +49,7 @@ NAMESPACE_BEGIN(Grid);
   typedef Lattice<SiteLink>  LinkField;
   typedef Lattice<SiteField> Field;
-  typedef Field              ComplexField;
+  typedef LinkField          ComplexField;
 };
 
 typedef QedGImpl<vComplex> QedGImplR;

View File

@@ -55,12 +55,12 @@ public:
     }
   }
-  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
+  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
     GridBase *grid = Umu.Grid();
     GaugeMat xform(grid);
-    SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog);
+    SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
   }
-  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
+  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
     GridBase *grid = Umu.Grid();
@@ -122,6 +122,8 @@ public:
       }
     }
+    std::cout << GridLogError << "Gauge fixing did not converge in " << maxiter << " iterations." << std::endl;
+    if (err_on_no_converge) assert(0);
   };
   static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
     GridBase *grid = U[0].Grid();
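The new trailing argument defaults to true, so a gauge fixing that does not converge within maxiter now prints the GridLogError message and asserts unless the caller opts out. A hedged usage sketch follows; FourierAcceleratedGaugeFixer is assumed to be the class these static methods live in, and the gauge-implementation tag and numerical values are placeholders.

// Sketch only: keep running instead of aborting when gauge fixing does not converge.
// Class name, Gimpl tag and numbers are assumptions for illustration.
LatticeGaugeField U(grid);   // assumed to be initialised elsewhere
Real alpha   = 0.1;
int  maxiter = 10000;
FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(
    U, alpha, maxiter,
    1.0e-12,   // Omega_tol
    1.0e-12,   // Phi_tol
    true,      // Fourier acceleration
    -1,        // orthog
    false);    // err_on_no_converge: report, but do not assert(0)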

View File

@@ -125,7 +125,6 @@ public:
     return sumplaq / vol / faces / Nc; // Nd , Nc dependent... FIXME
   }
 
   //////////////////////////////////////////////////
   // average over all x,y,z the temporal loop
   //////////////////////////////////////////////////
@@ -165,7 +164,7 @@ public:
     double vol = Umu.Grid()->gSites();
-    return p.real() / vol / 4.0 / 3.0;
+    return p.real() / vol / (4.0 * Nc ) ;
   };
 
   //////////////////////////////////////////////////

View File

@@ -26,7 +26,7 @@
 *************************************************************************************/
 /*  END LEGAL */
 #include <Grid/Grid.h>
-#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
+#ifndef GRID_HIP
 
 NAMESPACE_BEGIN(Grid);
@@ -82,7 +82,7 @@ void JSONWriter::writeDefault(const std::string &s, const std::string &x)
   if (s.size())
     ss_ << "\""<< s << "\" : \"" << os.str() << "\" ," ;
   else
-    ss_ << os.str() << " ," ;
+    ss_ << "\""<< os.str() << "\" ," ;
 }
 
 // Reader implementation ///////////////////////////////////////////////////////

View File

@@ -54,7 +54,7 @@ namespace Grid
     void pop(void);
     template <typename U>
     void writeDefault(const std::string &s, const U &x);
-#ifdef __NVCC__
+#if defined(GRID_CUDA) || defined(GRID_HIP)
     void writeDefault(const std::string &s, const Grid::ComplexD &x)
     {
       std::complex<double> z(real(x),imag(x));
@@ -101,7 +101,7 @@ namespace Grid
     void readDefault(const std::string &s, std::vector<U> &output);
     template <typename U, typename P>
     void readDefault(const std::string &s, std::pair<U,P> &output);
-#ifdef __NVCC__
+#if defined(GRID_CUDA) || defined(GRID_HIP)
     void readDefault(const std::string &s, ComplexD &output)
    {
      std::complex<double> z;


@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include "BinaryIO.h" #include "BinaryIO.h"
#include "TextIO.h" #include "TextIO.h"
#include "XmlIO.h" #include "XmlIO.h"
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) #ifndef GRID_HIP
#include "JSON_IO.h" #include "JSON_IO.h"
#endif #endif


@ -80,11 +80,14 @@ void Gather_plane_simple_table (commVector<std::pair<int,int> >& table,const Lat
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> template<class cobj,class vobj,class compressor>
void Gather_plane_exchange_table(const Lattice<vobj> &rhs, void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
commVector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline)); commVector<cobj *> pointers,
int dimension,int plane,
int cbmask,compressor &compress,int type) __attribute__((noinline));
template<class cobj,class vobj,class compressor> template<class cobj,class vobj,class compressor>
void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const Lattice<vobj> &rhs, void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,
Vector<cobj *> pointers,int dimension,int plane,int cbmask, const Lattice<vobj> &rhs,
std::vector<cobj *> &pointers,int dimension,int plane,int cbmask,
compressor &compress,int type) compressor &compress,int type)
{ {
assert( (table.size()&0x1)==0); assert( (table.size()&0x1)==0);
@ -92,14 +95,15 @@ void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,const La
int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
auto rhs_v = rhs.View(AcceleratorRead); auto rhs_v = rhs.View(AcceleratorRead);
auto rhs_p = &rhs_v[0];
auto p0=&pointers[0][0]; auto p0=&pointers[0][0];
auto p1=&pointers[1][0]; auto p1=&pointers[1][0];
auto tp=&table[0]; auto tp=&table[0];
accelerator_forNB(j, num, vobj::Nsimd(), { accelerator_forNB(j, num, vobj::Nsimd(), {
compress.CompressExchange(p0,p1, &rhs_v[0], j, compress.CompressExchange(p0,p1, rhs_p, j,
so+tp[2*j ].second, so+tp[2*j ].second,
so+tp[2*j+1].second, so+tp[2*j+1].second,
type); type);
}); });
rhs_v.ViewClose(); rhs_v.ViewClose();
} }
@ -230,8 +234,8 @@ public:
}; };
struct Merge { struct Merge {
cobj * mpointer; cobj * mpointer;
Vector<scalar_object *> rpointers; // std::vector<scalar_object *> rpointers;
Vector<cobj *> vpointers; std::vector<cobj *> vpointers;
Integer buffer_size; Integer buffer_size;
Integer type; Integer type;
}; };
@ -406,6 +410,7 @@ public:
comms_bytes+=bytes; comms_bytes+=bytes;
shm_bytes +=2*Packets[i].bytes-bytes; shm_bytes +=2*Packets[i].bytes-bytes;
} }
_grid->StencilBarrier();// Synch shared memory on a single nodes
} }
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs) void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
@ -420,7 +425,7 @@ public:
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
void Communicate(void) void Communicate(void)
{ {
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){ if ( 0 ){
thread_region { thread_region {
// must be called in parallel region // must be called in parallel region
int mythread = thread_num(); int mythread = thread_num();
@ -569,7 +574,7 @@ public:
d.buffer_size = buffer_size; d.buffer_size = buffer_size;
dv.push_back(d); dv.push_back(d);
} }
void AddMerge(cobj *merge_p,Vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) { void AddMerge(cobj *merge_p,std::vector<cobj *> &rpointers,Integer buffer_size,Integer type,std::vector<Merge> &mv) {
Merge m; Merge m;
m.type = type; m.type = type;
m.mpointer = merge_p; m.mpointer = merge_p;
@ -582,6 +587,7 @@ public:
} }
template<class decompressor> void CommsMergeSHM(decompressor decompress) { template<class decompressor> void CommsMergeSHM(decompressor decompress) {
mpi3synctime-=usecond(); mpi3synctime-=usecond();
accelerator_barrier();
_grid->StencilBarrier();// Synch shared memory on a single nodes _grid->StencilBarrier();// Synch shared memory on a single nodes
mpi3synctime+=usecond(); mpi3synctime+=usecond();
shmmergetime-=usecond(); shmmergetime-=usecond();
@ -1114,8 +1120,8 @@ public:
int bytes = (reduced_buffer_size*datum_bytes)/simd_layout; int bytes = (reduced_buffer_size*datum_bytes)/simd_layout;
assert(bytes*simd_layout == reduced_buffer_size*datum_bytes); assert(bytes*simd_layout == reduced_buffer_size*datum_bytes);
Vector<cobj *> rpointers(maxl); std::vector<cobj *> rpointers(maxl);
Vector<cobj *> spointers(maxl); std::vector<cobj *> spointers(maxl);
/////////////////////////////////////////// ///////////////////////////////////////////
// Work out what to send where // Work out what to send where


@ -195,12 +195,15 @@ void acceleratorInit(void)
#ifdef GRID_SYCL #ifdef GRID_SYCL
cl::sycl::queue *theGridAccelerator; cl::sycl::queue *theGridAccelerator;
cl::sycl::queue *theCopyAccelerator;
void acceleratorInit(void) void acceleratorInit(void)
{ {
int nDevices = 1; int nDevices = 1;
cl::sycl::gpu_selector selector; cl::sycl::gpu_selector selector;
cl::sycl::device selectedDevice { selector }; cl::sycl::device selectedDevice { selector };
theGridAccelerator = new sycl::queue (selectedDevice); theGridAccelerator = new sycl::queue (selectedDevice);
// theCopyAccelerator = new sycl::queue (selectedDevice);
theCopyAccelerator = theGridAccelerator; // Should proceed concurrently anyway.
#ifdef GRID_SYCL_LEVEL_ZERO_IPC #ifdef GRID_SYCL_LEVEL_ZERO_IPC
zeInit(0); zeInit(0);


@ -247,7 +247,6 @@ inline int acceleratorIsCommunicable(void *ptr)
////////////////////////////////////////////// //////////////////////////////////////////////
// SyCL acceleration // SyCL acceleration
////////////////////////////////////////////// //////////////////////////////////////////////
#ifdef GRID_SYCL #ifdef GRID_SYCL
NAMESPACE_END(Grid); NAMESPACE_END(Grid);
#include <CL/sycl.hpp> #include <CL/sycl.hpp>
@ -262,6 +261,7 @@ NAMESPACE_END(Grid);
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
extern cl::sycl::queue *theGridAccelerator; extern cl::sycl::queue *theGridAccelerator;
extern cl::sycl::queue *theCopyAccelerator;
#ifdef __SYCL_DEVICE_ONLY__ #ifdef __SYCL_DEVICE_ONLY__
#define GRID_SIMT #define GRID_SIMT
@ -289,7 +289,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
cgh.parallel_for( \ cgh.parallel_for( \
cl::sycl::nd_range<3>(global,local), \ cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \ [=] (cl::sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(8)]] \ [[intel::reqd_sub_group_size(16)]] \
{ \ { \
auto iter1 = item.get_global_id(0); \ auto iter1 = item.get_global_id(0); \
auto iter2 = item.get_global_id(1); \ auto iter2 = item.get_global_id(1); \
@ -298,19 +298,19 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
}); \ }); \
}); });
#define accelerator_barrier(dummy) theGridAccelerator->wait(); #define accelerator_barrier(dummy) { theGridAccelerator->wait(); }
inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);}; inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);};
inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);}; inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);};
inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);}; inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) {
theGridAccelerator->memcpy(to,from,bytes); inline void acceleratorCopySynchronise(void) { theCopyAccelerator->wait(); }
} inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
inline void acceleratorCopySynchronise(void) { theGridAccelerator->wait(); std::cout<<"acceleratorCopySynchronise() wait "<<std::endl; } inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theGridAccelerator->memset(base,value,bytes); theGridAccelerator->wait();}
inline int acceleratorIsCommunicable(void *ptr) inline int acceleratorIsCommunicable(void *ptr)
{ {
#if 0 #if 0
@ -511,7 +511,16 @@ inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,
inline void acceleratorFreeCpu (void *ptr){free(ptr);}; inline void acceleratorFreeCpu (void *ptr){free(ptr);};
#endif #endif
//////////////////////////////////////////////
// Fencing needed ONLY for SYCL
//////////////////////////////////////////////
#ifdef GRID_SYCL
inline void acceleratorFenceComputeStream(void){ accelerator_barrier();};
#else
// Ordering within a stream guaranteed on Nvidia & AMD
inline void acceleratorFenceComputeStream(void){ };
#endif
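A hedged sketch of how the copy queue and the new fence compose; the buffers and the final kernel launch are illustrative, only the functions named above are assumed:
  // Asynchronous device-to-device copy is issued on theCopyAccelerator.
  acceleratorCopyDeviceToDeviceAsynch(src_d, dst_d, bytes);
  acceleratorCopySynchronise();      // drain the copy queue
  acceleratorFenceComputeStream();   // real barrier only on SYCL; empty on CUDA/HIP,
                                     // where ordering within a stream is already guaranteed
  // ... launch compute work that reads dst_d ...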
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
// Synchronise across local threads for divergence resynch // Synchronise across local threads for divergence resynch


@ -27,6 +27,7 @@
/* END LEGAL */ /* END LEGAL */
extern "C" { extern "C" {
#include <openssl/sha.h> #include <openssl/sha.h>
#include <openssl/evp.h>
} }
#ifdef USE_IPP #ifdef USE_IPP
#include "ipp.h" #include "ipp.h"
@ -70,10 +71,8 @@ public:
static inline std::vector<unsigned char> sha256(const void *data,size_t bytes) static inline std::vector<unsigned char> sha256(const void *data,size_t bytes)
{ {
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH);
SHA256_CTX sha256; auto digest = EVP_get_digestbyname("SHA256");
SHA256_Init (&sha256); EVP_Digest(data, bytes, &hash[0], NULL, digest, NULL);
SHA256_Update(&sha256, data,bytes);
SHA256_Final (&hash[0], &sha256);
return hash; return hash;
} }
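For reference, a standalone sketch of the same one-shot EVP digest outside the class, using EVP_sha256() directly instead of the name lookup; buffer and function names here are illustrative:
  #include <openssl/evp.h>
  #include <vector>
  // SHA-256 of a buffer via the high-level OpenSSL EVP API.
  std::vector<unsigned char> sha256_of(const void *data, size_t bytes)
  {
    std::vector<unsigned char> hash(EVP_MD_size(EVP_sha256()));
    EVP_Digest(data, bytes, hash.data(), nullptr, EVP_sha256(), nullptr);
    return hash;
  }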
static inline std::vector<int> sha256_seeds(const std::string &s) static inline std::vector<int> sha256_seeds(const std::string &s)


@ -148,7 +148,7 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional). - `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
- `--enable-numa`: enable NUMA first touch optimisation - `--enable-numa`: enable NUMA first touch optimisation
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. - `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 64 bytes).
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible comms options is detailed in a section below. - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible comms options is detailed in a section below.
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `). - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers. - `--disable-timers`: disable system dependent high-resolution timers.


@ -0,0 +1,131 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#ifdef GRID_CUDA
#define CUDA_PROFILE
#endif
#ifdef CUDA_PROFILE
#include <cuda_profiler_api.h>
#endif
using namespace std;
using namespace Grid;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt4= GridDefaultLatt();
Coordinate mpi = GridDefaultMpi();
Coordinate simd = GridDefaultSimd(Nd,vComplexF::Nsimd());
GridLogLayout();
int Ls=16;
for(int i=0;i<argc;i++)
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;
}
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd ,mpi);
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermionF src (FGrid); random(RNG5,src);
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeFieldF Umu(UGrid);
SU<Nc>::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
RealD mass=0.1;
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
RealD NN = UGrid->NodeCount();
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
const int ncall = 500;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionF::HaloGatherOpt "<<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
{
typename DomainWallFermionF::Compressor compressor(0);
FGrid->Barrier();
Dw.Stencil.HaloExchangeOptGather(src,compressor);
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.Stencil.HaloExchangeOptGather(src,compressor);
}
double t1=usecond();
FGrid->Barrier();
double bytes=0.0;
if(mpi[0]) bytes+=latt4[1]*latt4[2]*latt4[3];
if(mpi[1]) bytes+=latt4[0]*latt4[2]*latt4[3];
if(mpi[2]) bytes+=latt4[0]*latt4[1]*latt4[3];
if(mpi[3]) bytes+=latt4[0]*latt4[1]*latt4[2];
bytes = bytes * Ls * 8.* (24.+12.)* 2.0;
std::cout<<GridLogMessage << "Gather us /call = "<< (t1-t0)/ncall<<std::endl;
std::cout<<GridLogMessage << "Gather MBs /call = "<< bytes*ncall/(t1-t0)<<std::endl;
}
Grid_finalize();
exit(0);
}
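The byte count printed by this benchmark simply restates the loop above: for each MPI-partitioned direction it takes the product of the three transverse extents and applies the constant factors from the code (a restatement, not an independent derivation):
  \mathrm{bytes} \;=\; 2 \times 8 \times (24+12) \times L_s \sum_{\mu:\,\mathrm{mpi}[\mu]\neq 0}\;\prod_{\nu\neq\mu} L_\nu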


@ -81,8 +81,8 @@ int main (int argc, char ** argv)
Vector<Coeff_t> diag = Dw.bs; Vector<Coeff_t> diag = Dw.bs;
Vector<Coeff_t> upper= Dw.cs; Vector<Coeff_t> upper= Dw.cs;
Vector<Coeff_t> lower= Dw.cs; Vector<Coeff_t> lower= Dw.cs;
upper[Ls-1]=-Dw.mass*upper[Ls-1]; upper[Ls-1]=-Dw.mass_minus*upper[Ls-1];
lower[0] =-Dw.mass*lower[0]; lower[0] =-Dw.mass_plus*lower[0];
LatticeFermion r_eo(FGrid); LatticeFermion r_eo(FGrid);
LatticeFermion src_e (FrbGrid); LatticeFermion src_e (FrbGrid);


@ -44,6 +44,13 @@ void bench_wilson (
double const volume, double const volume,
int const dag ); int const dag );
void bench_wilson_eo (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag );
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
@ -110,8 +117,8 @@ int main (int argc, char ** argv)
bench_wilson(src,result,Dw,volume,DaggerYes); bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << "\t"; std::cout << "\t";
// EO // EO
bench_wilson(src,result,Dw,volume,DaggerNo); bench_wilson_eo(src_o,result_e,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes); bench_wilson_eo(src_o,result_e,Dw,volume,DaggerYes);
std::cout << std::endl; std::cout << std::endl;
} }
} }


@ -159,7 +159,7 @@ case ${ac_ZMOBIUS} in
esac esac
############### Nc ############### Nc
AC_ARG_ENABLE([Nc], AC_ARG_ENABLE([Nc],
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])], [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])],
[ac_Nc=${enable_Nc}], [ac_Nc=3]) [ac_Nc=${enable_Nc}], [ac_Nc=3])
case ${ac_Nc} in case ${ac_Nc} in


@ -0,0 +1,62 @@
#!/bin/sh
##SBATCH -p PVC-SPR-QZEH
##SBATCH -p PVC-ICX-QZNW
#SBATCH -p QZ1J-ICX-PVC
##SBATCH -p QZ1J-SPR-PVC-2C
source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh
export NT=8
export I_MPI_OFFLOAD=1
export I_MPI_OFFLOAD_TOPOLIB=level_zero
export I_MPI_OFFLOAD_DOMAIN_SIZE=-1
# export IGC_EnableLSCFenceUGMBeforeEOT=0
# export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file=False"
export SYCL_DEVICE_FILTER=gpu,level_zero
#export IGC_ShaderDumpEnable=1
#export IGC_DumpToCurrentDir=1
export I_MPI_OFFLOAD_CELL=tile
export EnableImplicitScaling=0
export EnableWalkerPartition=0
export ZE_AFFINITY_MASK=0.0
mpiexec -launcher ssh -n 1 -host localhost ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1 --device-mem 32768
export ZE_AFFINITY_MASK=0
export I_MPI_OFFLOAD_CELL=device
export EnableImplicitScaling=1
export EnableWalkerPartition=1
#mpiexec -launcher ssh -n 2 -host localhost vtune -collect gpu-hotspots -knob gpu-sampling-interval=1 -data-limit=0 -r ./vtune_run4 -- ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1
#mpiexec -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1
#mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 1
#mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-overlap --shm-mpi 1
#mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0
#mpirun -np 2 ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.32.32.64 --accelerator-threads $NT --comms-sequential --shm-mpi 0
#mpirun -np 2 ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --comms-sequential --shm-mpi 1


@ -0,0 +1,34 @@
#!/bin/bash
##SBATCH -p PVC-SPR-QZEH
##SBATCH -p PVC-ICX-QZNW
#SBATCH -p QZ1J-ICX-PVC
source /nfs/site/home/paboylex/ATS/GridNew/Grid/systems/PVC-nightly/setup.sh
export NT=16
# export IGC_EnableLSCFenceUGMBeforeEOT=0
# export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file=False"
#export IGC_ShaderDumpEnable=1
#export IGC_DumpToCurrentDir=1
export I_MPI_OFFLOAD=1
export I_MPI_OFFLOAD_TOPOLIB=level_zero
export I_MPI_OFFLOAD_DOMAIN_SIZE=-1
export SYCL_DEVICE_FILTER=gpu,level_zero
export I_MPI_OFFLOAD_CELL=tile
export EnableImplicitScaling=0
export EnableWalkerPartition=0
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0
for i in 0
do
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
done
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log

systems/PVC/benchmarks/wrap.sh Executable file

@ -0,0 +1,14 @@
#!/bin/sh
export ZE_AFFINITY_MASK=0.$MPI_LOCALRANKID
echo Rank $MPI_LOCALRANKID ZE_AFFINITY_MASK is $ZE_AFFINITY_MASK
if [ $MPI_LOCALRANKID = "0" ]
then
# ~psteinbr/build_pti/ze_tracer -h $@
onetrace --chrome-device-timeline $@
else
$@
fi


@ -0,0 +1,16 @@
INSTALL=/nfs/site/home/azusayax/install
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--disable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=no \
MPICXX=mpicxx \
CXX=dpcpp \
LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \
CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare"

systems/PVC/setup.sh Normal file

@ -0,0 +1,11 @@
export https_proxy=http://proxy-chain.intel.com:911
export LD_LIBRARY_PATH=/nfs/site/home/azusayax/install/lib:$LD_LIBRARY_PATH
module load intel-release
source /opt/intel/oneapi/PVC_setup.sh
#source /opt/intel/oneapi/ATS_setup.sh
module load intel/mpich/pvc45.3
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH
#clsh embargo-ci-neo-022845
#source /opt/intel/vtune_amplifier/amplxe-vars.sh


@ -1 +1 @@
CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi --enable-unified=no


@ -147,7 +147,7 @@ int main (int argc, char ** argv)
Complex p = TensorRemove(Tp); Complex p = TensorRemove(Tp);
std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl; std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl;
Complex LinkTraceScale(1.0/vol/4.0/3.0); Complex LinkTraceScale(1.0/vol/4.0/(Real)Nc);
TComplex Tl = sum(LinkTrace); TComplex Tl = sum(LinkTrace);
Complex l = TensorRemove(Tl); Complex l = TensorRemove(Tl);
std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl; std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl;
@ -157,8 +157,10 @@ int main (int argc, char ** argv)
Complex ll= TensorRemove(TcP); Complex ll= TensorRemove(TcP);
std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl; std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
std::string clone2x3("./ckpoint_clone2x3.4000"); const string stNc = to_string( Nc ) ;
std::string clone3x3("./ckpoint_clone3x3.4000"); const string stNcM1 = to_string( Nc-1 ) ;
std::string clone2x3("./ckpoint_clone"+stNcM1+"x"+stNc+".4000");
std::string clone3x3("./ckpoint_clone"+stNc+"x"+stNc+".4000");
NerscIO::writeConfiguration(Umu,clone3x3,0,precision32); NerscIO::writeConfiguration(Umu,clone3x3,0,precision32);
NerscIO::writeConfiguration(Umu,clone2x3,1,precision32); NerscIO::writeConfiguration(Umu,clone2x3,1,precision32);


@ -793,6 +793,7 @@ int main (int argc, char ** argv)
} }
std::cout <<" OK ! "<<std::endl; std::cout <<" OK ! "<<std::endl;
#ifdef USE_FP16
// Double to Half // Double to Half
std::cout << GridLogMessage<< "Double to half" ; std::cout << GridLogMessage<< "Double to half" ;
precisionChange(&H[0],&D[0],Ndp); precisionChange(&H[0],&D[0],Ndp);
@ -822,6 +823,7 @@ int main (int argc, char ** argv)
assert( tmp < 1.0e-3 ); assert( tmp < 1.0e-3 );
} }
std::cout <<" OK ! "<<std::endl; std::cout <<" OK ! "<<std::endl;
#endif
} }
Grid_finalize(); Grid_finalize();


@ -117,8 +117,8 @@ void runBenchmark(int* argc, char*** argv) {
// type definitions // type definitions
typedef WilsonImpl<vCoeff_t, FundamentalRepresentation, CoeffReal> WImpl; typedef WilsonImpl<vCoeff_t, FundamentalRepresentation, CoeffReal> WImpl;
typedef WilsonCloverFermion<WImpl> WilsonCloverOperator; typedef WilsonCloverFermion<WImpl, CloverHelpers<WImpl>> WilsonCloverOperator;
typedef CompactWilsonCloverFermion<WImpl> CompactWilsonCloverOperator; typedef CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>> CompactWilsonCloverOperator;
typedef typename WilsonCloverOperator::FermionField Fermion; typedef typename WilsonCloverOperator::FermionField Fermion;
typedef typename WilsonCloverOperator::GaugeField Gauge; typedef typename WilsonCloverOperator::GaugeField Gauge;

tests/core/Test_fft_matt.cc Normal file

@ -0,0 +1,270 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::Gamma5
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGRID(&GRID);
LatticeComplexD coor(&GRID);
ComplexD ci(0.0,1.0);
std::vector<int> seeds({1,2,3,4});
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); // naughty seeding
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(seeds);
LatticeGaugeFieldD Umu(&GRID);
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
////////////////////////////////////////////////////
// Wilson test
////////////////////////////////////////////////////
{
LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD src_p(&GRID);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD result(&GRID);
RealD mass=0.1;
WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
Dw.M(src,ref);
std::cout << "Norm src "<<norm2(src)<<std::endl;
std::cout << "Norm Dw x src "<<norm2(ref)<<std::endl;
{
FFT theFFT(&GRID);
////////////////
// operator in Fourier space
////////////////
tmp =ref;
theFFT.FFT_all_dim(result,tmp,FFT::forward);
std::cout<<"FFT[ Dw x src ] "<< norm2(result)<<std::endl;
tmp = src;
theFFT.FFT_all_dim(src_p,tmp,FFT::forward);
std::cout<<"FFT[ src ] "<< norm2(src_p)<<std::endl;
/////////////////////////////////////////////////////////////////
// work out the predicted FT from Fourier
/////////////////////////////////////////////////////////////////
auto FGrid = &GRID;
LatticeFermionD Kinetic(FGrid); Kinetic = Zero();
LatticeComplexD kmu(FGrid);
LatticeInteger scoor(FGrid);
LatticeComplexD sk (FGrid); sk = Zero();
LatticeComplexD sk2(FGrid); sk2= Zero();
LatticeComplexD W(FGrid); W= Zero();
LatticeComplexD one(FGrid); one =ComplexD(1.0,0.0);
ComplexD ci(0.0,1.0);
for(int mu=0;mu<Nd;mu++) {
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(kmu,mu);
kmu = TwoPiL * kmu;
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
sk = sk + sin(kmu) *sin(kmu);
// -1/2 Dw -> 1/2 gmu (eip - emip) = i sinp gmu
Kinetic = Kinetic + sin(kmu)*ci*(Gamma(Gmu[mu])*src_p);
}
W = mass + sk2;
Kinetic = Kinetic + W * src_p;
std::cout<<"Momentum space src "<< norm2(src_p)<<std::endl;
std::cout<<"Momentum space Dw x src "<< norm2(Kinetic)<<std::endl;
std::cout<<"FT[Coordinate space Dw] "<< norm2(result)<<std::endl;
result = result - Kinetic;
std::cout<<"diff "<< norm2(result)<<std::endl;
}
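// Editorial note (sketch): the operator assembled above is the free Wilson
// Dirac operator in momentum space,
//   Dw(p) = i sum_mu gamma_mu sin(p_mu) + m + 2 sum_mu sin^2(p_mu/2),
// with p_mu = 2*pi*n_mu/L_mu, matching the sk2, W and Kinetic terms built
// in the loop.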
std::cout << " =======================================" <<std::endl;
std::cout << " Checking FourierFreePropagator x Dw = 1" <<std::endl;
std::cout << " =======================================" <<std::endl;
std::cout << "Dw src = " <<norm2(src)<<std::endl;
std::cout << "Dw tmp = " <<norm2(tmp)<<std::endl;
Dw.M(src,tmp);
Dw.FreePropagator(tmp,ref,mass);
std::cout << "Dw ref = " <<norm2(ref)<<std::endl;
ref = ref - src;
std::cout << "Dw ref-src = " <<norm2(ref)<<std::endl;
}
////////////////////////////////////////////////////
// Wilson prop
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Wilson Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
src=Zero();
Coordinate point(4,0); // 0,0,0,0
SpinColourVectorD ferm;
ferm=Zero();
ferm()(0)(0) = ComplexD(1.0);
pokeSite(ferm,src,point);
RealD mass=0.1;
WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
Dw.FreePropagator(src,ref,mass) ;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD result(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dw.Mdag(src,tmp);
src=tmp;
MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
CG(HermOp,src,result);
////////////////////////////////////////////////////////////////////////
std::cout << " Taking difference" <<std::endl;
std::cout << "Dw result "<<norm2(result)<<std::endl;
std::cout << "Dw ref "<<norm2(ref)<<std::endl;
diff = ref - result;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
}
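// Editorial note (sketch): the CGNE step solves the normal equations
//   (Dw^dag Dw) x = Dw^dag b  =>  x = Dw^{-1} b,
// so the CG result can be compared directly with the momentum-space
// (Feynman-rule) free propagator applied to the same point source.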
////////////////////////////////////////////////////
//Gauge invariance test
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Gauge invariance test \n";
std::cout<<"****************************************"<<std::endl;
LatticeGaugeField U_GT(&GRID); // Gauge transformed field
LatticeColourMatrix g(&GRID); // local Gauge xform matrix
U_GT = Umu;
// Make a random xform to the gauge field
SU<Nc>::RandomGaugeTransform(pRNG,U_GT,g); // Unit gauge
LatticeFermionD src(&GRID);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
// could loop over colors
src=Zero();
Coordinate point(4,0); // 0,0,0,0
SpinColourVectorD ferm;
ferm=Zero();
ferm()(0)(0) = ComplexD(1.0);
pokeSite(ferm,src,point);
RealD mass=0.1;
WilsonFermionD Dw(U_GT,GRID,RBGRID,mass);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
Dw.FreePropagator(src,ref,mass) ;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD result(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dw.Mdag(src,tmp);
src=tmp;
MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
CG(HermOp,src,result);
////////////////////////////////////////////////////////////////////////
std::cout << " Taking difference" <<std::endl;
std::cout << "Dw result "<<norm2(result)<<std::endl;
std::cout << "Dw ref "<<norm2(ref)<<std::endl;
diff = ref - result;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
}
Grid_finalize();
}


@ -9,6 +9,7 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -42,14 +43,14 @@ directory
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
; ;
int main(int argc, char** argv) { int main(int argc, char** argv) {
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
std::vector<int> latt({4, 4, 4, 8}); std::vector<int> latt({4, 4, 4, 8});
GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid( GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid(
latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi()); latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
@ -60,15 +61,19 @@ int main(int argc, char** argv) {
<< std::endl; << std::endl;
SU2::printGenerators(); SU2::printGenerators();
std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
// guard as this code fails to compile for Nc != 3
#if (Nc == 3)
SU2Adjoint::printGenerators(); SU2Adjoint::printGenerators();
SU2::testGenerators(); SU2::testGenerators();
SU2Adjoint::testGenerators(); SU2Adjoint::testGenerators();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl; std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
SU3::printGenerators(); SU3::printGenerators();
std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl; std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl;
SU3Adjoint::printGenerators(); SU3Adjoint::printGenerators();
@ -111,12 +116,10 @@ int main(int argc, char** argv) {
// AdjointRepresentation has the predefined number of colours Nc // AdjointRepresentation has the predefined number of colours Nc
// Representations<FundamentalRepresentation, AdjointRepresentation, TwoIndexSymmetricRepresentation> RepresentationTypes(grid); // Representations<FundamentalRepresentation, AdjointRepresentation, TwoIndexSymmetricRepresentation> RepresentationTypes(grid);
LatticeGaugeField U(grid), V(grid); LatticeGaugeField U(grid), V(grid);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V);
// Adjoint representation // Adjoint representation
// Test group structure // Test group structure
// (U_f * V_f)_r = U_r * V_r // (U_f * V_f)_r = U_r * V_r
@ -127,17 +130,17 @@ int main(int argc, char** argv) {
SU3::LatticeMatrix Vmu = peekLorentz(V,mu); SU3::LatticeMatrix Vmu = peekLorentz(V,mu);
pokeLorentz(UV,Umu*Vmu, mu); pokeLorentz(UV,Umu*Vmu, mu);
} }
AdjRep.update_representation(UV); AdjRep.update_representation(UV);
typename AdjointRep<Nc>::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r typename AdjointRep<Nc>::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r
AdjRep.update_representation(U); AdjRep.update_representation(U);
typename AdjointRep<Nc>::LatticeField Ur = AdjRep.U; // U_r typename AdjointRep<Nc>::LatticeField Ur = AdjRep.U; // U_r
AdjRep.update_representation(V); AdjRep.update_representation(V);
typename AdjointRep<Nc>::LatticeField Vr = AdjRep.U; // V_r typename AdjointRep<Nc>::LatticeField Vr = AdjRep.U; // V_r
typename AdjointRep<Nc>::LatticeField UrVr(grid); typename AdjointRep<Nc>::LatticeField UrVr(grid);
UrVr = Zero(); UrVr = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
@ -145,10 +148,10 @@ int main(int argc, char** argv) {
typename AdjointRep<Nc>::LatticeMatrix Vrmu = peekLorentz(Vr,mu); typename AdjointRep<Nc>::LatticeMatrix Vrmu = peekLorentz(Vr,mu);
pokeLorentz(UrVr,Urmu*Vrmu, mu); pokeLorentz(UrVr,Urmu*Vrmu, mu);
} }
typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr; typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl; std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl;
// Check correspondence of algebra and group transformations // Check correspondence of algebra and group transformations
// Create a random vector // Create a random vector
SU3::LatticeAlgebraVector h_adj(grid); SU3::LatticeAlgebraVector h_adj(grid);
@ -156,32 +159,31 @@ int main(int argc, char** argv) {
random(gridRNG,h_adj); random(gridRNG,h_adj);
h_adj = real(h_adj); h_adj = real(h_adj);
SU_Adjoint<Nc>::AdjointLieAlgebraMatrix(h_adj,Ar); SU_Adjoint<Nc>::AdjointLieAlgebraMatrix(h_adj,Ar);
// Re-extract h_adj // Re-extract h_adj
SU3::LatticeAlgebraVector h_adj2(grid); SU3::LatticeAlgebraVector h_adj2(grid);
SU_Adjoint<Nc>::projectOnAlgebra(h_adj2, Ar); SU_Adjoint<Nc>::projectOnAlgebra(h_adj2, Ar);
SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2; SU3::LatticeAlgebraVector h_diff = h_adj - h_adj2;
std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl; std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl;
// Exponentiate // Exponentiate
typename AdjointRep<Nc>::LatticeMatrix Uadj(grid); typename AdjointRep<Nc>::LatticeMatrix Uadj(grid);
Uadj = expMat(Ar, 1.0, 16); Uadj = expMat(Ar, 1.0, 16);
typename AdjointRep<Nc>::LatticeMatrix uno(grid); typename AdjointRep<Nc>::LatticeMatrix uno(grid);
uno = 1.0; uno = 1.0;
// Check matrix Uadj, must be real orthogonal // Check matrix Uadj, must be real orthogonal
typename AdjointRep<Nc>::LatticeMatrix Ucheck = Uadj - conjugate(Uadj); typename AdjointRep<Nc>::LatticeMatrix Ucheck = Uadj - conjugate(Uadj);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck)
<< std::endl; << std::endl;
Ucheck = Uadj * adj(Uadj) - uno; Ucheck = Uadj * adj(Uadj) - uno;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck) std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck)
<< std::endl; << std::endl;
Ucheck = adj(Uadj) * Uadj - uno; Ucheck = adj(Uadj) * Uadj - uno;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck) std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck)
<< std::endl; << std::endl;
// Construct the fundamental matrix in the group // Construct the fundamental matrix in the group
SU3::LatticeMatrix Af(grid); SU3::LatticeMatrix Af(grid);
SU3::FundamentalLieAlgebraMatrix(h_adj,Af); SU3::FundamentalLieAlgebraMatrix(h_adj,Af);
@ -193,72 +195,65 @@ int main(int argc, char** argv) {
SU3::LatticeMatrix UnitCheck(grid); SU3::LatticeMatrix UnitCheck(grid);
UnitCheck = Ufund * adj(Ufund) - uno_f; UnitCheck = Ufund * adj(Ufund) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck) std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck)
<< std::endl; << std::endl;
UnitCheck = adj(Ufund) * Ufund - uno_f; UnitCheck = adj(Ufund) * Ufund - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck) std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck)
<< std::endl; << std::endl;
// Transform to the adjoint representation // Transform to the adjoint representation
U = Zero(); // fill this with only one direction U = Zero(); // fill this with only one direction
pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields
AdjRep.update_representation(U); AdjRep.update_representation(U);
Ur = AdjRep.U; // U_r Ur = AdjRep.U; // U_r
typename AdjointRep<Nc>::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj typename AdjointRep<Nc>::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj
typename AdjointRep<Nc>::LatticeMatrix Diff_check_mat = Ur0 - Uadj; typename AdjointRep<Nc>::LatticeMatrix Diff_check_mat = Ur0 - Uadj;
std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl; std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl;
// TwoIndexRep tests // TwoIndexRep tests
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::printBase(); SU2TwoIndexSymm::printBase();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl; std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::printGenerators(); SU2TwoIndexSymm::printGenerators();
std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl; std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::testGenerators(); SU2TwoIndexSymm::testGenerators();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl; std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::printBase(); SU2TwoIndexAntiSymm::printBase();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::printGenerators(); SU2TwoIndexAntiSymm::printGenerators();
std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::testGenerators(); SU2TwoIndexAntiSymm::testGenerators();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors"
<< std::endl; << std::endl;
// Projectors // Projectors
SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid); SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid);
random(gridRNG,Gauss2); random(gridRNG,Gauss2);
@ -276,13 +271,13 @@ int main(int argc, char** argv) {
SU3::LatticeAlgebraVector diff2 = ha - hb; SU3::LatticeAlgebraVector diff2 = ha - hb;
std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl; std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors" std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors"
<< std::endl; << std::endl;
// Projectors // Projectors
SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid); SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid);
random(gridRNG,Gauss2a); random(gridRNG,Gauss2a);
@ -300,11 +295,11 @@ int main(int argc, char** argv) {
SU3::LatticeAlgebraVector diff2a = ha - hb; SU3::LatticeAlgebraVector diff2a = ha - hb;
std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl; std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure" std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure"
<< std::endl; << std::endl;
// Testing HMC representation classes // Testing HMC representation classes
TwoIndexRep< Nc, Symmetric > TIndexRep(grid); TwoIndexRep< Nc, Symmetric > TIndexRep(grid);
@ -313,7 +308,7 @@ int main(int argc, char** argv) {
LatticeGaugeField U2(grid), V2(grid); LatticeGaugeField U2(grid), V2(grid);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
LatticeGaugeField UV2(grid); LatticeGaugeField UV2(grid);
UV2 = Zero(); UV2 = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
@ -321,16 +316,16 @@ int main(int argc, char** argv) {
SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu); SU3::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
pokeLorentz(UV2,Umu2*Vmu2, mu); pokeLorentz(UV2,Umu2*Vmu2, mu);
} }
TIndexRep.update_representation(UV2); TIndexRep.update_representation(UV2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r
TIndexRep.update_representation(U2); TIndexRep.update_representation(U2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r
TIndexRep.update_representation(V2); TIndexRep.update_representation(V2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r
typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid); typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid);
Ur2Vr2 = Zero(); Ur2Vr2 = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
@ -338,11 +333,11 @@ int main(int argc, char** argv) {
typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu); typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu);
pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu); pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu);
} }
typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2; typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Two Index Symmetric): " << norm2(Diff_check2) << std::endl; std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Two Index Symmetric): " << norm2(Diff_check2) << std::endl;
// Check correspondence of algebra and group transformations // Check correspondence of algebra and group transformations
// Create a random vector // Create a random vector
SU3::LatticeAlgebraVector h_sym(grid); SU3::LatticeAlgebraVector h_sym(grid);
@ -350,34 +345,31 @@ int main(int argc, char** argv) {
random(gridRNG,h_sym); random(gridRNG,h_sym);
h_sym = real(h_sym); h_sym = real(h_sym);
SU_TwoIndex<Nc,Symmetric>::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym); SU_TwoIndex<Nc,Symmetric>::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym);
// Re-extract h_sym // Re-extract h_sym
SU3::LatticeAlgebraVector h_sym2(grid); SU3::LatticeAlgebraVector h_sym2(grid);
SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym); SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym);
SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl;
// Exponentiate // Exponentiate
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid); typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid);
U2iS = expMat(Ar_sym, 1.0, 16); U2iS = expMat(Ar_sym, 1.0, 16);
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid); typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid);
uno2iS = 1.0; uno2iS = 1.0;
// Check matrix U2iS, must be real orthogonal // Check matrix U2iS, must be real orthogonal
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS); typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS) std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS)
<< std::endl; << std::endl;
Ucheck2iS = U2iS * adj(U2iS) - uno2iS; Ucheck2iS = U2iS * adj(U2iS) - uno2iS;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS) std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS)
<< std::endl; << std::endl;
Ucheck2iS = adj(U2iS) * U2iS - uno2iS; Ucheck2iS = adj(U2iS) * U2iS - uno2iS;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS) std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS)
<< std::endl; << std::endl;
// Construct the fundamental matrix in the group // Construct the fundamental matrix in the group
SU3::LatticeMatrix Af_sym(grid); SU3::LatticeMatrix Af_sym(grid);
SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
@ -386,147 +378,137 @@ int main(int argc, char** argv) {
SU3::LatticeMatrix UnitCheck2(grid); SU3::LatticeMatrix UnitCheck2(grid);
UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f; UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2) std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2)
<< std::endl; << std::endl;
UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f; UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2) std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2)
<< std::endl; << std::endl;
// Transform to the 2Index Sym representation // Transform to the 2Index Sym representation
U = Zero(); // fill this with only one direction U = Zero(); // fill this with only one direction
pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields
TIndexRep.update_representation(U); TIndexRep.update_representation(U);
Ur2 = TIndexRep.U; // U_r Ur2 = TIndexRep.U; // U_r
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS; typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl; std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl;
if (TwoIndexRep<Nc, AntiSymmetric >::Dimension != 1){ if (TwoIndexRep<Nc, AntiSymmetric >::Dimension != 1){
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure" std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure"
<< std::endl; << std::endl;
// Testing HMC representation classes // Testing HMC representation classes
TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid); TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid);
// Test group structure // Test group structure
// (U_f * V_f)_r = U_r * V_r // (U_f * V_f)_r = U_r * V_r
LatticeGaugeField U2A(grid), V2A(grid); LatticeGaugeField U2A(grid), V2A(grid);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2A); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2A); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
LatticeGaugeField UV2A(grid); LatticeGaugeField UV2A(grid);
UV2A = Zero(); UV2A = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu); SU3::LatticeMatrix Umu2A = peekLorentz(U2,mu);
SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu); SU3::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
pokeLorentz(UV2A,Umu2A*Vmu2A, mu); pokeLorentz(UV2A,Umu2A*Vmu2A, mu);
}
TIndexRepA.update_representation(UV2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r
TIndexRepA.update_representation(U2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r
TIndexRepA.update_representation(V2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid);
Ur2Vr2A = Zero();
for (int mu = 0; mu < Nd; mu++) {
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu);
pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu);
}
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Two Index anti-Symmetric): " << norm2(Diff_check2A) << std::endl;
// Check correspondence of algebra and group transformations
// Create a random vector
SU3::LatticeAlgebraVector h_Asym(grid);
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ar_Asym(grid);
random(gridRNG,h_Asym);
h_Asym = real(h_Asym);
SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym);
// Re-extract h_Asym
SU3::LatticeAlgebraVector h_Asym2(grid);
SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym);
SU3::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl;
// Exponentiate
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid);
U2iAS = expMat(Ar_Asym, 1.0, 16);
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid);
uno2iAS = 1.0;
// Check matrix U2iAS, must be real orthogonal
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS)
<< std::endl;
Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS)
<< std::endl;
Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS)
<< std::endl;
// Construct the fundamental matrix in the group
SU3::LatticeMatrix Af_Asym(grid);
SU3::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
SU3::LatticeMatrix Ufund2A(grid);
Ufund2A = expMat(Af_Asym, 1.0, 16);
SU3::LatticeMatrix UnitCheck2A(grid);
UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A)
<< std::endl;
UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A)
<< std::endl;
// Transform to the 2Index anti-Symmetric representation
U = Zero(); // fill this with only one direction
pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields
TIndexRepA.update_representation(U);
Ur2A = TIndexRepA.U; // U_r
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iAS
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl;
} else {
std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests "
"because representation is trivial (dim = 1)"
<< std::endl;
} }
#endif
Grid_finalize(); Grid_finalize();
} }
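For reference, the group-structure and algebra checks exercised above amount, up to Grid's normalisation conventions for the two-index generators, to the identities

\[
(U_f V_f)_R = U_R\,V_R ,\qquad U_R\,U_R^{\dagger} = U_R^{\dagger}\,U_R = \mathbf 1 ,
\]
\[
h^a \;\xrightarrow{\ \texttt{TwoIndexLieAlgebraMatrix}\ }\; A_R=\sum_a h^a T^a_R \;\xrightarrow{\ \texttt{projectOnAlgebra}\ }\; h'^a = h^a ,\qquad U_R=\exp(A_R).
\]

The fundamental/representation comparison additionally requires that exponentiating in the fundamental and then mapping to the two-index representation agrees with exponentiating the two-index algebra element directly. Each printed value is the squared norm of the deviation from one of these identities, so numbers at rounding-error level indicate a pass.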

View File

@ -0,0 +1,110 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_memory_manager.cc
Copyright (C) 2022
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
void MemoryTest(GridCartesian * FGrid,int N);
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
int N=100;
for(int i=0;i<N;i++){
std::cout << "============================"<<std::endl;
std::cout << "Epoch "<<i<<"/"<<N<<std::endl;
std::cout << "============================"<<std::endl;
MemoryTest(UGrid,256);
MemoryManager::Print();
AUDIT();
}
Grid_finalize();
}
void MemoryTest(GridCartesian * FGrid, int N)
{
LatticeComplexD zero(FGrid); zero=Zero();
std::vector<LatticeComplexD> A(N,zero);//FGrid);
std::vector<ComplexD> B(N,ComplexD(0.0)); // Update sequentially on host
for(int v=0;v<N;v++) A[v] = Zero();
uint64_t counter = 0;
for(int epoch = 0;epoch<10000;epoch++){
int v = random() %N; // Which vec
int w = random() %2; // Write or read
int e = random() %3; // expression or for loop
int dev= random() %2; // On device?
// int e=1;
ComplexD zc = counter++;
if ( w ) {
B[v] = B[v] + zc;
if ( e == 0 ) {
A[v] = A[v] + zc - A[v] + A[v];
} else {
if ( dev ) {
autoView(A_v,A[v],AcceleratorWrite);
accelerator_for(ss,FGrid->oSites(),1,{
A_v[ss] = A_v[ss] + zc;
});
} else {
autoView(A_v,A[v],CpuWrite);
thread_for(ss,FGrid->oSites(),{
A_v[ss] = A_v[ss] + zc;
});
}
}
} else {
if ( e == 0 ) {
A[v] = A[v] + A[v] - A[v];
} else {
if ( dev ) {
autoView(A_v,A[v],AcceleratorRead);
accelerator_for(ss,FGrid->oSites(),1,{
assert(B[v]==A_v[ss]()()().getlane(0));
});
// std::cout << "["<<v<<"] checked on GPU"<<B[v]<<std::endl;
} else {
autoView(A_v,A[v],CpuRead);
thread_for(ss,FGrid->oSites(),{
assert(B[v]==A_v[ss]()()().getlane(0));
});
// std::cout << "["<<v<<"] checked on CPU"<<B[v]<<std::endl;
}
}
}
}
}
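The epoch loop above randomises which field is touched, whether the access is a read or a write, whether it goes through an expression template, a device kernel or a host loop, and keeps the host-side shadow vector B to validate against. A minimal sketch of the coherence property being stressed, written with the same Grid view API used in the test (CoherenceSketch is a hypothetical helper; FGrid is a GridCartesian* as above):

#include <Grid/Grid.h>
using namespace Grid;

void CoherenceSketch(GridCartesian *FGrid)
{
  LatticeComplexD A(FGrid); A = Zero();
  ComplexD zc(3.0, 0.0);
  {
    // Write through a device view: the memory manager must mark any host copy stale.
    autoView(A_v, A, AcceleratorWrite);
    accelerator_for(ss, FGrid->oSites(), 1, {
      A_v[ss] = A_v[ss] + zc;
    });
  }
  {
    // Read through a host view: the manager must copy the data back before the view opens.
    autoView(A_v, A, CpuRead);
    thread_for(ss, FGrid->oSites(), {
      assert(zc == A_v[ss]()()().getlane(0));
    });
  }
}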

View File

@ -122,14 +122,15 @@ int main (int argc, char ** argv)
std::cout << "Determinant defect before projection " <<norm2(detU)<<std::endl; std::cout << "Determinant defect before projection " <<norm2(detU)<<std::endl;
tmp = U*adj(U) - ident; tmp = U*adj(U) - ident;
std::cout << "Unitarity check before projection " << norm2(tmp)<<std::endl; std::cout << "Unitarity check before projection " << norm2(tmp)<<std::endl;
#if (Nc == 3)
ProjectSU3(U); ProjectSU3(U);
detU= Determinant(U) ; detU= Determinant(U) ;
detU= detU -1.0; detU= detU -1.0;
std::cout << "Determinant ProjectSU3 defect " <<norm2(detU)<<std::endl; std::cout << "Determinant ProjectSU3 defect " <<norm2(detU)<<std::endl;
tmp = U*adj(U) - ident; tmp = U*adj(U) - ident;
std::cout << "Unitarity check after projection " << norm2(tmp)<<std::endl; std::cout << "Unitarity check after projection " << norm2(tmp)<<std::endl;
#endif
ProjectSUn(UU); ProjectSUn(UU);
detUU= Determinant(UU); detUU= Determinant(UU);
detUU= detUU -1.0; detUU= detUU -1.0;
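For reference, a link belongs to SU(N) when it is unitary with unit determinant, so the defects printed above are

\[
\big\lVert\,U U^{\dagger}-\mathbf 1\,\big\rVert^{2}
\qquad\text{and}\qquad
\big\lVert\,\det U - 1\,\big\rVert^{2},
\]

measured before and after projection. The new #if (Nc == 3) guard restricts the SU(3)-specific ProjectSU3 path to builds compiled with Nc = 3, while the generic ProjectSUn path is exercised unconditionally.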

View File

@ -0,0 +1,73 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_poisson_fft.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/lattice/Lattice_slice_gpu.h>
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int N=16;
std::vector<int> latt_size ({N,N,N,N});
std::vector<int> simd_layout({vComplexD::Nsimd(),1,1,1});
std::vector<int> mpi_layout ({1,1,1,1});
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
LatticeComplexD rn(&GRID);
GridParallelRNG RNG(&GRID);
RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
gaussian(RNG,rn);
std::vector<TComplex> reduced_ref;
std::vector<TComplex> reduced_gpu;
for(int d=0;d<4;d++){
{
RealD t=-usecond();
sliceSum(rn,reduced_ref,d);
t+=usecond();
std::cout << " sliceSum took "<<t<<" usecs"<<std::endl;
}
{
RealD t=-usecond();
sliceSumGpu(rn,reduced_gpu,d);
t+=usecond();
std::cout << " sliceSumGpu took "<<t<<" usecs"<<std::endl;
}
for(int t=0;t<reduced_ref.size();t++){
std::cout << t<<" ref "<< reduced_ref[t] <<" opt " << reduced_gpu[t] << " diff "<<reduced_ref[t]-reduced_gpu[t]<<std::endl;
TComplex diff = reduced_ref[t]-reduced_gpu[t];
assert(abs(TensorRemove(diff)) < 1e-8 );
}
}
Grid_finalize();
}
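Both routines compute the same slice-wise reduction: for a field phi and summation direction d of extent L_d,

\[
S(t)\;=\;\sum_{x\,:\,x_d=t}\phi(x),\qquad t=0,\dots,L_d-1 ,
\]

and the loop above requires |S_ref(t) - S_gpu(t)| < 1e-8 on every slice in every direction, printing per-call timings so the GPU implementation can be compared against the CPU reference.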

View File

@ -2,11 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc Source file: ./tests/core/Test_wilson_clover.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -67,8 +68,6 @@ int main(int argc, char **argv)
tmp = Zero(); tmp = Zero();
FermionField err(&Grid); FermionField err(&Grid);
err = Zero(); err = Zero();
FermionField err2(&Grid);
err2 = Zero();
FermionField phi(&Grid); FermionField phi(&Grid);
random(pRNG, phi); random(pRNG, phi);
FermionField chi(&Grid); FermionField chi(&Grid);
@ -77,6 +76,8 @@ int main(int argc, char **argv)
SU<Nc>::HotConfiguration(pRNG, Umu); SU<Nc>::HotConfiguration(pRNG, Umu);
std::vector<LatticeColourMatrix> U(4, &Grid); std::vector<LatticeColourMatrix> U(4, &Grid);
double tolerance = 1e-4;
double volume = 1; double volume = 1;
for (int mu = 0; mu < Nd; mu++) for (int mu = 0; mu < Nd; mu++)
{ {
@ -88,7 +89,7 @@ int main(int argc, char **argv)
RealD csw_t = 1.0; RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
//Dwc.ImportGauge(Umu); // not necessary, included in the constructor CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl;
@ -112,7 +113,24 @@ int main(int argc, char **argv)
setCheckerboard(r_eo, r_e); setCheckerboard(r_eo, r_e);
err = ref - r_eo; err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc_compact.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc_compact.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
@ -152,6 +170,22 @@ int main(int argc, char **argv)
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl;
Dwc_compact.Meooe(chi_e, dchi_o);
Dwc_compact.Meooe(chi_o, dchi_e);
Dwc_compact.MeooeDag(phi_e, dphi_o);
Dwc_compact.MeooeDag(phi_o, dphi_e);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
@ -169,7 +203,21 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Mooee(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.Mooee(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl;
@ -188,7 +236,21 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInvDag(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl;
@ -207,7 +269,21 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "================================================================" << std::endl; std::cout << GridLogMessage << "================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl;
@ -249,7 +325,7 @@ int main(int argc, char **argv)
///////////////// /////////////////
WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
Dwc_prime.ImportGauge(U_prime); CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
tmp = Omega * src; tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp); pickCheckerboard(Even, src_e, tmp);
@ -262,7 +338,37 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi; err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_compact_prime.Mooee(src_e, phi_e);
Dwc_compact_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "=================================================================" << std::endl; std::cout << GridLogMessage << "=================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl;
@ -272,7 +378,6 @@ int main(int argc, char **argv)
phi = Zero(); phi = Zero();
WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params);
Dw.ImportGauge(Umu);
Dw.M(src, result); Dw.M(src, result);
Dwc.M(src, chi); Dwc.M(src, chi);
@ -280,13 +385,24 @@ int main(int argc, char **argv)
Dwc_prime.M(Omega * src, phi); Dwc_prime.M(Omega * src, phi);
WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params);
Dw_prime.ImportGauge(U_prime);
Dw_prime.M(Omega * src, result2); Dw_prime.M(Omega * src, result2);
err = result - adj(Omega) * result2;
std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
err = chi - adj(Omega) * phi; err = chi - adj(Omega) * phi;
err2 = result - adj(Omega) * result2; std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err) << std::endl;
std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err2) << std::endl;
chi = Zero();
phi = Zero();
Dwc_compact.M(src, chi);
Dwc_compact_prime.M(Omega * src, phi);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff CompactWilsonClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl;
@ -296,7 +412,6 @@ int main(int argc, char **argv)
phi = Zero(); phi = Zero();
err = Zero(); err = Zero();
WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0
Dwc_csw0.ImportGauge(Umu);
pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi); pickCheckerboard(Odd, phi_o, phi);
@ -316,7 +431,34 @@ int main(int argc, char **argv)
setCheckerboard(src, src_o); setCheckerboard(src, src_o);
err = chi - phi; err = chi - phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_compact_csw0.Mooee(src_e, phi_e);
Dwc_compact_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl;
@ -348,9 +490,41 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = ref - phi; err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff :" << norm2(ref) << std::endl; std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff :" << norm2(phi) << std::endl; std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff :" << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
Dwc_compact.Meooe(src_o, phi_e);
Dwc_compact.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Grid_finalize(); Grid_finalize();
} }
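The compact-clover checks added above all probe the same even/odd block structure of the operator; schematically, with e and o labelling the two checkerboards (and assuming the usual Hermiticity of the clover diagonal block),

\[
(M\psi)_e = M_{ee}\psi_e + M_{eo}\psi_o,\qquad (M\psi)_o = M_{oe}\psi_e + M_{oo}\psi_o,
\]
\[
\langle\phi_e,\,M_{eo}\chi_o\rangle = \overline{\langle\chi_o,\,M_{eo}^{\dagger}\phi_e\rangle},\qquad
M_{ee}^{-1}M_{ee} \;=\; M_{ee}^{\dagger}\,(M_{ee}^{\dagger})^{-1} \;=\; M_{ee}^{-1}M_{ee}^{\dagger} \;=\; \mathbf 1 .
\]

The final section rebuilds the unpreconditioned operator from its four blocks, (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o), and compares it with a direct application of M, for both the standard and the compact clover fermion.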

View File

@ -0,0 +1,253 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cayley_cg.cc
Copyright (C) 2022
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
template<class What>
void TestConserved(What & Dw,
LatticeGaugeField &Umu,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
GridParallelRNG *RNG4);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::Gamma5
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid);
std::vector<int> seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
if( argc > 1 && argv[1][0] != '-' )
{
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, argv[1]);
}
else
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::HotConfiguration(RNG4,Umu);
}
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
RealD mass = 0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,mass,params);
TestConserved<WilsonFermionR>(Dw,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonCloverFermionR Dwc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params);
TestConserved<WilsonCloverFermionR>(Dwc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"CompactWilsonCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
CompactWilsonCloverFermionR Dwcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params);
TestConserved<CompactWilsonCloverFermionR>(Dwcc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonExpCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonExpCloverFermionR Dewc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params);
TestConserved<WilsonExpCloverFermionR>(Dewc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"CompactWilsonExpCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
CompactWilsonExpCloverFermionR Dewcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params);
TestConserved<CompactWilsonExpCloverFermionR>(Dewcc,Umu,UGrid,UrbGrid,&RNG4);
Grid_finalize();
}
template<class Action>
void TestConserved(Action & Dw,
LatticeGaugeField &Umu,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
GridParallelRNG *RNG4)
{
LatticePropagator phys_src(UGrid);
LatticePropagator seqsrc(UGrid);
LatticePropagator prop4(UGrid);
LatticePropagator Vector_mu(UGrid);
LatticeComplex SV (UGrid);
LatticeComplex VV (UGrid);
LatticePropagator seqprop(UGrid);
SpinColourMatrix kronecker; kronecker=1.0;
Coordinate coor({0,0,0,0});
phys_src=Zero();
pokeSite(kronecker,phys_src,coor);
ConjugateGradient<LatticeFermion> CG(1.0e-16,100000);
SchurRedBlackDiagTwoSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> zpg;
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
LatticeFermion src4 (UGrid);
PropToFerm<Action>(src4,phys_src,s,c);
LatticeFermion result4(UGrid); result4=Zero();
schur(Dw,src4,result4,zpg);
std::cout<<GridLogMessage<<"spin "<<s<<" color "<<c<<" norm2(sourc4d) "<<norm2(src4)
<<" norm2(result4d) "<<norm2(result4)<<std::endl;
FermToProp<Action>(prop4,result4,s,c);
}
}
auto curr = Current::Vector;
const int mu_J=0;
const int t_J=0;
LatticeComplex ph (UGrid); ph=1.0;
Dw.SeqConservedCurrent(prop4,
seqsrc,
phys_src,
curr,
mu_J,
t_J,
t_J,// whole lattice
ph);
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
LatticeFermion src4 (UGrid);
PropToFerm<Action>(src4,seqsrc,s,c);
LatticeFermion result4(UGrid); result4=Zero();
schur(Dw,src4,result4,zpg);
FermToProp<Action>(seqprop,result4,s,c);
}
}
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gT(Gamma::Algebra::GammaT);
std::vector<TComplex> sumSV;
std::vector<TComplex> sumVV;
Dw.ContractConservedCurrent(prop4,prop4,Vector_mu,phys_src,Current::Vector,Tdir);
SV = trace(Vector_mu); // Scalar-Vector conserved current
VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current
// Spatial sum
sliceSum(SV,sumSV,Tdir);
sliceSum(VV,sumVV,Tdir);
const int Nt{static_cast<int>(sumSV.size())};
std::cout<<GridLogMessage<<"Vector Ward identity by timeslice (~ 0)"<<std::endl;
for(int t=0;t<Nt;t++){
std::cout<<GridLogMessage <<" t "<<t<<" SV "<<real(TensorRemove(sumSV[t]))<<" VV "<<real(TensorRemove(sumVV[t]))<<std::endl;
assert(abs(real(TensorRemove(sumSV[t]))) < 1e-10);
assert(abs(real(TensorRemove(sumVV[t]))) < 1e-2);
}
///////////////////////////////
// 3pt vs 2pt check
///////////////////////////////
{
Gamma::Algebra gA = Gamma::Algebra::Identity;
Gamma g(gA);
LatticePropagator cur(UGrid);
LatticePropagator tmp(UGrid);
LatticeComplex c(UGrid);
SpinColourMatrix qSite;
peekSite(qSite, seqprop, coor);
Complex test_S, test_V, check_S, check_V;
std::vector<TComplex> check_buf;
test_S = trace(qSite*g);
test_V = trace(qSite*g*Gamma::gmu[mu_J]);
Dw.ContractConservedCurrent(prop4,prop4,cur,phys_src,curr,mu_J);
c = trace(cur*g);
sliceSum(c, check_buf, Tp);
check_S = TensorRemove(check_buf[t_J]);
auto gmu=Gamma::gmu[mu_J];
c = trace(cur*g*gmu);
sliceSum(c, check_buf, Tp);
check_V = TensorRemove(check_buf[t_J]);
std::cout<<GridLogMessage << std::setprecision(14)<<"Test S = " << abs(test_S) << std::endl;
std::cout<<GridLogMessage << "Test V = " << abs(test_V) << std::endl;
std::cout<<GridLogMessage << "Check S = " << abs(check_S) << std::endl;
std::cout<<GridLogMessage << "Check V = " << abs(check_V) << std::endl;
// Check difference = 0
check_S = check_S - test_S;
check_V = check_V - test_V;
std::cout<<GridLogMessage << "Consistency check for sequential conserved " <<std::endl;
std::cout<<GridLogMessage << "Diff S = " << abs(check_S) << std::endl;
assert(abs(check_S) < 1e-8);
std::cout<<GridLogMessage << "Diff V = " << abs(check_V) << std::endl;
assert(abs(check_V) < 1e-8);
}
}
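TestConserved checks two properties of the conserved (point-split) vector current. First, a Ward identity timeslice by timeslice: schematically, away from the source,

\[
\sum_{\vec x}\;\big\langle\,\partial^{-}_{\mu} V^{\rm cons}_{\mu}(\vec x,t)\;\mathcal O(0)\,\big\rangle \;=\; 0 ,
\]

which is why the spatially summed SV and VV contractions are required to be compatible with zero to the stated tolerances. Second, a three-point versus two-point consistency check: the sequential propagator built through the current insertion, evaluated back at the source point, must reproduce the direct contraction of the conserved current at the insertion time t_J, so Diff S and Diff V should vanish to solver precision.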

View File

@ -0,0 +1,530 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/core/Test_wilson_exp_clover.cc
Copyright (C) 2022
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REALF" << sizeof(RealF) << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REALD" << sizeof(RealD) << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REAL" << sizeof(Real) << std::endl;
std::vector<int> seeds({1, 2, 3, 4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
typedef typename WilsonExpCloverFermionR::FermionField FermionField;
typename WilsonExpCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid);
random(pRNG, src);
FermionField result(&Grid);
result = Zero();
FermionField result2(&Grid);
result2 = Zero();
FermionField ref(&Grid);
ref = Zero();
FermionField tmp(&Grid);
tmp = Zero();
FermionField err(&Grid);
err = Zero();
FermionField phi(&Grid);
random(pRNG, phi);
FermionField chi(&Grid);
random(pRNG, chi);
LatticeGaugeField Umu(&Grid);
SU<Nc>::HotConfiguration(pRNG, Umu);
std::vector<LatticeColourMatrix> U(4, &Grid);
double tolerance = 1e-4;
double volume = 1;
for (int mu = 0; mu < Nd; mu++)
{
volume = volume * latt_size[mu];
}
RealD mass = 0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
FermionField src_e(&RBGrid);
FermionField src_o(&RBGrid);
FermionField r_e(&RBGrid);
FermionField r_o(&RBGrid);
FermionField r_eo(&Grid);
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
Dwc.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc_compact.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc_compact.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
FermionField chi_e(&RBGrid);
FermionField chi_o(&RBGrid);
FermionField dchi_e(&RBGrid);
FermionField dchi_o(&RBGrid);
FermionField phi_e(&RBGrid);
FermionField phi_o(&RBGrid);
FermionField dphi_e(&RBGrid);
FermionField dphi_o(&RBGrid);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc.Meooe(chi_e, dchi_o);
Dwc.Meooe(chi_o, dchi_e);
Dwc.MeooeDag(phi_e, dphi_o);
Dwc.MeooeDag(phi_o, dphi_e);
ComplexD pDce = innerProduct(phi_e, dchi_e);
ComplexD pDco = innerProduct(phi_o, dchi_o);
ComplexD cDpe = innerProduct(chi_e, dphi_e);
ComplexD cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl;
Dwc_compact.Meooe(chi_e, dchi_o);
Dwc_compact.Meooe(chi_o, dchi_e);
Dwc_compact.MeooeDag(phi_e, dphi_o);
Dwc_compact.MeooeDag(phi_o, dphi_e);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.Mooee(chi_e, src_e);
Dwc.MooeeInv(src_e, phi_e);
Dwc.Mooee(chi_o, src_o);
Dwc.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Mooee(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.Mooee(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.MooeeDag(chi_e, src_e);
Dwc.MooeeInvDag(src_e, phi_e);
Dwc.MooeeDag(chi_o, src_o);
Dwc.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInvDag(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.MooeeDag(chi_e, src_e);
Dwc.MooeeInv(src_e, phi_e);
Dwc.MooeeDag(chi_o, src_o);
Dwc.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl;
std::cout << GridLogMessage << "================================================================" << std::endl;
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc.Mooee(src_e, chi_e);
Dwc.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
////////////////////// Gauge Transformation
std::vector<int> seeds2({5, 6, 7, 8});
GridParallelRNG pRNG2(&Grid);
pRNG2.SeedFixedIntegers(seeds2);
LatticeColourMatrix Omega(&Grid);
LatticeColourMatrix ShiftedOmega(&Grid);
LatticeGaugeField U_prime(&Grid);
U_prime = Zero();
LatticeColourMatrix U_prime_mu(&Grid);
U_prime_mu = Zero();
SU<Nc>::LieRandomize(pRNG2, Omega, 1.0);
for (int mu = 0; mu < Nd; mu++)
{
U[mu] = peekLorentz(Umu, mu);
ShiftedOmega = Cshift(Omega, mu, 1);
U_prime_mu = Omega * U[mu] * adj(ShiftedOmega);
pokeLorentz(U_prime, U_prime_mu, mu);
}
/////////////////
WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_prime.Mooee(src_e, phi_e);
Dwc_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_compact_prime.Mooee(src_e, phi_e);
Dwc_compact_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "=================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl;
std::cout << GridLogMessage << "================================================================" << std::endl;
chi = Zero();
phi = Zero();
WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params);
Dw.M(src, result);
Dwc.M(src, chi);
Dwc_prime.M(Omega * src, phi);
WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params);
Dw_prime.M(Omega * src, result2);
err = result - adj(Omega) * result2;
std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff WilsonExpClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
Dwc_compact.M(src, chi);
Dwc_compact_prime.M(Omega * src, phi);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff CompactWilsonExpClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
chi = Zero();
phi = Zero();
err = Zero();
WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_csw0.Mooee(src_e, phi_e);
Dwc_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_compact_csw0.Mooee(src_e, phi_e);
Dwc_compact_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc.Mooee(src_e, chi_e);
Dwc.Mooee(src_o, chi_o);
Dwc.Meooe(src_o, phi_e);
Dwc.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
Dwc_compact.Meooe(src_o, phi_e);
Dwc_compact.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Grid_finalize();
}
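Both clover tests construct the transformed gauge field from a random Omega and compare the transformed and untransformed operators; the covariance property being asserted is

\[
U_\mu(x)\;\to\;U'_\mu(x)=\Omega(x)\,U_\mu(x)\,\Omega^{\dagger}(x+\hat\mu),\qquad
D[U']\,\big(\Omega\,\psi\big)=\Omega\,D[U]\,\psi ,
\]

so err = chi - adj(Omega) * phi measures the covariance defect, both for the full operator M and for the even/odd diagonal block Mooee.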

View File

@ -132,8 +132,8 @@ int main(int argc, char **argv) {
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
//TheHMC.Resources.LoadBinaryCheckpointer(CPparams); TheHMC.Resources.LoadBinaryCheckpointer(CPparams);
TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); //TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); this breaks for compilation without lime
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);

View File

@ -74,10 +74,10 @@ int main(int argc, char **argv) {
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
//TheHMC.Resources.LoadNerscCheckpointer(CPparams); TheHMC.Resources.LoadNerscCheckpointer(CPparams);
// Store metadata in the Scidac checkpointer // Store metadata in the Scidac checkpointer - obviously breaks without LIME
TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar); //TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar);
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);

View File

@ -37,6 +37,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
#ifdef HAVE_LIME
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis> class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{ {
@ -249,3 +251,11 @@ int main (int argc, char ** argv) {
Grid_finalize(); Grid_finalize();
} }
#else
int main( void )
{
return 0 ;
}
#endif // HAVE_LIME

Some files were not shown because too many files have changed in this diff.