1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-21 09:12:03 +01:00

Compare commits

...

88 Commits

Author SHA1 Message Date
e28cf2b8be Merge 1dfaa08afb into b8a7004365 2023-09-03 09:29:21 -07:00
b8a7004365 Partial fraction test 2023-08-14 15:17:03 -04:00
994512048e Merge pull request #439 from felixerben/bugfix/IRL_convergence
Bugfix/irl convergence
2023-07-12 16:32:26 -04:00
1dfaa08afb The stencils for the staple and rect-staple padded cell implementations are now created and stored by workspace classes that allow for reuse providing the grids remain consistent
The workspaces are now used by the plaq+rectangle gauge action resulting in a further 2x performance improvement as measured on a 16^4 local volume for 2 nodes (16 ranks) of Crusher
2023-06-28 15:11:24 -04:00
f44dce390f Implemented acclerator-optimized versions of localCopyRegion and insertSliceLocal to speed up padding
Fixed const correctness on PaddedCell methods
Fixed compile issues on Crusher
Added timing breakdowns for PaddedCell::Expand and the padded implementations of the staples, visible under --log Performance
Optimized kernel for StaplePadded
Test_iwasaki_action_newstaple now repeats the calculation 10 times and reports average timings
2023-06-27 14:58:10 -04:00
bb71e9a96a Added PaddedCell and GeneralisedLocalStencil header includes to standard base headers
Moved versions of the padded-cell implementations of staple and rect-staple from test code to WilsonLoops header
Added StapleAndRectStapleAll which is now called by the plaq+rectangle action class. Under the hood it uses the padded cell implementations with maximal reuse of the padded gauge links
2023-06-27 11:23:30 -04:00
78bae9417c returning Nstop vectors even if not all meet true convergence criterion 2023-06-27 14:38:19 +01:00
dd170ead01 whitespace 2023-06-27 11:37:01 +01:00
014704856f do one more iteration if not all vectors converged 2023-06-27 11:33:30 +01:00
6f6844ccf1 Added new StapleAll and RectStapleAll functions that return the staples for all mu as an array
Modified plaq+rectangle gauge actions to use the above
Added a test code to confirm the above changes
2023-06-26 15:48:47 -04:00
4c6613d72c Modified RectStapleDouble and RectStapleOptimised to use Gauge-BC respecting CshiftLink
Added test code tests/debug/Test_optimized_staple_gaugebc demonstrating equivalence of above to RectStapleUnoptimised for cconj gauge BCs
Removed optimized staple only being used for periodic gauge BCs; it is now always used
2023-06-26 10:20:23 -04:00
ee92e08edb Merge pull request #435 from fjosw/fix/warnings_in_WilsonKernelsImplementation
Unused variable in WilsonKernelsImplementation
2023-06-23 11:47:19 -04:00
c1dcee9328 Merge pull request #437 from fjosw/fix/stencil_debug
Added GridLogDebug to BuildSurfaceList debug message
2023-06-23 11:47:00 -04:00
6b150961fe Better script 2023-06-23 18:09:25 +03:00
36cc9c524f Threaded the constructor of GeneralLocalStencil 2023-06-23 09:57:38 -04:00
5bafcaedfa Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-22 19:59:45 +03:00
bfeceae708 FTHMC 2023-06-22 12:58:18 -04:00
eacb66591f Config command 2023-06-22 19:56:40 +03:00
fadaa85626 Update 2023-06-22 19:56:27 +03:00
02a5b0d786 Updating run during testing 2023-06-22 19:52:46 +03:00
0e2141442a Dennis says broken 2023-06-22 19:19:51 +03:00
769eb0eecb Precision coverage 2023-06-22 19:19:20 +03:00
4241c7d4a3 Imported coalescedReadGeneralPermute GPU implementation from Christoph
Fixed bug in padded staple code where extract was being called on the result before the GPU view was closed
Fixed compile issue with pointer cast in padded staple code
Added timing summaries of padded staple code and timing breakdown of staple implementation to Test_padded_cell_staple
2023-06-21 16:01:01 -04:00
7b11075102 The user can now specify the implementation of Cshift used by the PaddedCell class through a virtual base class API. Implementations for default (regular Cshift) and for gauge links (which respects the gauge BCs)
Fixed const-correctness for PaddedCell and ConjugateGimpl::setDirections
Modified test code for padded-cell implementation of staple, rect-staple to use cconj BCs
2023-06-20 17:09:56 -04:00
abc658dca5 Added coalescedReadGeneralPermute CPU implementation based on Christoph's GPT code
In a test code, implemented a padded-cell version of the staple and rectangular-staple calculation
2023-06-20 16:14:25 -04:00
85e35c4da1 fix: added GridLogDebug to BuildSurfaceList debug message. 2023-06-16 10:31:16 +01:00
d72e914cf0 Profiling temporary code until optimised 2023-06-15 10:43:04 -04:00
3b5254e2d5 Optional checkpoint smeared configs for FTHMC 2023-06-15 10:43:04 -04:00
f1c358b596 Additional tests 2023-06-15 10:43:04 -04:00
c0ef210265 Hot start should be properly Hot 2023-06-15 10:43:04 -04:00
e3e1cc1962 Ta project 2023-06-15 10:43:04 -04:00
723eadbb5c Keep methods virtual 2023-06-15 10:43:04 -04:00
e24637ec1e Clean up 2023-06-15 10:43:04 -04:00
8b01ff4ce7 Integrator over to smeared force structure 2023-06-15 10:43:04 -04:00
588197c487 Smeared action virtual class 2023-06-15 10:43:04 -04:00
1352bad2e4 Sunspot compile 2023-06-15 11:22:46 +00:00
ffd7301649 Updated masked / fthmc smeared config container 2023-06-01 06:23:02 -04:00
d2a8494044 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-06-01 06:22:33 -04:00
0982e0d19b Jacobian action wrapper for FTHMC 2023-06-01 06:15:08 -04:00
3badbfc3c1 Refactor the Action and Smeared gauge configuration containers. Add first pass at FTHMC action 2023-06-01 06:14:28 -04:00
5465961e30 New test for FTHMC portion 2023-06-01 06:14:04 -04:00
477b794bc5 fix: unused variable removed. 2023-05-29 14:08:53 +01:00
4835fd1a87 HIP stream synch 2023-05-27 17:58:22 +03:00
6533c25814 Lumi 2023-05-27 16:13:32 +03:00
1b2914ec09 FT-HMC smearing, derivative chain rule, log det and force first pass. 2023-05-22 10:21:37 -04:00
519f795066 Header not liked by gcc on mac? puzzling 2023-05-22 10:21:12 -04:00
4240ad5ca8 Preparing for FTHMC 2023-05-19 21:21:55 -04:00
d418347d86 public for convenience to see rho params 2023-05-19 21:21:05 -04:00
29a4bfe5e5 Clean up 2023-05-19 21:20:45 -04:00
9955bf9daf Regresses to Qlat 2023-05-19 17:32:13 -04:00
876c8f4478 Nodes on padded cell 2023-05-11 12:35:49 -04:00
9c8750f261 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-05-11 12:29:09 -04:00
91efd08179 Option for Qlat generator basis 2023-05-11 12:27:45 -04:00
9953511b65 Mac compile 2023-05-11 12:27:29 -04:00
025fa9991a For FTHMC 2023-05-11 12:26:14 -04:00
e8c60c355b Padded cell code 2023-05-11 12:25:50 -04:00
6c9c7f9d85 Permute fix 2023-05-11 12:24:21 -04:00
f534523ede Debug 2023-05-11 12:23:11 -04:00
1b8a834beb Debug 2023-05-11 12:22:24 -04:00
3aa43e6065 Debug info 2023-04-20 14:21:13 -04:00
78ac4044ff HMC 2023-04-20 13:28:07 -04:00
119c3db47f Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-18 15:13:16 -04:00
21bbdb8fc2 Crusher 2023-04-18 15:11:16 -04:00
739bd7572c Example code 2023-04-17 21:51:55 +00:00
074627a5bd Pass file descriptors through AF_UNIX for level_zero 2023-04-17 21:50:52 +00:00
6a23b2c599 Drop UVM 2023-04-17 21:49:58 +00:00
bd891fb3f5 tests to compile 2023-04-12 18:32:44 -04:00
3984265851 Merge pull request #432 from paboyle/hotfix/nvcc-warnings
Unused statements generating warnings removed
2023-04-12 16:59:02 -04:00
45361d188f Merge pull request #427 from fjosw/feat/bug_report_issue_template
Feat/bug report issue template
2023-04-12 16:58:41 -04:00
80c9d77e02 Merge pull request #433 from paboyle/hotfix/virtual-dtor
Virtual destructor for LinearOperator
2023-04-12 16:56:18 -04:00
3aff64dddb Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-11 12:19:15 -07:00
b4f2ca81ff Copy queue and compute queue same as better concurrency 2023-04-11 12:18:21 -07:00
d1dea5f840 New driver 2023-04-11 12:16:52 -07:00
54f8b84d16 Fence 2023-04-11 12:16:08 -07:00
da503fef0e Name change on barrier routine 2023-04-11 12:14:04 -07:00
4a6802098a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2023-04-07 15:43:28 -04:00
f9b41a84d2 Trajectory runs to completion on Crusher within wall clock time 2023-04-07 15:42:45 -04:00
5d7e0d18b9 virtual destructor for LinearOperator 2023-04-07 14:30:38 +01:00
86dac5ff4f Better printing 2023-04-04 07:42:19 -07:00
4a382fad3f Use distinct SYCL queue for copies 2023-04-04 07:41:41 -07:00
cc753670d9 Barrier elimination, surface list build 2023-04-04 07:39:14 -07:00
cc9d88ea1c Fence changes and EXT kernel loop cout reduction 2023-04-04 07:37:23 -07:00
b281b0166e Put the barrier in the subroutine 2023-04-04 07:36:03 -07:00
6a21f694ff Apply barrier in Gather kernel sequence.
Could place before comms, or in Gather, but decided to insist Gather means Gather is done
2023-04-04 07:33:24 -07:00
39214702f6 feat: indentation fixed. 2023-03-28 16:30:34 +02:00
3e4614c63a feat: draft for bug-report issue template added. 2023-03-28 16:24:35 +02:00
ccd21f96ff Plaquette agreeing and moving to final form (slowly) need to optimise 2023-02-01 22:57:44 -05:00
4b90cb8888 First cut passes combining padded cell with general stencil towards fast plaquette and staggered force 2023-02-01 22:14:10 -05:00
65 changed files with 4646 additions and 348 deletions

54
.github/ISSUE_TEMPLATE/bug-report.yml vendored Normal file
View File

@ -0,0 +1,54 @@
name: Bug report
description: Report a bug.
title: "<insert title>"
labels: [bug]
body:
- type: markdown
attributes:
value: >
Thank you for taking the time to file a bug report.
Please check that the code is pointing to the HEAD of develop
or any commit in master which is tagged with a version number.
- type: textarea
attributes:
label: "Describe the issue:"
description: >
Describe the issue and any previous attempt to solve it.
validations:
required: true
- type: textarea
attributes:
label: "Code example:"
description: >
If relevant, show how to reproduce the issue using a minimal working
example.
placeholder: |
<< your code here >>
render: shell
validations:
required: false
- type: textarea
attributes:
label: "Target platform:"
description: >
Give a description of the target platform (CPU, network, compiler).
Please give the full CPU part description, using for example
`cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
or `sysctl machdep.cpu.brand_string` (macOS) and the full output
the `--version` option of your compiler.
validations:
required: true
- type: textarea
attributes:
label: "Configure options:"
description: >
Please give the exact configure command used and attach
`config.log`, `grid.config.summary` and the output of `make V=1`.
render: shell
validations:
required: true

View File

@ -542,6 +542,7 @@ public:
(*this)(in[i], out[i]);
}
}
virtual ~LinearFunction(){};
};
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {

View File

@ -166,16 +166,16 @@ public:
rsqf[s] =rsq[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
// ps_d[s] = src_d;
precisionChangeFast(ps_f[s],src_d);
precisionChange(ps_f[s],src_d);
}
// r and p for primary
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
r_d = p_d;
//MdagM+m[0]
precisionChangeFast(p_f,p_d);
precisionChange(p_f,p_d);
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
precisionChangeFast(tmp_d,mmp_f);
precisionChange(tmp_d,mmp_f);
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
@ -204,7 +204,7 @@ public:
for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
precisionChangeFast(psi_f[s],psi_d[s]);
precisionChange(psi_f[s],psi_d[s]);
}
///////////////////////////////////////
@ -225,7 +225,7 @@ public:
AXPYTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(r_f, r_d);
precisionChange(r_f, r_d);
PrecChangeTimer.Stop();
AXPYTimer.Start();
@ -243,13 +243,13 @@ public:
cp=c;
PrecChangeTimer.Start();
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
precisionChange(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop();
MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f);
MatrixTimer.Stop();
PrecChangeTimer.Start();
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
precisionChange(mmp_d, mmp_f); // From Float to Double
PrecChangeTimer.Stop();
d=real(innerProduct(p_d,mmp_d));
@ -311,7 +311,7 @@ public:
SolverTimer.Stop();
for(int s=0;s<nshift;s++){
precisionChangeFast(psi_d[s],psi_f[s]);
precisionChange(psi_d[s],psi_f[s]);
}

View File

@ -211,7 +211,7 @@ public:
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
// assert(norm2(tmp_d)< 1.0e-4);
assert(norm2(tmp_d)< 1.0);
axpy(mmp_d,mass[0],p_d,mmp_d);
RealD rn = norm2(p_d);

View File

@ -419,14 +419,15 @@ until convergence
}
}
if ( Nconv < Nstop )
if ( Nconv < Nstop ) {
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
}
eval=eval2;
//Keep only converged
eval.resize(Nconv);// Nstop?
evec.resize(Nconv,grid);// Nstop?
eval.resize(Nstop);// was Nconv
evec.resize(Nstop,grid);// was Nconv
basisSortInPlace(evec,eval,reverse);
}

View File

@ -27,9 +27,10 @@ Author: Christoph Lehner <christoph@lhnr.de>
*************************************************************************************/
/* END LEGAL */
#define Mheader "SharedMemoryMpi: "
#include <Grid/GridCore.h>
#include <pwd.h>
#include <syscall.h>
#ifdef GRID_CUDA
#include <cuda_runtime_api.h>
@ -39,11 +40,118 @@ Author: Christoph Lehner <christoph@lhnr.de>
#endif
#ifdef GRID_SYCL
#define GRID_SYCL_LEVEL_ZERO_IPC
#include <syscall.h>
#define SHM_SOCKETS
#endif
#include <sys/socket.h>
#include <sys/un.h>
NAMESPACE_BEGIN(Grid);
#ifdef SHM_SOCKETS
/*
* Barbaric extra intranode communication route in case we need sockets to pass FDs
* Forced by level_zero not being nicely designed
*/
static int sock;
static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
static char sock_path[256];
class UnixSockets {
public:
static void Open(int rank)
{
int errnum;
sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
unlink(sa_un.sun_path);
if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
perror("bind failure");
exit(EXIT_FAILURE);
}
}
static int RecvFileDescriptor(void)
{
int n;
int fd;
char buf[1];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
char cms[CMSG_SPACE(sizeof(int))];
iov.iov_base = buf;
iov.iov_len = 1;
memset(&msg, 0, sizeof msg);
msg.msg_name = 0;
msg.msg_namelen = 0;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = (caddr_t)cms;
msg.msg_controllen = sizeof cms;
if((n=recvmsg(sock, &msg, 0)) < 0) {
perror("recvmsg failed");
return -1;
}
if(n == 0){
perror("recvmsg returned 0");
return -1;
}
cmsg = CMSG_FIRSTHDR(&msg);
memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
return fd;
}
static void SendFileDescriptor(int fildes,int xmit_to_rank)
{
struct msghdr msg;
struct iovec iov;
struct cmsghdr *cmsg = NULL;
char ctrl[CMSG_SPACE(sizeof(int))];
char data = ' ';
memset(&msg, 0, sizeof(struct msghdr));
memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
iov.iov_base = &data;
iov.iov_len = sizeof(data);
sprintf(sock_path,sock_path_fmt,xmit_to_rank);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
msg.msg_name = (void *)&sa_un;
msg.msg_namelen = sizeof(sa_un);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_controllen = CMSG_SPACE(sizeof(int));
msg.msg_control = ctrl;
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
*((int *) CMSG_DATA(cmsg)) = fildes;
sendmsg(sock, &msg, 0);
};
};
#endif
NAMESPACE_BEGIN(Grid);
#define header "SharedMemoryMpi: "
/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
@ -66,8 +174,8 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
if ( WorldRank == 0) {
std::cout << header " World communicator of size " <<WorldSize << std::endl;
std::cout << header " Node communicator of size " <<WorldShmSize << std::endl;
std::cout << Mheader " World communicator of size " <<WorldSize << std::endl;
std::cout << Mheader " Node communicator of size " <<WorldShmSize << std::endl;
}
// WorldShmComm, WorldShmSize, WorldShmRank
@ -344,7 +452,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
@ -429,7 +537,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
SharedMemoryZero(ShmCommBuf,bytes);
@ -472,7 +580,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
exit(EXIT_FAILURE);
}
if ( WorldRank == 0 ){
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
}
SharedMemoryZero(ShmCommBuf,bytes);
@ -480,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef SHM_SOCKETS
UnixSockets::Open(WorldShmRank);
#endif
for(int r=0;r<WorldShmSize;r++){
MPI_Barrier(WorldShmComm);
#ifndef GRID_MPI3_SHM_NONE
//////////////////////////////////////////////////
// If it is me, pass around the IPC access key
@ -489,24 +602,32 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
void * thisBuf = ShmCommBuf;
if(!Stencil_force_mpi) {
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; } clone_mem_t;
typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
handle.pid = getpid();
memcpy((void *)&handle.ze,(void *)&ihandle,sizeof(ihandle));
#ifdef SHM_SOCKETS
for(int rr=0;rr<WorldShmSize;rr++){
if(rr!=r){
UnixSockets::SendFileDescriptor(handle.fd,rr);
}
}
#endif
}
#endif
#ifdef GRID_CUDA
@ -534,6 +655,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Share this IPC handle across the Shm Comm
//////////////////////////////////////////////////
{
MPI_Barrier(WorldShmComm);
int ierr=MPI_Bcast(&handle,
sizeof(handle),
MPI_BYTE,
@ -549,6 +671,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
int myfd;
#ifdef SHM_SOCKETS
myfd=UnixSockets::RecvFileDescriptor();
#else
std::cout<<"mapping seeking remote pid/fd "
<<handle.pid<<"/"
<<handle.fd<<std::endl;
@ -556,16 +682,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
int myfd = syscall(438,pidfd,handle.fd,0);
std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
myfd = syscall(438,pidfd,handle.fd,0);
int err_t = errno;
if (myfd < 0) {
fprintf(stderr,"pidfd_getfd returned %d errno was %d\n", myfd,err_t); fflush(stderr);
perror("pidfd_getfd failed ");
assert(0);
}
#endif
std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
@ -600,6 +732,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#else
WorldShmCommBufs[r] = ShmCommBuf;
#endif
MPI_Barrier(WorldShmComm);
}
_ShmAllocBytes=bytes;
@ -611,7 +744,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -648,7 +781,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
assert(((uint64_t)ptr&0x3F)==0);
close(fd);
WorldShmCommBufs[r] =ptr;
// std::cout << header "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
// std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
}
_ShmAlloc=1;
_ShmAllocBytes = bytes;
@ -658,7 +791,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
#ifdef GRID_MPI3_SHM_NONE
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -705,7 +838,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
std::cout << header "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
MPI_Barrier(WorldShmComm);

View File

@ -47,3 +47,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_transfer.h>
#include <Grid/lattice/Lattice_basis.h>
#include <Grid/lattice/Lattice_crc.h>
#include <Grid/lattice/PaddedCell.h>

View File

@ -697,8 +697,68 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
for(int d=0;d<nd;d++){
assert(Fg->_processors[d] == Tg->_processors[d]);
}
// the above should guarantee that the operations are local
#if 1
size_t nsite = 1;
for(int i=0;i<nd;i++) nsite *= RegionSize[i];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx, nsite, {
Coordinate from_coor, to_coor;
size_t rem = idx;
for(int i=0;i<nd;i++){
size_t base_i = rem % RegionSize[i]; rem /= RegionSize[i];
from_coor[i] = base_i + FromLowerLeft[i];
to_coor[i] = base_i + ToLowerLeft[i];
}
int foidx = Fg->oIndex(from_coor);
int fiidx = Fg->iIndex(from_coor);
int toidx = Tg->oIndex(to_coor);
int tiidx = Tg->iIndex(to_coor);
int* tt = table + 4*idx;
tt[0] = foidx;
tt[1] = fiidx;
tt[2] = toidx;
tt[3] = tiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(from_v,From,AcceleratorRead);
autoView(to_v,To,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&from_v[from_oidx];
vector_type* to = (vector_type *)&to_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
Coordinate ldf = Fg->_ldimensions;
Coordinate rdf = Fg->_rdimensions;
Coordinate isf = Fg->_istride;
@ -707,9 +767,9 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride;
autoView( t_v , To, AcceleratorWrite);
autoView( f_v , From, AcceleratorRead);
accelerator_for(idx,Fg->lSites(),1,{
autoView( t_v , To, CpuWrite);
autoView( f_v , From, CpuRead);
thread_for(idx,Fg->lSites(),{
sobj s;
Coordinate Fcoor(nd);
Coordinate Tcoor(nd);
@ -722,17 +782,24 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
}
if (in_region) {
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
vector_type * fp = (vector_type *)&f_v[odx_f];
vector_type * tp = (vector_type *)&t_v[odx_t];
#if 0
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); // inner index from
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); // inner index to
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); // outer index from
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); // outer index to
scalar_type * fp = (scalar_type *)&f_v[odx_f];
scalar_type * tp = (scalar_type *)&t_v[odx_t];
for(int w=0;w<words;w++){
tp[w].putlane(fp[w].getlane(idx_f),idx_t);
}
#else
peekLocalSite(s,f_v,Fcoor);
pokeLocalSite(s,t_v,Tcoor);
#endif
}
});
#endif
}
@ -825,6 +892,8 @@ void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slic
}
//Insert subvolume orthogonal to direction 'orthog' with slice index 'slice_lo' from 'lowDim' onto slice index 'slice_hi' of higherDim
//The local dimensions of both 'lowDim' and 'higherDim' orthogonal to 'orthog' should be the same
template<class vobj>
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
@ -841,11 +910,70 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
#if 1
size_t nsite = lg->lSites()/lg->LocalDimensions()[orthog];
size_t tbytes = 4*nsite*sizeof(int);
int *table = (int*)malloc(tbytes);
thread_for(idx,nsite,{
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lcoor[orthog] = slice_lo;
hcoor[orthog] = slice_hi;
size_t rem = idx;
for(int mu=0;mu<nl;mu++){
if(mu != orthog){
int xmu = rem % lg->LocalDimensions()[mu]; rem /= lg->LocalDimensions()[mu];
lcoor[mu] = hcoor[mu] = xmu;
}
}
int loidx = lg->oIndex(lcoor);
int liidx = lg->iIndex(lcoor);
int hoidx = hg->oIndex(hcoor);
int hiidx = hg->iIndex(hcoor);
int* tt = table + 4*idx;
tt[0] = loidx;
tt[1] = liidx;
tt[2] = hoidx;
tt[3] = hiidx;
});
int* table_d = (int*)acceleratorAllocDevice(tbytes);
acceleratorCopyToDevice(table,table_d,tbytes);
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
autoView(lowDim_v,lowDim,AcceleratorRead);
autoView(higherDim_v,higherDim,AcceleratorWrite);
accelerator_for(idx,nsite,1,{
static const int words=sizeof(vobj)/sizeof(vector_type);
int* tt = table_d + 4*idx;
int from_oidx = *tt++;
int from_lane = *tt++;
int to_oidx = *tt++;
int to_lane = *tt;
const vector_type* from = (const vector_type *)&lowDim_v[from_oidx];
vector_type* to = (vector_type *)&higherDim_v[to_oidx];
scalar_type stmp;
for(int w=0;w<words;w++){
stmp = getlane(from[w], from_lane);
putlane(to[w], stmp, to_lane);
}
});
acceleratorFreeDevice(table_d);
free(table);
#else
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuRead);
autoView(higherDimv,higherDim,CpuWrite);
@ -861,6 +989,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
pokeLocalSite(s,higherDimv,hcoor);
}
});
#endif
}

174
Grid/lattice/PaddedCell.h Normal file
View File

@ -0,0 +1,174 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/PaddedCell.h
Copyright (C) 2019
Author: Peter Boyle pboyle@bnl.gov
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include<Grid/cshift/Cshift.h>
NAMESPACE_BEGIN(Grid);
//Allow the user to specify how the C-shift is performed, e.g. to respect the appropriate boundary conditions
template<typename vobj>
struct CshiftImplBase{
virtual Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const = 0;
virtual ~CshiftImplBase(){}
};
template<typename vobj>
struct CshiftImplDefault: public CshiftImplBase<vobj>{
Lattice<vobj> Cshift(const Lattice<vobj> &in, int dir, int shift) const override{ return Grid::Cshift(in,dir,shift); }
};
template<typename Gimpl>
struct CshiftImplGauge: public CshiftImplBase<typename Gimpl::GaugeLinkField::vector_object>{
typename Gimpl::GaugeLinkField Cshift(const typename Gimpl::GaugeLinkField &in, int dir, int shift) const override{ return Gimpl::CshiftLink(in,dir,shift); }
};
class PaddedCell {
public:
GridCartesian * unpadded_grid;
int dims;
int depth;
std::vector<GridCartesian *> grids;
~PaddedCell()
{
DeleteGrids();
}
PaddedCell(int _depth,GridCartesian *_grid)
{
unpadded_grid = _grid;
depth=_depth;
dims=_grid->Nd();
AllocateGrids();
Coordinate local =unpadded_grid->LocalDimensions();
for(int d=0;d<dims;d++){
assert(local[d]>=depth);
}
}
void DeleteGrids(void)
{
for(int d=0;d<grids.size();d++){
delete grids[d];
}
grids.resize(0);
};
void AllocateGrids(void)
{
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate simd =unpadded_grid->_simd_layout;
Coordinate processors=unpadded_grid->_processors;
Coordinate plocal =unpadded_grid->LocalDimensions();
Coordinate global(dims);
// expand up one dim at a time
for(int d=0;d<dims;d++){
plocal[d] += 2*depth;
for(int d=0;d<dims;d++){
global[d] = plocal[d]*processors[d];
}
grids.push_back(new GridCartesian(global,simd,processors));
}
};
template<class vobj>
inline Lattice<vobj> Extract(const Lattice<vobj> &in) const
{
Lattice<vobj> out(unpadded_grid);
Coordinate local =unpadded_grid->LocalDimensions();
Coordinate fll(dims,depth); // depends on the MPI spread
Coordinate tll(dims,0); // depends on the MPI spread
localCopyRegion(in,out,fll,tll,local);
return out;
}
template<class vobj>
inline Lattice<vobj> Exchange(const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
int dims = old_grid->Nd();
Lattice<vobj> tmp = in;
for(int d=0;d<dims;d++){
tmp = Expand(d,tmp,cshift); // rvalue && assignment
}
return tmp;
}
// expand up one dim at a time
template<class vobj>
inline Lattice<vobj> Expand(int dim, const Lattice<vobj> &in, const CshiftImplBase<vobj> &cshift = CshiftImplDefault<vobj>()) const
{
GridBase *old_grid = in.Grid();
GridCartesian *new_grid = grids[dim];//These are new grids
Lattice<vobj> padded(new_grid);
Lattice<vobj> shifted(old_grid);
Coordinate local =old_grid->LocalDimensions();
Coordinate plocal =new_grid->LocalDimensions();
if(dim==0) conformable(old_grid,unpadded_grid);
else conformable(old_grid,grids[dim-1]);
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
double tins=0, tshift=0;
// Middle bit
double t = usecond();
for(int x=0;x<local[dim];x++){
InsertSliceLocal(in,padded,x,depth+x,dim);
}
tins += usecond() - t;
// High bit
t = usecond();
shifted = cshift.Cshift(in,dim,depth);
tshift += usecond() - t;
t=usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
}
tins += usecond() - t;
// Low bit
t = usecond();
shifted = cshift.Cshift(in,dim,-depth);
tshift += usecond() - t;
t = usecond();
for(int x=0;x<depth;x++){
InsertSliceLocal(shifted,padded,x,x,dim);
}
tins += usecond() - t;
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
return padded;
}
};
NAMESPACE_END(Grid);

View File

@ -104,6 +104,7 @@ template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iSca
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iLorentzComplex = iVector<iScalar<iScalar<vtype> >, Nd > ;
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
@ -178,6 +179,15 @@ typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
// LorentzComplex
typedef iLorentzComplex<Complex > LorentzComplex;
typedef iLorentzComplex<ComplexF > LorentzComplexF;
typedef iLorentzComplex<ComplexD > LorentzComplexD;
typedef iLorentzComplex<vComplex > vLorentzComplex;
typedef iLorentzComplex<vComplexF> vLorentzComplexF;
typedef iLorentzComplex<vComplexD> vLorentzComplexD;
// DoubleStored gauge field
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
@ -307,6 +317,10 @@ typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
typedef Lattice<vLorentzComplex> LatticeLorentzComplex;
typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF;
typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD;
// DoubleStored gauge field
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;

View File

@ -34,10 +34,24 @@ directory
NAMESPACE_BEGIN(Grid);
///////////////////////////////////
// Smart configuration base class
///////////////////////////////////
template< class Field >
class ConfigurationBase
{
public:
ConfigurationBase() {}
virtual ~ConfigurationBase() {}
virtual void set_Field(Field& U) =0;
virtual void smeared_force(Field&) = 0;
virtual Field& get_SmearedU() =0;
virtual Field &get_U(bool smeared = false) = 0;
};
template <class GaugeField >
class Action
{
public:
bool is_smeared = false;
RealD deriv_norm_sum;
@ -77,11 +91,39 @@ public:
void refresh_timer_stop(void) { refresh_us+=usecond(); }
void S_timer_start(void) { S_us-=usecond(); }
void S_timer_stop(void) { S_us+=usecond(); }
/////////////////////////////
// Heatbath?
/////////////////////////////
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
/////////////////////////////////////////////////////////////
// virtual smeared interface through configuration container
/////////////////////////////////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
refresh(U.get_U(is_smeared),sRNG,pRNG);
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
return S(U.get_U(is_smeared));
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return Sinitial(U.get_U(is_smeared));
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
deriv(U.get_U(is_smeared),dSdU);
if ( is_smeared ) {
U.smeared_force(dSdU);
}
}
///////////////////////////////
// Logging
///////////////////////////////
virtual std::string action_name() = 0; // return the action name
virtual std::string LogParameters() = 0; // prints action parameters
virtual ~Action(){}

View File

@ -30,6 +30,8 @@ directory
#ifndef QCD_ACTION_CORE
#define QCD_ACTION_CORE
#include <Grid/qcd/action/gauge/GaugeImplementations.h>
#include <Grid/qcd/action/ActionBase.h>
NAMESPACE_CHECK(ActionBase);
#include <Grid/qcd/action/ActionSet.h>

View File

@ -507,6 +507,7 @@ public:
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
}
};

View File

@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
/////////////////////////////
{
GRID_TRACE("Gather");
st.HaloExchangeOptGather(in,compressor);
accelerator_barrier();
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
}
std::vector<std::vector<CommsRequest_t> > requests;

View File

@ -423,14 +423,14 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = ss/Ls; \
int sU = sF/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
}); \
accelerator_barrier();
#define ASM_CALL(A) \
thread_for( sss, Nsite, { \
@ -474,9 +474,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
#endif
} else if( exterior ) {
// dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
#endif
@ -506,9 +507,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif
} else if( exterior ) {
// Dependent on result of merge
acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif

View File

@ -176,7 +176,7 @@ public:
return PeriodicBC::CshiftLink(Link,mu,shift);
}
static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline void setDirections(const std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
static inline std::vector<int> getDirections(void) { return _conjDirs; }
static inline bool isPeriodicGaugeField(void) { return false; }
};

View File

@ -43,7 +43,7 @@ public:
private:
RealD c_plaq;
RealD c_rect;
typename WilsonLoops<Gimpl>::StapleAndRectStapleAllWorkspace workspace;
public:
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
@ -79,27 +79,18 @@ public:
GridBase *grid = Umu.Grid();
std::vector<GaugeLinkField> U (Nd,grid);
std::vector<GaugeLinkField> U2(Nd,grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
}
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
GaugeLinkField dSdU_mu(grid);
GaugeLinkField staple(grid);
for (int mu=0; mu < Nd; mu++){
// Staple in direction mu
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
dSdU_mu = Ta(U[mu]*staple)*factor_p;
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}

View File

@ -53,9 +53,10 @@ NAMESPACE_BEGIN(Grid);
Integer ReliableUpdateFreq;
protected:
//Action evaluation
//Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
#if 0
#if 1
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
msCG(schurOp,in, out);
@ -70,9 +71,10 @@ NAMESPACE_BEGIN(Grid);
msCG(schurOpD, in, out);
#endif
}
//Force evaluation
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
@ -84,20 +86,15 @@ NAMESPACE_BEGIN(Grid);
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid());
precisionChange(Uf, Ud);
precisionChange(Ud2, Ud);
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl;
NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf);
NumOpD.ImportGauge(Ud2);
DenOpD.ImportGauge(Ud2);
}
public:

View File

@ -207,20 +207,27 @@ NAMESPACE_BEGIN(Grid);
//X = (Mdag M)^-1 V^dag phi
//Y = (Mdag)^-1 V^dag phi
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
X=Zero();
DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
std::cout << GridLogMessage <<" X "<<norm2(X)<<std::endl;
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
// phi^dag V (Mdag M)^-1 dV^dag phi
Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU = force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// phi^dag dV (Mdag M)^-1 V^dag phi
Vpc.MpcDeriv(force , PhiOdd, X ); dSdU = dSdU+force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi
// - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi
Mpc.MpcDeriv(force,Y,X); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
Mpc.MpcDagDeriv(force,X,Y); dSdU = dSdU-force;
std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
// FIXME No force contribution from EvenEven assumed here
// Needs a fix for clover.

View File

@ -283,12 +283,13 @@ public:
std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
TheIntegrator.print_timer();
TheIntegrator.Smearer.set_Field(Ucur);
for (int obs = 0; obs < Observables.size(); obs++) {
std::cout << GridLogDebug << "Observables # " << obs << std::endl;
std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl;
std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
Observables[obs]->TrajectoryComplete(traj + 1, TheIntegrator.Smearer, sRNG, pRNG);
}
std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
}

View File

@ -35,13 +35,16 @@ class CheckpointerParameters : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters,
std::string, config_prefix,
std::string, smeared_prefix,
std::string, rng_prefix,
int, saveInterval,
bool, saveSmeared,
std::string, format, );
CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
CheckpointerParameters(std::string cf = "cfg", std::string sf="cfg_smr" , std::string rn = "rng",
int savemodulo = 1, const std::string &f = "IEEE64BIG")
: config_prefix(cf),
smeared_prefix(sf),
rng_prefix(rn),
saveInterval(savemodulo),
format(f){};
@ -61,13 +64,21 @@ template <class Impl>
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
public:
void build_filenames(int traj, CheckpointerParameters &Params,
std::string &conf_file, std::string &rng_file) {
std::string &conf_file,
std::string &smear_file,
std::string &rng_file) {
{
std::ostringstream os;
os << Params.rng_prefix << "." << traj;
rng_file = os.str();
}
{
std::ostringstream os;
os << Params.smeared_prefix << "." << traj;
smear_file = os.str();
}
{
std::ostringstream os;
os << Params.config_prefix << "." << traj;
@ -84,6 +95,11 @@ public:
}
virtual void initialize(const CheckpointerParameters &Params) = 0;
virtual void TrajectoryComplete(int traj,
typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) { assert(0); } ; // HMC should pass the smart config with smeared and unsmeared
virtual void CheckpointRestore(int traj, typename Impl::Field &U,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) = 0;

View File

@ -61,11 +61,14 @@ public:
fout.close();
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG, GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
uint32_t nersc_csum;
uint32_t scidac_csuma;
@ -74,9 +77,15 @@ public:
BinarySimpleUnmunger<sobj_double, sobj> munge;
truncate(rng);
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
truncate(config);
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
truncate(config);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(false), config, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Configuration " << config
@ -85,6 +94,18 @@ public:
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
truncate(smr);
BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(true), smr, munge, 0, Params.format,
nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written Binary Smeared Configuration " << smr
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};

View File

@ -69,17 +69,27 @@ public:
}
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
std::cout << GridLogMessage << "Written BINARY RNG " << rng
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(config);
_IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(false), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << config
@ -88,6 +98,21 @@ public:
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
if ( Params.saveSmeared ) {
IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(smr);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, config, config);
_IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << smr
<< " checksum " << std::hex
<< nersc_csum<<"/"
<< scidac_csuma<<"/"
<< scidac_csumb
<< std::dec << std::endl;
}
}
};

View File

@ -52,23 +52,29 @@ public:
Params.format = "IEEE64BIG"; // fixed, overwrite any other choice
}
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
virtual void TrajectoryComplete(int traj,
ConfigurationBase<GaugeField> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng);
int precision32 = 1;
int tworow = 0;
NerscIO::writeRNGState(sRNG, pRNG, rng);
NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(false), config, tworow, precision32);
if ( Params.saveSmeared ) {
NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(true), smr, tworow, precision32);
}
}
};
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
std::string config, rng, smr;
this->build_filenames(traj, Params, config, smr, rng );
this->check_filename(rng);
this->check_filename(config);

View File

@ -70,19 +70,37 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
}
}
void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) {
std::string config, rng;
this->build_filenames(traj, Params, config, rng);
GridBase *grid = U.Grid();
std::string config, rng,smr;
this->build_filenames(traj, Params, config, smr, rng);
GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(U, MData);
_ScidacWriter.close();
std::cout << GridLogMessage << "Written Binary RNG " << rng
<< " checksum " << std::hex
<< nersc_csum <<"/"
<< scidac_csuma <<"/"
<< scidac_csumb
<< std::dec << std::endl;
{
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(config);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(false), MData);
_ScidacWriter.close();
}
if ( Params.saveSmeared ) {
ScidacWriter _ScidacWriter(grid->IsBoss());
_ScidacWriter.open(smr);
_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(true), MData);
_ScidacWriter.close();
}
std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
}
};

View File

@ -66,6 +66,7 @@ public:
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
public:
typedef FieldImplementation_ FieldImplementation;
typedef typename FieldImplementation::Field MomentaField; //for readability
typedef typename FieldImplementation::Field Field;
@ -96,7 +97,6 @@ protected:
{
t_P[level] += ep;
update_P(P, U, level, ep);
std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
}
@ -130,28 +130,20 @@ protected:
Field force(U.Grid());
conformable(U.Grid(), Mom.Grid());
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] before"<<std::endl;
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv(Smearer, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] after"<<std::endl;
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = FieldImplementation::projectForce(force); // Ta for gauge fields
double end_force = usecond();
// DumpSliceNorm("force ",force,Nd-1);
MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<< std::endl;
DumpSliceNorm("force filtered ",force,Nd-1);
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
@ -377,14 +369,9 @@ public:
auto name = as[level].actions.at(actionID)->action_name();
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] before"<<std::endl;
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh(Smearer, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] after"<<std::endl;
}
@ -425,10 +412,9 @@ public:
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Us);
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
@ -469,12 +455,11 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->Sinitial(Us);
as[level].actions.at(actionID)->S_timer_stop();
as[level].actions.at(actionID)->S_timer_start();
Hterm = as[level].actions.at(actionID)->S(Smearer);
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;

View File

@ -34,6 +34,13 @@ NAMESPACE_BEGIN(Grid);
template <class Field>
class HmcObservable {
public:
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
};
virtual void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

View File

@ -42,6 +42,18 @@ public:
// necessary for HmcObservable compatibility
typedef typename Impl::Field Field;
virtual void TrajectoryComplete(int traj,
ConfigurationBase<Field> &SmartConfig,
GridSerialRNG &sRNG,
GridParallelRNG &pRNG)
{
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << "Unsmeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
std::cout << GridLogMessage << "Smeared plaquette"<<std::endl;
TrajectoryComplete(traj,SmartConfig.get_U(true),sRNG,pRNG); // Unsmeared observable
std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
};
void TrajectoryComplete(int traj,
Field &U,
GridSerialRNG &sRNG,

View File

@ -7,26 +7,27 @@
NAMESPACE_BEGIN(Grid);
//trivial class for no smearing
template< class Impl >
class NoSmearing
class NoSmearing : public ConfigurationBase<typename Impl::Field>
{
public:
INHERIT_FIELD_TYPES(Impl);
Field* ThinField;
Field* ThinLinks;
NoSmearing(): ThinField(NULL) {}
NoSmearing(): ThinLinks(NULL) {}
void set_Field(Field& U) { ThinField = &U; }
virtual void set_Field(Field& U) { ThinLinks = &U; }
void smeared_force(Field&) const {}
virtual void smeared_force(Field&) {}
Field& get_SmearedU() { return *ThinField; }
virtual Field& get_SmearedU() { return *ThinLinks; }
Field &get_U(bool smeared = false)
virtual Field &get_U(bool smeared = false)
{
return *ThinField;
return *ThinLinks;
}
};
@ -42,19 +43,24 @@ public:
It stores a list of smeared configurations.
*/
template <class Gimpl>
class SmearedConfiguration
class SmearedConfiguration : public ConfigurationBase<typename Gimpl::Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
protected:
const unsigned int smearingLevels;
Smear_Stout<Gimpl> *StoutSmearing;
std::vector<GaugeField> SmearedSet;
public:
GaugeField* ThinLinks; /* Pointer to the thin links configuration */ // move to base???
protected:
// Member functions
//====================================================================
void fill_smearedSet(GaugeField &U)
// Overridden in masked version
virtual void fill_smearedSet(GaugeField &U)
{
ThinLinks = &U; // attach the smearing routine to the field U
@ -82,9 +88,10 @@ private:
}
}
}
//====================================================================
GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
//overridden in masked verson
virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK) const
{
GridBase* grid = GaugeK.Grid();
GaugeField C(grid), SigmaK(grid), iLambda(grid);
@ -213,8 +220,6 @@ private:
//====================================================================
public:
GaugeField*
ThinLinks; /* Pointer to the thin links configuration */
/* Standard constructor */
SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
@ -230,7 +235,7 @@ public:
: smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
// attach the smeared routines to the thin links U and fill the smeared set
void set_Field(GaugeField &U)
virtual void set_Field(GaugeField &U)
{
double start = usecond();
fill_smearedSet(U);
@ -240,7 +245,7 @@ public:
}
//====================================================================
void smeared_force(GaugeField &SigmaTilde) const
virtual void smeared_force(GaugeField &SigmaTilde)
{
if (smearingLevels > 0)
{
@ -267,14 +272,16 @@ public:
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;
std::cout << GridLogMessage << " GaugeConfiguration: Smeared Force chain rule took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
}
//====================================================================
GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
virtual GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
GaugeField &get_U(bool smeared = false)
virtual GaugeField &get_U(bool smeared = false)
{
// get the config, thin links by default
if (smeared)

View File

@ -0,0 +1,813 @@
/*!
@file GaugeConfiguration.h
@brief Declares the GaugeConfiguration class
*/
#pragma once
NAMESPACE_BEGIN(Grid);
/*!
@brief Smeared configuration masked container
Modified for a multi-subset smearing (aka Luscher Flowed HMC)
*/
template <class Gimpl>
class SmearedConfigurationMasked : public SmearedConfiguration<Gimpl>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
// These live in base class
// const unsigned int smearingLevels;
// Smear_Stout<Gimpl> *StoutSmearing;
// std::vector<GaugeField> SmearedSet;
std::vector<LatticeLorentzComplex> masks;
typedef typename SU3Adjoint::AMatrix AdjMatrix;
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
// Adjoint vector to GaugeField force
void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu)
{
Complex ci(0,1);
GaugeLinkField Fdet_pol(Fdet.Grid());
Fdet_pol=Zero();
for(int e=0;e<8;e++){
ColourMatrix te;
SU3::generator(e, te);
auto tmp=peekColour(Fdet_nu,e);
Fdet_pol=Fdet_pol + ci*tmp*te; // but norm of te is different.. why?
}
pokeLorentz(Fdet, Fdet_pol, nu);
}
void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 )
{
GaugeLinkField UtaU(PlaqL.Grid());
GaugeLinkField D(PlaqL.Grid());
AdjMatrixField Dbc(PlaqL.Grid());
LatticeComplex tmp(PlaqL.Grid());
const int Ngen = SU3Adjoint::Dimension;
Complex ci(0,1);
ColourMatrix ta,tb,tc;
for(int a=0;a<Ngen;a++) {
SU3::generator(a, ta);
// Qlat Tb = 2i Tb^Grid
UtaU= 2.0*ci*adj(PlaqL)*ta*PlaqR;
for(int c=0;c<Ngen;c++) {
SU3::generator(c, tc);
D = Ta( (2.0)*ci*tc *UtaU);
for(int b=0;b<Ngen;b++){
SU3::generator(b, tb);
tmp =-trace(ci*tb*D);
PokeIndex<ColourIndex>(Dbc,tmp,b,c); // Adjoint rep
}
}
tmp = trace(MpInvJx * Dbc);
PokeIndex<ColourIndex>(Fdet2,tmp,a);
}
}
void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd)
{
GaugeLinkField Nx(PlaqL.Grid());
const int Ngen = SU3Adjoint::Dimension;
Complex ci(0,1);
ColourMatrix tb;
ColourMatrix tc;
for(int b=0;b<Ngen;b++) {
SU3::generator(b, tb);
Nx = (2.0)*Ta( adj(PlaqL)*ci*tb * PlaqR );
for(int c=0;c<Ngen;c++) {
SU3::generator(c, tc);
auto tmp =closure( -trace(ci*tc*Nx));
PokeIndex<ColourIndex>(NxAd,tmp,c,b);
}
}
}
void ApplyMask(GaugeField &U,int smr)
{
LatticeComplex tmp(U.Grid());
GaugeLinkField Umu(U.Grid());
for(int mu=0;mu<Nd;mu++){
Umu=PeekIndex<LorentzIndex>(U,mu);
tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
Umu=Umu*tmp;
PokeIndex<LorentzIndex>(U, Umu, mu);
}
}
public:
void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr)
{
GridBase* grid = U.Grid();
ColourMatrix tb;
ColourMatrix tc;
ColourMatrix ta;
GaugeField C(grid);
GaugeField Umsk(grid);
std::vector<GaugeLinkField> Umu(Nd,grid);
GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon
GaugeLinkField Zx(grid); // U times Staple, contains factor of epsilon
GaugeLinkField Nxx(grid); // Nxx fundamental space
GaugeLinkField Utmp(grid);
GaugeLinkField PlaqL(grid);
GaugeLinkField PlaqR(grid);
const int Ngen = SU3Adjoint::Dimension;
AdjMatrix TRb;
ColourMatrix Ident;
LatticeComplex cplx(grid);
AdjVectorField dJdXe_nMpInv(grid);
AdjVectorField dJdXe_nMpInv_y(grid);
AdjMatrixField MpAd(grid); // Mprime luchang's notes
AdjMatrixField MpAdInv(grid); // Mprime inverse
AdjMatrixField NxxAd(grid); // Nxx in adjoint space
AdjMatrixField JxAd(grid);
AdjMatrixField ZxAd(grid);
AdjMatrixField mZxAd(grid);
AdjMatrixField X(grid);
Complex ci(0,1);
RealD t0 = usecond();
Ident = ComplexD(1.0);
for(int d=0;d<Nd;d++){
Umu[d] = peekLorentz(U, d);
}
int mu= (smr/2) %Nd;
////////////////////////////////////////////////////////////////////////////////
// Mask the gauge field
////////////////////////////////////////////////////////////////////////////////
auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
Umsk = U;
ApplyMask(Umsk,smr);
Utmp = peekLorentz(Umsk,mu);
////////////////////////////////////////////////////////////////////////////////
// Retrieve the eps/rho parameter(s) -- could allow all different but not so far
////////////////////////////////////////////////////////////////////////////////
double rho=this->StoutSmearing->SmearRho[1];
int idx=0;
for(int mu=0;mu<4;mu++){
for(int nu=0;nu<4;nu++){
if ( mu!=nu) assert(this->StoutSmearing->SmearRho[idx]==rho);
else assert(this->StoutSmearing->SmearRho[idx]==0.0);
idx++;
}}
//////////////////////////////////////////////////////////////////
// Assemble the N matrix
//////////////////////////////////////////////////////////////////
// Computes ALL the staples -- could compute one only and do it here
RealD time;
time=-usecond();
this->StoutSmearing->BaseSmear(C, U);
Cmu = peekLorentz(C, mu);
//////////////////////////////////////////////////////////////////
// Assemble Luscher exp diff map J matrix
//////////////////////////////////////////////////////////////////
// Ta so Z lives in Lie algabra
Zx = Ta(Cmu * adj(Umu[mu]));
time+=usecond();
std::cout << GridLogMessage << "Z took "<<time<< " us"<<std::endl;
time=-usecond();
// Move Z to the Adjoint Rep == make_adjoint_representation
ZxAd = Zero();
for(int b=0;b<8;b++) {
// Adj group sets traceless antihermitian T's -- Guido, really????
SU3::generator(b, tb); // Fund group sets traceless hermitian T's
SU3Adjoint::generator(b,TRb);
TRb=-TRb;
cplx = 2.0*trace(ci*tb*Zx); // my convention 1/2 delta ba
ZxAd = ZxAd + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
}
time+=usecond();
std::cout << GridLogMessage << "ZxAd took "<<time<< " us"<<std::endl;
//////////////////////////////////////
// J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
//////////////////////////////////////
time=-usecond();
X=1.0;
JxAd = X;
mZxAd = (-1.0)*ZxAd;
RealD kpfac = 1;
for(int k=1;k<12;k++){
X=X*mZxAd;
kpfac = kpfac /(k+1);
JxAd = JxAd + X * kpfac;
}
time+=usecond();
std::cout << GridLogMessage << "Jx took "<<time<< " us"<<std::endl;
//////////////////////////////////////
// dJ(x)/dxe
//////////////////////////////////////
time=-usecond();
std::vector<AdjMatrixField> dJdX; dJdX.resize(8,grid);
AdjMatrixField tbXn(grid);
AdjMatrixField sumXtbX(grid);
AdjMatrixField t2(grid);
AdjMatrixField dt2(grid);
AdjMatrixField t3(grid);
AdjMatrixField dt3(grid);
AdjMatrixField aunit(grid);
for(int b=0;b<8;b++){
aunit = ComplexD(1.0);
SU3Adjoint::generator(b, TRb); //dt2
X = (-1.0)*ZxAd;
t2 = X;
dt2 = TRb;
for (int j = 20; j > 1; --j) {
t3 = t2*(1.0 / (j + 1)) + aunit;
dt3 = dt2*(1.0 / (j + 1));
t2 = X * t3;
dt2 = TRb * t3 + X * dt3;
}
dJdX[b] = -dt2;
}
time+=usecond();
std::cout << GridLogMessage << "dJx took "<<time<< " us"<<std::endl;
/////////////////////////////////////////////////////////////////
// Mask Umu for this link
/////////////////////////////////////////////////////////////////
time=-usecond();
PlaqL = Ident;
PlaqR = Utmp*adj(Cmu);
ComputeNxy(PlaqL,PlaqR,NxxAd);
time+=usecond();
std::cout << GridLogMessage << "ComputeNxy took "<<time<< " us"<<std::endl;
////////////////////////////
// Mab
////////////////////////////
MpAd = Complex(1.0,0.0);
MpAd = MpAd - JxAd * NxxAd;
/////////////////////////
// invert the 8x8
/////////////////////////
time=-usecond();
MpAdInv = Inverse(MpAd);
time+=usecond();
std::cout << GridLogMessage << "MpAdInv took "<<time<< " us"<<std::endl;
RealD t3a = usecond();
/////////////////////////////////////////////////////////////////
// Nxx Mp^-1
/////////////////////////////////////////////////////////////////
AdjVectorField FdetV(grid);
AdjVectorField Fdet1_nu(grid);
AdjVectorField Fdet2_nu(grid);
AdjVectorField Fdet2_mu(grid);
AdjVectorField Fdet1_mu(grid);
AdjMatrixField nMpInv(grid);
nMpInv= NxxAd *MpAdInv;
AdjMatrixField MpInvJx(grid);
AdjMatrixField MpInvJx_nu(grid);
MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
Fdet2_mu=FdetV;
Fdet1_mu=Zero();
for(int e =0 ; e<8 ; e++){
LatticeComplexD tr(grid);
ColourMatrix te;
SU3::generator(e, te);
tr = trace(dJdX[e] * nMpInv);
pokeColour(dJdXe_nMpInv,tr,e);
}
///////////////////////////////
// Mask it off
///////////////////////////////
auto tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
dJdXe_nMpInv = dJdXe_nMpInv*tmp;
// dJdXe_nMpInv needs to multiply:
// Nxx_mu (site local) (1)
// Nxy_mu one site forward in each nu direction (3)
// Nxy_mu one site backward in each nu direction (3)
// Nxy_nu 0,0 ; +mu,0; 0,-nu; +mu-nu [ 3x4 = 12]
// 19 terms.
AdjMatrixField Nxy(grid);
GaugeField Fdet1(grid);
GaugeField Fdet2(grid);
GaugeLinkField Fdet_pol(grid); // one polarisation
RealD t4 = usecond();
for(int nu=0;nu<Nd;nu++){
if (nu!=mu) {
///////////////// +ve nu /////////////////
// __
// | |
// x== // nu polarisation -- clockwise
time=-usecond();
PlaqL=Ident;
PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu))));
time+=usecond();
std::cout << GridLogMessage << "PlaqLR took "<<time<< " us"<<std::endl;
time=-usecond();
dJdXe_nMpInv_y = dJdXe_nMpInv;
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = transpose(Nxy)*dJdXe_nMpInv_y;
time+=usecond();
std::cout << GridLogMessage << "ComputeNxy (occurs 6x) took "<<time<< " us"<<std::endl;
time=-usecond();
PlaqR=(-1.0)*PlaqR;
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
Fdet2_nu = FdetV;
time+=usecond();
std::cout << GridLogMessage << "Compute_MpInvJx_dNxxSy (occurs 6x) took "<<time<< " us"<<std::endl;
// x==
// | |
// .__| // nu polarisation -- anticlockwise
PlaqR=(rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftBackward(Umu[mu], mu,
Gimpl::CovShiftIdentityBackward(Umu[nu], nu)));
PlaqL=Gimpl::CovShiftIdentityBackward(Utmp, mu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
ComputeNxy(PlaqL, PlaqR,Nxy);
Fdet1_nu = Fdet1_nu+transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,mu,-1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
///////////////// -ve nu /////////////////
// __
// | |
// x== // nu polarisation -- clockwise
PlaqL=(rho)* Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR = Gimpl::CovShiftIdentityForward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
// x==
// | |
// |__| // nu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu));
PlaqR=Gimpl::CovShiftBackward(Umu[mu], mu,
Gimpl::CovShiftIdentityForward(Umu[nu], nu));
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv_y,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,mu,-1);
MpInvJx_nu = Cshift(MpInvJx_nu,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_nu = Fdet2_nu+FdetV;
/////////////////////////////////////////////////////////////////////
// Set up the determinant force contribution in 3x3 algebra basis
/////////////////////////////////////////////////////////////////////
InsertForce(Fdet1,Fdet1_nu,nu);
InsertForce(Fdet2,Fdet2_nu,nu);
//////////////////////////////////////////////////
// Parallel direction terms
//////////////////////////////////////////////////
// __
// | "
// |__"x // mu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,-1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_mu = Fdet2_mu+FdetV;
// __
// " |
// x__| // mu polarisation
PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
Gimpl::CovShiftForward(Umu[nu], nu,
Gimpl::CovShiftIdentityBackward(Utmp, mu)));
PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu);
dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
ComputeNxy(PlaqL,PlaqR,Nxy);
Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
MpInvJx_nu = Cshift(MpInvJx,nu,1);
Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
Fdet2_mu = Fdet2_mu+FdetV;
}
}
RealD t5 = usecond();
Fdet1_mu = Fdet1_mu + transpose(NxxAd)*dJdXe_nMpInv;
InsertForce(Fdet1,Fdet1_mu,mu);
InsertForce(Fdet2,Fdet2_mu,mu);
force= (-0.5)*( Fdet1 + Fdet2);
RealD t1 = usecond();
std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t3-t0 "<<t3a-t0<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t4-t3 dJdXe_nMpInv "<<t4-t3a<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t5-t4 mu nu loop "<<t5-t4<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce t1-t5 "<<t1-t5<<" us "<<std::endl;
std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
}
RealD logDetJacobianLevel(const GaugeField &U,int smr)
{
GridBase* grid = U.Grid();
GaugeField C(grid);
GaugeLinkField Nb(grid);
GaugeLinkField Z(grid);
GaugeLinkField Umu(grid), Cmu(grid);
ColourMatrix Tb;
ColourMatrix Tc;
typedef typename SU3Adjoint::AMatrix AdjMatrix;
typedef typename SU3Adjoint::LatticeAdjMatrix AdjMatrixField;
typedef typename SU3Adjoint::LatticeAdjVector AdjVectorField;
const int Ngen = SU3Adjoint::Dimension;
AdjMatrix TRb;
LatticeComplex cplx(grid);
AdjVectorField AlgV(grid);
AdjMatrixField Mab(grid);
AdjMatrixField Ncb(grid);
AdjMatrixField Jac(grid);
AdjMatrixField Zac(grid);
AdjMatrixField mZac(grid);
AdjMatrixField X(grid);
int mu= (smr/2) %Nd;
auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
//////////////////////////////////////////////////////////////////
// Assemble the N matrix
//////////////////////////////////////////////////////////////////
// Computes ALL the staples -- could compute one only here
this->StoutSmearing->BaseSmear(C, U);
Cmu = peekLorentz(C, mu);
Umu = peekLorentz(U, mu);
Complex ci(0,1);
for(int b=0;b<Ngen;b++) {
SU3::generator(b, Tb);
// Qlat Tb = 2i Tb^Grid
Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu));
for(int c=0;c<Ngen;c++) {
SU3::generator(c, Tc);
auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important
PokeIndex<ColourIndex>(Ncb,tmp,c,b);
}
}
//////////////////////////////////////////////////////////////////
// Assemble Luscher exp diff map J matrix
//////////////////////////////////////////////////////////////////
// Ta so Z lives in Lie algabra
Z = Ta(Cmu * adj(Umu));
// Move Z to the Adjoint Rep == make_adjoint_representation
Zac = Zero();
for(int b=0;b<8;b++) {
// Adj group sets traceless antihermitian T's -- Guido, really????
// Is the mapping of these the same? Same structure constants
// Might never have been checked.
SU3::generator(b, Tb); // Fund group sets traceless hermitian T's
SU3Adjoint::generator(b,TRb);
TRb=-TRb;
cplx = 2.0*trace(ci*Tb*Z); // my convention 1/2 delta ba
Zac = Zac + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
}
//////////////////////////////////////
// J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
//////////////////////////////////////
X=1.0;
Jac = X;
mZac = (-1.0)*Zac;
RealD kpfac = 1;
for(int k=1;k<12;k++){
X=X*mZac;
kpfac = kpfac /(k+1);
Jac = Jac + X * kpfac;
}
////////////////////////////
// Mab
////////////////////////////
Mab = Complex(1.0,0.0);
Mab = Mab - Jac * Ncb;
////////////////////////////
// det
////////////////////////////
LatticeComplex det(grid);
det = Determinant(Mab);
////////////////////////////
// ln det
////////////////////////////
LatticeComplex ln_det(grid);
ln_det = log(det);
////////////////////////////
// Masked sum
////////////////////////////
ln_det = ln_det * mask;
Complex result = sum(ln_det);
return result.real();
}
public:
RealD logDetJacobian(void)
{
RealD ln_det = 0;
if (this->smearingLevels > 0)
{
double start = usecond();
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr);
}
ln_det +=logDetJacobianLevel(*(this->ThinLinks),0);
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl;
}
return ln_det;
}
void logDetJacobianForce(GaugeField &force)
{
force =Zero();
GaugeField force_det(force.Grid());
if (this->smearingLevels > 0)
{
double start = usecond();
GaugeLinkField tmp_mu(force.Grid());
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
// remove U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
// Propagate existing force
force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr);
// Add back U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
// Get this levels determinant force
force_det = Zero();
logDetJacobianForceLevel(this->get_smeared_conf(ismr-1),force_det,ismr);
// Sum the contributions
force = force + force_det;
}
// remove U in UdSdU...
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = adj(peekLorentz(this->get_smeared_conf(0), mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
for (int mu = 0; mu < Nd; mu++) {
tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
force_det = Zero();
logDetJacobianForceLevel(*this->ThinLinks,force_det,0);
force = force + force_det;
force=Ta(force); // Ta
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: lnDetJacobianForce took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
}
private:
//====================================================================
// Override base clas here to mask it
virtual void fill_smearedSet(GaugeField &U)
{
this->ThinLinks = &U; // attach the smearing routine to the field U
// check the pointer is not null
if (this->ThinLinks == NULL)
std::cout << GridLogError << "[SmearedConfigurationMasked] Error in ThinLinks pointer\n";
if (this->smearingLevels > 0)
{
std::cout << GridLogMessage << "[SmearedConfigurationMasked] Filling SmearedSet\n";
GaugeField previous_u(this->ThinLinks->Grid());
GaugeField smeared_A(this->ThinLinks->Grid());
GaugeField smeared_B(this->ThinLinks->Grid());
previous_u = *this->ThinLinks;
double start = usecond();
for (int smearLvl = 0; smearLvl < this->smearingLevels; ++smearLvl)
{
this->StoutSmearing->smear(smeared_A, previous_u);
ApplyMask(smeared_A,smearLvl);
smeared_B = previous_u;
ApplyMask(smeared_B,smearLvl);
// Replace only the masked portion
this->SmearedSet[smearLvl] = previous_u-smeared_B + smeared_A;
previous_u = this->SmearedSet[smearLvl];
// For debug purposes
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(previous_u);
std::cout << GridLogMessage << "[SmearedConfigurationMasked] smeared Plaq: " << impl_plaq << std::endl;
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << "GaugeConfigurationMasked: Link smearing took " << time << " ms" << std::endl;
}
}
//====================================================================
// Override base to add masking
virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
const GaugeField& GaugeK,int level)
{
GridBase* grid = GaugeK.Grid();
GaugeField C(grid), SigmaK(grid), iLambda(grid);
GaugeField SigmaKPrimeA(grid);
GaugeField SigmaKPrimeB(grid);
GaugeLinkField iLambda_mu(grid);
GaugeLinkField iQ(grid), e_iQ(grid);
GaugeLinkField SigmaKPrime_mu(grid);
GaugeLinkField GaugeKmu(grid), Cmu(grid);
this->StoutSmearing->BaseSmear(C, GaugeK);
SigmaK = Zero();
iLambda = Zero();
SigmaKPrimeA = SigmaKPrime;
ApplyMask(SigmaKPrimeA,level);
SigmaKPrimeB = SigmaKPrime - SigmaKPrimeA;
// Could get away with computing only one polarisation here
// int mu= (smr/2) %Nd;
// SigmaKprime_A has only one component
for (int mu = 0; mu < Nd; mu++)
{
Cmu = peekLorentz(C, mu);
GaugeKmu = peekLorentz(GaugeK, mu);
SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
iQ = Ta(Cmu * adj(GaugeKmu));
this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
pokeLorentz(iLambda, iLambda_mu, mu);
}
this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK); // derivative of SmearBase
////////////////////////////////////////////////////////////////////////////////////
// propagate the rest of the force as identity map, just add back
////////////////////////////////////////////////////////////////////////////////////
SigmaK = SigmaK+SigmaKPrimeB;
return SigmaK;
}
public:
/* Standard constructor */
SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout<Gimpl>& Stout)
: SmearedConfiguration<Gimpl>(_UGrid, Nsmear,Stout)
{
assert(Nsmear%(2*Nd)==0); // Or multiply by 8??
// was resized in base class
assert(this->SmearedSet.size()==Nsmear);
GridRedBlackCartesian * UrbGrid;
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(_UGrid);
LatticeComplex one(_UGrid); one = ComplexD(1.0,0.0);
LatticeComplex tmp(_UGrid);
for (unsigned int i = 0; i < this->smearingLevels; ++i) {
masks.push_back(*(new LatticeLorentzComplex(_UGrid)));
int mu= (i/2) %Nd;
int cb= (i%2);
LatticeComplex tmpcb(UrbGrid);
masks[i]=Zero();
////////////////////
// Setup the mask
////////////////////
tmp = Zero();
pickCheckerboard(cb,tmpcb,one);
setCheckerboard(tmp,tmpcb);
PokeIndex<LorentzIndex>(masks[i],tmp, mu);
}
delete UrbGrid;
}
virtual void smeared_force(GaugeField &SigmaTilde)
{
if (this->smearingLevels > 0)
{
double start = usecond();
GaugeField force = SigmaTilde; // actually = U*SigmaTilde
GaugeLinkField tmp_mu(SigmaTilde.Grid());
// Remove U from UdSdU
for (int mu = 0; mu < Nd; mu++)
{
// to get just SigmaTilde
tmp_mu = adj(peekLorentz(this->SmearedSet[this->smearingLevels - 1], mu)) * peekLorentz(force, mu);
pokeLorentz(force, tmp_mu, mu);
}
for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1),ismr);
}
force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
// Add U to UdSdU
for (int mu = 0; mu < Nd; mu++)
{
tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
pokeLorentz(SigmaTilde, tmp_mu, mu);
}
double end = usecond();
double time = (end - start)/ 1e3;
std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl;
} // if smearingLevels = 0 do nothing
SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
}
};
NAMESPACE_END(Grid);

View File

@ -0,0 +1,87 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/JacobianAction.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////
// Jacobian Action ..
////////////////////////////////////////////////////////////////////////
template <class Gimpl>
class JacobianAction : public Action<typename Gimpl::GaugeField> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
SmearedConfigurationMasked<Gimpl> * smearer;
/////////////////////////// constructors
explicit JacobianAction(SmearedConfigurationMasked<Gimpl> * _smearer ) { smearer=_smearer;};
virtual std::string action_name() {return "JacobianAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "[JacobianAction] " << std::endl;
return sstream.str();
}
//////////////////////////////////
// Usual cases are not used
//////////////////////////////////
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){ assert(0);};
virtual RealD S(const GaugeField &U) { assert(0); }
virtual void deriv(const GaugeField &U, GaugeField &dSdU) { assert(0); }
//////////////////////////////////
// Functions of smart configs only
//////////////////////////////////
virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
{
return;
}
virtual RealD S(ConfigurationBase<GaugeField>& U)
{
// det M = e^{ - ( - logDetM) }
assert( &U == smearer );
return -smearer->logDetJacobian();
}
virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)
{
return S(U);
}
virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
{
assert( &U == smearer );
smearer->logDetJacobianForce(dSdU);
}
private:
};
NAMESPACE_END(Grid);

View File

@ -40,7 +40,9 @@ template <class Gimpl>
class Smear_Stout : public Smear<Gimpl> {
private:
int OrthogDim = -1;
public:
const std::vector<double> SmearRho;
private:
// Smear<Gimpl>* ownership semantics:
// Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
// Smear<Gimpl>* created within constructor need to be deleted as part of the destructor

View File

@ -37,13 +37,14 @@ NAMESPACE_BEGIN(Grid);
// Make these members of an Impl class for BC's.
namespace PeriodicBC {
//Out(x) = Link(x)*field(x+mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftForward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
{
return Link*Cshift(field,mu,1);// moves towards negative mu
}
//Out(x) = Link^dag(x-mu)*field(x-mu)
template<class covariant,class gauge> Lattice<covariant> CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const Lattice<covariant> &field)
@ -52,19 +53,19 @@ namespace PeriodicBC {
tmp = adj(Link)*field;
return Cshift(tmp,mu,-1);// moves towards positive mu
}
//Out(x) = Link^dag(x-mu)
template<class gauge> Lattice<gauge>
CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
{
return Cshift(adj(Link), mu, -1);
}
//Out(x) = Link(x)
template<class gauge> Lattice<gauge>
CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
{
return Link;
}
//Link(x) = Link(x+mu)
template<class gauge> Lattice<gauge>
ShiftStaple(const Lattice<gauge> &Link, int mu)
{

View File

@ -34,6 +34,61 @@ directory
NAMESPACE_BEGIN(Grid);
template<int N, class Vec>
Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
typedef typename Vec::scalar_type scalar;
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<scalar, N> > > Us;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
scalar tmp= Us()()(i,j);
ComplexD ztmp(real(tmp),imag(tmp));
EigenU(i,j)=ztmp;
}}
ComplexD detD = EigenU.determinant();
typename Vec::scalar_type det(detD.real(),detD.imag());
pokeLocalSite(det,ret_v,lcoor);
});
return ret;
}
template<int N, class Vec>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
{
Umu = ProjectOnGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for(int i=0;i<N;i++){
auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
element = element * det;
PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
}
}
template<int N,class Vec>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<Vec, N> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
template <int ncolour>
class SU {
public:
@ -741,8 +796,14 @@ public:
typedef Lattice<vMatrixType> LatticeMatrixType;
LatticeMatrixType Umu(out.Grid());
LatticeMatrixType tmp(out.Grid());
for (int mu = 0; mu < Nd; mu++) {
LieRandomize(pRNG, Umu, 1.0);
// LieRandomize(pRNG, Umu, 1.0);
// PokeIndex<LorentzIndex>(out, Umu, mu);
gaussian(pRNG,Umu);
tmp = Ta(Umu);
taExp(tmp,Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(out, Umu, mu);
}
}
@ -799,12 +860,12 @@ public:
};
template<int N>
LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
GridBase *grid=Umu.Grid();
auto lvol = grid->lSites();
LatticeComplexD ret(grid);
Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
autoView(Umu_v,Umu,CpuRead);
autoView(ret_v,ret,CpuWrite);
thread_for(site,lvol,{
@ -812,42 +873,21 @@ LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N>
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;
peekLocalSite(Us, Umu_v, lcoor);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
EigenU(i,j) = Us()()(i,j);
}}
ComplexD det = EigenU.determinant();
pokeLocalSite(det,ret_v,lcoor);
Eigen::MatrixXcd EigenUinv = EigenU.inverse();
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
Ui()()(i,j) = EigenUinv(i,j);
}}
pokeLocalSite(Ui,ret_v,lcoor);
});
return ret;
}
template<int N>
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
{
Umu = ProjectOnGroup(Umu);
auto det = Determinant(Umu);
det = conjugate(det);
for(int i=0;i<N;i++){
auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
element = element * det;
PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
}
}
template<int N>
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplexD, N> >,Nd> > &U)
{
GridBase *grid=U.Grid();
// Reunitarise
for(int mu=0;mu<Nd;mu++){
auto Umu = PeekIndex<LorentzIndex>(U,mu);
Umu = ProjectOnGroup(Umu);
ProjectSUn(Umu);
PokeIndex<LorentzIndex>(U,Umu,mu);
}
}
// Explicit specialisation for SU(3).
// Explicit specialisation for SU(3).
static void

View File

@ -51,6 +51,7 @@ public:
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
typedef Lattice<iScalar<iScalar<iVector<vComplex, Dimension> > > > LatticeAdjVector;
template <class cplx>
static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {

View File

@ -290,7 +290,7 @@ public:
}
*/
//////////////////////////////////////////////////
// the sum over all staples on each site
// the sum over all nu-oriented staples for nu != mu on each site
//////////////////////////////////////////////////
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
@ -300,6 +300,10 @@ public:
for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d);
}
Staple(staple, U, mu);
}
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) {
staple = Zero();
for (int nu = 0; nu < Nd; nu++) {
@ -335,6 +339,202 @@ public:
}
}
/////////////
//Staples for each direction mu, summed over nu != mu
//staple: output staples for each mu (Nd)
//U: link array (Nd)
/////////////
static void StapleAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U) {
assert(staple.size() == Nd); assert(U.size() == Nd);
for(int mu=0;mu<Nd;mu++) Staple(staple[mu], U, mu);
}
//A workspace class allowing reuse of the stencil
class WilsonLoopPaddedStencilWorkspace{
std::unique_ptr<GeneralLocalStencil> stencil;
size_t nshift;
void generateStencil(GridBase* padded_grid){
double t0 = usecond();
//Generate shift arrays
std::vector<Coordinate> shifts = this->getShifts();
nshift = shifts.size();
double t1 = usecond();
//Generate local stencil
stencil.reset(new GeneralLocalStencil(padded_grid,shifts));
double t2 = usecond();
std::cout << GridLogPerformance << " WilsonLoopPaddedWorkspace timings: coord:" << (t1-t0)/1000 << "ms, stencil:" << (t2-t1)/1000 << "ms" << std::endl;
}
public:
//Get the stencil. If not already generated, or if generated using a different Grid than in PaddedCell, it will be created on-the-fly
const GeneralLocalStencil & getStencil(const PaddedCell &pcell){
assert(pcell.depth >= this->paddingDepth());
if(!stencil || stencil->Grid() != (GridBase*)pcell.grids.back() ) generateStencil((GridBase*)pcell.grids.back());
return *stencil;
}
size_t Nshift() const{ return nshift; }
virtual std::vector<Coordinate> getShifts() const = 0;
virtual int paddingDepth() const = 0; //padding depth required
virtual ~WilsonLoopPaddedStencilWorkspace(){}
};
//This workspace allows the sharing of a common PaddedCell object between multiple stencil workspaces
class WilsonLoopPaddedWorkspace{
std::vector<WilsonLoopPaddedStencilWorkspace*> stencil_wk;
std::unique_ptr<PaddedCell> pcell;
void generatePcell(GridBase* unpadded_grid){
assert(stencil_wk.size());
int max_depth = 0;
for(auto const &s : stencil_wk) max_depth=std::max(max_depth, s->paddingDepth());
pcell.reset(new PaddedCell(max_depth, dynamic_cast<GridCartesian*>(unpadded_grid)));
}
public:
//Add a stencil definition. This should be done before the first call to retrieve a stencil object.
//Takes ownership of the pointer
void addStencil(WilsonLoopPaddedStencilWorkspace *stencil){
assert(!pcell);
stencil_wk.push_back(stencil);
}
const GeneralLocalStencil & getStencil(const size_t stencil_idx, GridBase* unpadded_grid){
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
return stencil_wk[stencil_idx]->getStencil(*pcell);
}
const PaddedCell & getPaddedCell(GridBase* unpadded_grid){
if(!pcell || pcell->unpadded_grid != unpadded_grid) generatePcell(unpadded_grid);
return *pcell;
}
~WilsonLoopPaddedWorkspace(){
for(auto &s : stencil_wk) delete s;
}
};
//A workspace class allowing reuse of the stencil
class StaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
public:
std::vector<Coordinate> getShifts() const override{
std::vector<Coordinate> shifts;
for(int mu=0;mu<Nd;mu++){
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
Coordinate shift_0(Nd,0);
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
shifts.push_back(shift_0);
shifts.push_back(shift_nu);
shifts.push_back(shift_mu);
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu_pmu);
}
}
}
return shifts;
}
int paddingDepth() const override{ return 1; }
};
//Padded cell implementation of the staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
StaplePaddedAllWorkspace wk;
StaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
}
//Padded cell implementation of the staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
//gStencil: the precomputed generalized local stencil for the staple
static void StaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
double t0 = usecond();
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
assert(Cell.depth >= 1);
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
int shift_mu_off = gStencil._npoints/Nd;
//Open views to padded gauge links and keep open over mu loop
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType);
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
GaugeMat gStaple(ggrid);
int outer_off = 0;
for(int mu=0;mu<Nd;mu++){
{ //view scope
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
stencil_ss = Zero();
int off = outer_off;
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
GeneralStencilEntry const* e = gStencil_v.GetEntry(off++,ss);
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
auto U2 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U2 * U1 * U0;
e = gStencil_v.GetEntry(off++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(off++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(off++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U2 * U1 * U0;
}
}
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
} //ensure views are all closed!
staple[mu] = Cell.Extract(gStaple);
outer_off += shift_mu_off;
}//mu loop
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
free(Ug_dirs_v_host);
acceleratorFreeDevice(Ug_dirs_v);
double t1=usecond();
std::cout << GridLogPerformance << "StaplePaddedAll timing:" << (t1-t0)/1000 << "ms" << std::endl;
}
//////////////////////////////////////////////////
// the sum over all staples on each site in direction mu,nu, upper part
//////////////////////////////////////////////////
@ -707,18 +907,14 @@ public:
// the sum over all staples on each site
//////////////////////////////////////////////////
static void RectStapleDouble(GaugeMat &U2, const GaugeMat &U, int mu) {
U2 = U * Cshift(U, mu, 1);
U2 = U * Gimpl::CshiftLink(U, mu, 1);
}
////////////////////////////////////////////////////////////////////////////
// Hop by two optimisation strategy does not work nicely with Gparity. (could
// do,
// but need to track two deep where cross boundary and apply a conjugation).
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do
// so .
// Hop by two optimisation strategy. Use RectStapleDouble to obtain 'U2'
////////////////////////////////////////////////////////////////////////////
static void RectStapleOptimised(GaugeMat &Stap, std::vector<GaugeMat> &U2,
std::vector<GaugeMat> &U, int mu) {
static void RectStapleOptimised(GaugeMat &Stap, const std::vector<GaugeMat> &U2,
const std::vector<GaugeMat> &U, int mu) {
Stap = Zero();
@ -732,9 +928,9 @@ public:
// Up staple ___ ___
// | |
tmp = Cshift(adj(U[nu]), nu, -1);
tmp = Gimpl::CshiftLink(adj(U[nu]), nu, -1);
tmp = adj(U2[mu]) * tmp;
tmp = Cshift(tmp, mu, -2);
tmp = Gimpl::CshiftLink(tmp, mu, -2);
Staple2x1 = Gimpl::CovShiftForward(U[nu], nu, tmp);
@ -742,14 +938,14 @@ public:
// |___ ___|
//
tmp = adj(U2[mu]) * U[nu];
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Cshift(tmp, mu, -2));
Staple2x1 += Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CshiftLink(tmp, mu, -2));
// ___ ___
// | ___|
// |___ ___|
//
Stap += Cshift(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
Stap += Gimpl::CshiftLink(Gimpl::CovShiftForward(U[mu], mu, Staple2x1), mu, 1);
// ___ ___
// |___ |
@ -758,7 +954,7 @@ public:
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
// Stap+= Cshift(tmp,mu,1) ;
Stap += Cshift(Staple2x1, mu, 1) * Cshift(U[mu], mu, -1);
Stap += Gimpl::CshiftLink(Staple2x1, mu, 1) * Gimpl::CshiftLink(U[mu], mu, -1);
;
// --
@ -766,10 +962,10 @@ public:
//
// | |
tmp = Cshift(adj(U2[nu]), nu, -2);
tmp = Gimpl::CshiftLink(adj(U2[nu]), nu, -2);
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
tmp = U2[nu] * Cshift(tmp, nu, 2);
Stap += Cshift(tmp, mu, 1);
tmp = U2[nu] * Gimpl::CshiftLink(tmp, nu, 2);
Stap += Gimpl::CshiftLink(tmp, mu, 1);
// | |
//
@ -778,25 +974,12 @@ public:
tmp = Gimpl::CovShiftBackward(U[mu], mu, U2[nu]);
tmp = adj(U2[nu]) * tmp;
tmp = Cshift(tmp, nu, -2);
Stap += Cshift(tmp, mu, 1);
tmp = Gimpl::CshiftLink(tmp, nu, -2);
Stap += Gimpl::CshiftLink(tmp, mu, 1);
}
}
}
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
RectStapleUnoptimised(Stap, Umu, mu);
}
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
int mu) {
if (Gimpl::isPeriodicGaugeField()) {
RectStapleOptimised(Stap, U2, U, mu);
} else {
RectStapleUnoptimised(Stap, Umu, mu);
}
}
static void RectStapleUnoptimised(GaugeMat &Stap, const GaugeLorentz &Umu,
int mu) {
GridBase *grid = Umu.Grid();
@ -895,6 +1078,288 @@ public:
}
}
static void RectStaple(GaugeMat &Stap, const GaugeLorentz &Umu, int mu) {
RectStapleUnoptimised(Stap, Umu, mu);
}
static void RectStaple(const GaugeLorentz &Umu, GaugeMat &Stap,
std::vector<GaugeMat> &U2, std::vector<GaugeMat> &U,
int mu) {
RectStapleOptimised(Stap, U2, U, mu);
}
//////////////////////////////////////////////////////
//Compute the rectangular staples for all orientations
//Stap : Array of staples (Nd)
//U: Gauge links in each direction (Nd)
/////////////////////////////////////////////////////
static void RectStapleAll(std::vector<GaugeMat> &Stap, const std::vector<GaugeMat> &U){
assert(Stap.size() == Nd); assert(U.size() == Nd);
std::vector<GaugeMat> U2(Nd,U[0].Grid());
for(int mu=0;mu<Nd;mu++) RectStapleDouble(U2[mu], U[mu], mu);
for(int mu=0;mu<Nd;mu++) RectStapleOptimised(Stap[mu], U2, U, mu);
}
//A workspace class allowing reuse of the stencil
class RectStaplePaddedAllWorkspace: public WilsonLoopPaddedStencilWorkspace{
public:
std::vector<Coordinate> getShifts() const override{
std::vector<Coordinate> shifts;
for (int mu = 0; mu < Nd; mu++){
for (int nu = 0; nu < Nd; nu++) {
if (nu != mu) {
auto genShift = [&](int mushift,int nushift){
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
};
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+2,0));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
shifts.push_back(genShift(+2,-1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,+1));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,0));
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(0,+2));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(+1,-2));
shifts.push_back(genShift(+1,-1));
}
}
}
return shifts;
}
int paddingDepth() const override{ return 2; }
};
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell) {
RectStaplePaddedAllWorkspace wk;
RectStaplePaddedAll(staple,U_padded,Cell,wk.getStencil(Cell));
}
//Padded cell implementation of the rectangular staple method for all mu, summed over nu != mu
//staple: output staple for each mu, summed over nu != mu (Nd)
//U_padded: the gauge link fields padded out using the PaddedCell class
//Cell: the padded cell class
//gStencil: the stencil
static void RectStaplePaddedAll(std::vector<GaugeMat> &staple, const std::vector<GaugeMat> &U_padded, const PaddedCell &Cell, const GeneralLocalStencil &gStencil) {
double t0 = usecond();
assert(U_padded.size() == Nd); assert(staple.size() == Nd);
assert(U_padded[0].Grid() == (GridBase*)Cell.grids.back());
assert(Cell.depth >= 2);
GridBase *ggrid = U_padded[0].Grid(); //padded cell grid
size_t nshift = gStencil._npoints;
int mu_off_delta = nshift / Nd;
//Open views to padded gauge links and keep open over mu loop
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType);
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = U_padded[i].View(AcceleratorRead);
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
GaugeMat gStaple(ggrid); //temp staple object on padded grid
int offset = 0;
for(int mu=0; mu<Nd; mu++){
{ //view scope
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
stencil_ss = Zero();
int s=offset;
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
e = gStencil_v.GetEntry(s++,ss);
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
}
}
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
offset += mu_off_delta;
}//kernel/view scope
staple[mu] = Cell.Extract(gStaple);
}//mu loop
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
free(Ug_dirs_v_host);
acceleratorFreeDevice(Ug_dirs_v);
double t1 = usecond();
std::cout << GridLogPerformance << "RectStaplePaddedAll timings:" << (t1-t0)/1000 << "ms" << std::endl;
}
//A workspace for reusing the PaddedCell and GeneralLocalStencil objects
class StapleAndRectStapleAllWorkspace: public WilsonLoopPaddedWorkspace{
public:
StapleAndRectStapleAllWorkspace(){
this->addStencil(new StaplePaddedAllWorkspace);
this->addStencil(new RectStaplePaddedAllWorkspace);
}
};
//////////////////////////////////////////////////////
//Compute the 1x1 and 1x2 staples for all orientations
//Stap : Array of staples (Nd)
//RectStap: Array of rectangular staples (Nd)
//U: Gauge links in each direction (Nd)
/////////////////////////////////////////////////////
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U){
StapleAndRectStapleAllWorkspace wk;
StapleAndRectStapleAll(Stap,RectStap,U,wk);
}
//////////////////////////////////////////////////////
//Compute the 1x1 and 1x2 staples for all orientations
//Stap : Array of staples (Nd)
//RectStap: Array of rectangular staples (Nd)
//U: Gauge links in each direction (Nd)
//wk: a workspace containing stored PaddedCell and GeneralLocalStencil objects to maximize reuse
/////////////////////////////////////////////////////
static void StapleAndRectStapleAll(std::vector<GaugeMat> &Stap, std::vector<GaugeMat> &RectStap, const std::vector<GaugeMat> &U, StapleAndRectStapleAllWorkspace &wk){
#if 0
StapleAll(Stap, U);
RectStapleAll(RectStap, U);
#else
double t0 = usecond();
GridCartesian* unpadded_grid = dynamic_cast<GridCartesian*>(U[0].Grid());
const PaddedCell &Ghost = wk.getPaddedCell(unpadded_grid);
CshiftImplGauge<Gimpl> cshift_impl;
std::vector<GaugeMat> U_pad(Nd, Ghost.grids.back());
for(int mu=0;mu<Nd;mu++) U_pad[mu] = Ghost.Exchange(U[mu], cshift_impl);
double t1 = usecond();
StaplePaddedAll(Stap, U_pad, Ghost, wk.getStencil(0,unpadded_grid) );
double t2 = usecond();
RectStaplePaddedAll(RectStap, U_pad, Ghost, wk.getStencil(1,unpadded_grid));
double t3 = usecond();
std::cout << GridLogPerformance << "StapleAndRectStapleAll timings: pad:" << (t1-t0)/1000 << "ms, staple:" << (t2-t1)/1000 << "ms, rect-staple:" << (t3-t2)/1000 << "ms" << std::endl;
#endif
}
//////////////////////////////////////////////////
// Wilson loop of size (R1, R2), oriented in mu,nu plane
//////////////////////////////////////////////////

View File

@ -79,60 +79,60 @@ public:
this->_entries.resize(npoints* osites);
this->_entries_p = &_entries[0];
thread_for(site, osites, {
Coordinate Coor;
Coordinate NbrCoor;
Coordinate Coor;
Coordinate NbrCoor;
for(Integer site=0;site<osites;site++){
for(Integer ii=0;ii<npoints;ii++){
Integer lex = site*npoints+ii;
GeneralStencilEntry SE;
////////////////////////////////////////////////
// Outer index of neighbour Offset calculation
////////////////////////////////////////////////
grid->oCoorFromOindex(Coor,site);
for(int d=0;d<Coor.size();d++){
int rd = grid->_rdimensions[d];
NbrCoor[d] = (Coor[d] + shifts[ii][d] + rd )%rd;
for(Integer ii=0;ii<npoints;ii++){
Integer lex = site*npoints+ii;
GeneralStencilEntry SE;
////////////////////////////////////////////////
// Outer index of neighbour Offset calculation
////////////////////////////////////////////////
grid->oCoorFromOindex(Coor,site);
for(int d=0;d<Coor.size();d++){
int rd = grid->_rdimensions[d];
NbrCoor[d] = (Coor[d] + shifts[ii][d] + rd )%rd;
}
SE._offset = grid->oIndexReduced(NbrCoor);
////////////////////////////////////////////////
// Inner index permute calculation
// Simpler version using icoor calculation
////////////////////////////////////////////////
SE._permute =0;
for(int d=0;d<Coor.size();d++){
int fd = grid->_fdimensions[d];
int rd = grid->_rdimensions[d];
int ly = grid->_simd_layout[d];
assert((ly==1)||(ly==2));
int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1
int x = Coor[d]; // x in [0... rd-1] as an oSite
int permute_dim = grid->PermuteDim(d);
int permute_slice=0;
if(permute_dim){
int num = shift%rd; // Slice within dest osite cell of slice zero
int wrap = shift/rd; // Number of osite local volume cells crossed through
// x+num < rd dictates whether we are in same permute state as slice 0
if ( x< rd-num ) permute_slice=wrap;
else permute_slice=(wrap+1)%ly;
}
if ( permute_slice ) {
int ptype =grid->PermuteType(d);
uint8_t mask =0x1<<ptype;
SE._permute |= mask;
}
}
////////////////////////////////////////////////
// Store in look up table
////////////////////////////////////////////////
this->_entries[lex] = SE;
}
SE._offset = grid->oIndexReduced(NbrCoor);
////////////////////////////////////////////////
// Inner index permute calculation
// Simpler version using icoor calculation
////////////////////////////////////////////////
SE._permute =0;
for(int d=0;d<Coor.size();d++){
int fd = grid->_fdimensions[d];
int rd = grid->_rdimensions[d];
int ly = grid->_simd_layout[d];
assert((ly==1)||(ly==2));
int shift = (shifts[ii][d]+fd)%fd; // make it strictly positive 0.. L-1
int x = Coor[d]; // x in [0... rd-1] as an oSite
int permute_dim = grid->PermuteDim(d);
int permute_slice=0;
if(permute_dim){
int num = shift%rd; // Slice within dest osite cell of slice zero
int wrap = shift/rd; // Number of osite local volume cells crossed through
// x+num < rd dictates whether we are in same permute state as slice 0
if ( x< rd-num ) permute_slice=wrap;
else permute_slice=(wrap+1)%ly;
}
if ( permute_slice ) {
int ptype =grid->PermuteType(d);
uint8_t mask =grid->Nsimd() >> (ptype + 1);
SE._permute |= mask;
}
}
////////////////////////////////////////////////
// Store in look up table
////////////////////////////////////////////////
this->_entries[lex] = SE;
}
}
});
}
};

View File

@ -32,6 +32,7 @@
#include <Grid/stencil/SimpleCompressor.h> // subdir aggregate
#include <Grid/stencil/Lebesgue.h> // subdir aggregate
#include <Grid/stencil/GeneralLocalStencil.h>
//////////////////////////////////////////////////////////////////////////////////////////
// Must not lose sight that goal is to be able to construct really efficient
@ -339,8 +340,8 @@ public:
// Vectors that live on the symmetric heap in case of SHMEM
// These are used; either SHM objects or refs to the above symmetric heap vectors
// depending on comms target
Vector<cobj *> u_simd_send_buf;
Vector<cobj *> u_simd_recv_buf;
std::vector<cobj *> u_simd_send_buf;
std::vector<cobj *> u_simd_recv_buf;
int u_comm_offset;
int _unified_buffer_size;
@ -348,7 +349,7 @@ public:
////////////////////////////////////////
// Stencil query
////////////////////////////////////////
#ifdef SHM_FAST_PATH
#if 1
inline int SameNode(int point) {
int dimension = this->_directions[point];
@ -434,7 +435,6 @@ public:
////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{
accelerator_barrier();
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromBegin(MpiReqs,
Packets[i].send_buf,
@ -452,7 +452,6 @@ public:
else if ( this->fullDirichlet ) DslashLogDirichlet();
else DslashLogFull();
acceleratorCopySynchronise();
// Everyone agrees we are all done
_grid->StencilBarrier();
}
////////////////////////////////////////////////////////////////////////
@ -541,6 +540,7 @@ public:
compress.Point(point);
HaloGatherDir(source,compress,point,face_idx);
}
accelerator_barrier();
face_table_computed=1;
assert(u_comm_offset==_unified_buffer_size);
@ -666,11 +666,9 @@ public:
for(int i=0;i<mm.size();i++){
decompressor::MergeFace(decompress,mm[i]);
}
if ( mm.size() ) acceleratorFenceComputeStream();
for(int i=0;i<dd.size();i++){
decompressor::DecompressFace(decompress,dd[i]);
}
if ( dd.size() ) acceleratorFenceComputeStream();
}
////////////////////////////////////////
// Set up routines
@ -708,6 +706,7 @@ public:
}
}
}
std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)

View File

@ -73,6 +73,16 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int
return vec;
}
}
//'perm_mask' acts as a bitmask
template<class vobj> accelerator_inline
vobj coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=0)
{
auto obj = vec, tmp = vec;
for (int d=0;d<nd;d++)
if (perm_mask & (0x1 << d)) { permute(obj,tmp,d); tmp=obj;}
return obj;
}
template<class vobj> accelerator_inline
void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0)
{
@ -83,7 +93,7 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
{
vstream(vec, extracted);
}
#else
#else //==GRID_SIMT
//#ifndef GRID_SYCL
@ -166,6 +176,14 @@ typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,
return extractLane(plane,vec);
}
template<class vobj> accelerator_inline
typename vobj::scalar_object coalescedReadGeneralPermute(const vobj & __restrict__ vec,int perm_mask,int nd,int lane=acceleratorSIMTlane(vobj::Nsimd()))
{
int plane = lane;
for (int d=0;d<nd;d++)
plane = (perm_mask & (0x1 << d)) ? plane ^ (vobj::Nsimd() >> (d + 1)) : plane;
return extractLane(plane,vec);
}
template<class vobj> accelerator_inline
void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd()))
{
insertLane(lane,vec,extracted);

View File

@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
// Specialisation: Cayley-Hamilton exponential for SU(3)
#ifndef GRID_ACCELERATED
#if 0
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{

View File

@ -526,7 +526,7 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
//////////////////////////////////////////////
#ifdef GRID_SYCL
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();};
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); };
#else
// Ordering within a stream guaranteed on Nvidia & AMD
inline void acceleratorFenceComputeStream(void){ };

224
HMC/FTHMC2p1f.cc Normal file
View File

@ -0,0 +1,224 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Copyright (C) 2023
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
using namespace Grid;
int main(int argc, char **argv)
{
std::cout << std::setprecision(12);
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionD FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 12;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 0;
HMCparams.Trajectories = 200;
HMCparams.NoMetropolisUntil= 20;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
HMCparams.StartingType =std::string("HotStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_EODWF_lat";
CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
CPparams.rng_prefix = "ckpoint_EODWF_rng";
CPparams.saveInterval = 1;
CPparams.saveSmeared = true;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 16;
Real beta = 2.13;
Real light_mass = 0.01;
Real strange_mass = 0.04;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD b = 1.0; // Scale factor two
RealD c = 0.0;
OneFlavourRationalParams OFRp;
OFRp.lo = 1.0e-2;
OFRp.hi = 64;
OFRp.MaxIter = 10000;
OFRp.tolerance= 1.0e-10;
OFRp.degree = 14;
OFRp.precision= 40;
std::vector<Real> hasenbusch({ 0.1 });
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
LatticeGaugeField Uhot(GridPtr);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
double StoppingCondition = 1e-10;
double MaxCGIterations = 30000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
bool ApplySmearing = true;
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(2);
ActionLevel<HMCWrapper::Field> Level3(4);
////////////////////////////////////
// Strange action
////////////////////////////////////
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy>
EOFA(Strange_Op_L, Strange_Op_R,
CG,
CG, CG,
CG, CG,
OFRp, false);
EOFA.is_smeared = ApplySmearing;
Level1.push_back(&EOFA);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage << " 2f quotient Action "<< light_num[h] << " / " << light_den[h]<< std::endl;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
}
for(int h=0;h<n_hasenbusch+1;h++){
Quotients[h]->is_smeared = ApplySmearing;
Level1.push_back(Quotients[h]);
}
/////////////////////////////////////////////////////////////
// lnDetJacobianAction
/////////////////////////////////////////////////////////////
double rho = 0.1; // smearing parameter
int Nsmear = 1; // number of smearing levels - must be multiple of 2Nd
int Nstep = 8*Nsmear; // number of smearing levels - must be multiple of 2Nd
Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
if( ApplySmearing ) Level2.push_back(&Jacobian);
std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
// GaugeAction.is_smeared = ApplySmearing;
GaugeAction.is_smeared = true;
Level3.push_back(&GaugeAction);
std::cout << GridLogMessage << " ************************************************"<< std::endl;
std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
std::cout << GridLogMessage << " ************************************************"<< std::endl;
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
TheHMC.Run(SmearingPolicy); // for smearing
Grid_finalize();
} // main

View File

@ -227,7 +227,7 @@ int main(int argc, char **argv) {
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
int SP_iters=10000;
int SP_iters=9000;
RationalActionParams OFRp; // Up/down
OFRp.lo = 6.0e-5;
@ -362,12 +362,12 @@ int main(int argc, char **argv) {
// Probably dominates the force - back to EOFA.
OneFlavourRationalParams SFRp;
SFRp.lo = 0.25;
SFRp.lo = 0.1;
SFRp.hi = 25.0;
SFRp.MaxIter = 10000;
SFRp.tolerance= 1.0e-5;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 2.0e-4;
SFRp.degree = 8;
SFRp.degree = 12;
SFRp.precision= 50;
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);

View File

@ -146,6 +146,8 @@ NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
std::cout << " Grid Initialise "<<std::endl;
Grid_init(&argc, &argv);
CartesianCommunicator::BarrierWorld();
@ -170,24 +172,24 @@ int main(int argc, char **argv) {
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
typedef GenericHMCRunner<ForceGradient> HMCWrapper;
MD.name = std::string("Force Gradient");
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
// MD.name = std::string("MinimumNorm2");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
// TrajL = 2
// 4/2 => 0.6 dH
// 3/3 => 0.8 dH .. depth 3, slower
//MD.MDsteps = 4;
MD.MDsteps = 12;
MD.MDsteps = 14;
MD.trajL = 0.5;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.Trajectories = 20;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.StartingType =std::string("ColdStart");
// HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
@ -223,7 +225,7 @@ int main(int argc, char **argv) {
Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
auto GridPtr = TheHMC.Resources.GetCartesian();
@ -244,11 +246,6 @@ int main(int argc, char **argv) {
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
//////////////////////////
// Fermion Grids
@ -277,15 +274,13 @@ int main(int argc, char **argv) {
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
FermionActionF::ImplParams ParamsF(boundary);
Params.dirichlet=NonDirichlet;
ParamsF.dirichlet=NonDirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-8;
double MDStoppingCondition = 1e-7;
double MDStoppingConditionLoose = 1e-7;
double MDStoppingConditionStrange = 1e-7;
double StoppingCondition = 1e-9;
double MDStoppingCondition = 1e-8;
double MDStoppingConditionLoose = 1e-8;
double MDStoppingConditionStrange = 1e-8;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
@ -305,12 +300,12 @@ int main(int argc, char **argv) {
// Probably dominates the force - back to EOFA.
OneFlavourRationalParams SFRp;
SFRp.lo = 0.25;
SFRp.hi = 25.0;
SFRp.lo = 0.1;
SFRp.hi = 30.0;
SFRp.MaxIter = 10000;
SFRp.tolerance= 1.0e-5;
SFRp.mdtolerance= 2.0e-4;
SFRp.degree = 8;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 2.0e-6;
SFRp.degree = 10;
SFRp.precision= 50;
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
@ -370,19 +365,17 @@ int main(int argc, char **argv) {
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
light_den.push_back(light_mass);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(0);
light_den.push_back(hasenbusch[h]);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(0);
light_num.push_back(hasenbusch[h]);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
light_num.push_back(pv_mass);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
@ -408,9 +401,7 @@ int main(int argc, char **argv) {
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
@ -418,21 +409,11 @@ int main(int argc, char **argv) {
FermionActionF::ImplParams ParamsDenF(boundary);
FermionActionF::ImplParams ParamsNumF(boundary);
ParamsNum.dirichlet = NonDirichlet;
ParamsDen.dirichlet = NonDirichlet;
ParamsNum.partialDirichlet = 0;
ParamsDen.partialDirichlet = 0;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
ParamsDenF.dirichlet = ParamsDen.dirichlet;
ParamsDenF.partialDirichlet = ParamsDen.partialDirichlet;
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
ParamsNumF.dirichlet = ParamsNum.dirichlet;
ParamsNumF.partialDirichlet = ParamsNum.partialDirichlet;
NumeratorsF.push_back (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
@ -469,7 +450,6 @@ int main(int argc, char **argv) {
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
// TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;

View File

@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
err = r_eo-result;
n2e= norm2(err);
std::cout<<GridLogMessage << "norm diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
std::cout<<GridLogMessage << "norm diff "<< n2e<<std::endl;
assert(n2e<1.0e-4);
pickCheckerboard(Even,src_e,err);

View File

@ -0,0 +1,90 @@
Branch: develop
Files:
Grid/lattice/PaddedCell.h -- Halo exchange
tests/Test_general_stencil.cc -- test local off axis stencil addressing
tests/debug/Test_padded_cell.cc -- test PaddedCell halo exchange and the General local stencil by computing ALL plaquettes on lattice
Functionality:
-- extend a lattice field:
Grid/lattice/PaddedCell.h
// Constructor
PaddedCell(int _depth,GridCartesian *_grid)
// Expand a field "in" to depth "d"
template<class vobj>
inline Lattice<vobj> Exchange(Lattice<vobj> &in)
// Take the "apple core" of in to a smaller local volume
template<class vobj>
inline Lattice<vobj> Extract(Lattice<vobj> &in)
-- Plaquette test:
tests/debug/Test_padded_cell.cc
/////////////////////////////////////////////////
// Create a padded cell of extra padding depth=1
/////////////////////////////////////////////////
int depth = 1;
PaddedCell Ghost(depth,&GRID);
LatticeGaugeField Ughost = Ghost.Exchange(Umu);
///// Array for the site plaquette
GridBase *GhostGrid = Ughost.Grid();
LatticeComplex gplaq(GhostGrid);
std::vector<Coordinate> shifts;
for(int mu=0;mu<Nd;mu++){
for(int nu=mu+1;nu<Nd;nu++){
// Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x)
Coordinate shift_0(Nd,0);
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
shifts.push_back(shift_0);
shifts.push_back(shift_mu);
shifts.push_back(shift_nu);
shifts.push_back(shift_0);
}
}
GeneralLocalStencil gStencil(GhostGrid,shifts);
gplaq=Zero();
{
autoView( gp_v , gplaq, CpuWrite);
autoView( t_v , trplaq, CpuRead);
autoView( U_v , Ughost, CpuRead);
for(int ss=0;ss<gp_v.size();ss++){
int s=0;
for(int mu=0;mu<Nd;mu++){
for(int nu=mu+1;nu<Nd;nu++){
auto SE0 = gStencil.GetEntry(s+0,ss);
auto SE1 = gStencil.GetEntry(s+1,ss);
auto SE2 = gStencil.GetEntry(s+2,ss);
auto SE3 = gStencil.GetEntry(s+3,ss);
int o0 = SE0->_offset;
int o1 = SE1->_offset;
int o2 = SE2->_offset;
int o3 = SE3->_offset;
auto U0 = U_v[o0](mu);
auto U1 = U_v[o1](nu);
auto U2 = adj(U_v[o2](mu));
auto U3 = adj(U_v[o3](nu));
gpermute(U0,SE0->_permute);
gpermute(U1,SE1->_permute);
gpermute(U2,SE2->_permute);
gpermute(U3,SE3->_permute);
gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 );
s=s+4;
}
}
}
}
cplaq = Ghost.Extract(gplaq);

133
examples/socket_grid.cc Normal file
View File

@ -0,0 +1,133 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include <stdio.h>
#include <err.h>
#include <fcntl.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
static int sock;
static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
static char sock_path[256];
class UnixSockets {
public:
static void Open(int rank)
{
int errnum;
sock = socket(AF_UNIX, SOCK_DGRAM, 0); assert(sock>0);
printf("allocated socket %d\n",sock);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
unlink(sa_un.sun_path);
if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
perror("bind failure");
exit(EXIT_FAILURE);
}
printf("bound socket %d to %s\n",sock,sa_un.sun_path);
}
static int RecvFileDescriptor(void)
{
int n;
int fd;
char buf[1];
struct iovec iov;
struct msghdr msg;
struct cmsghdr *cmsg;
char cms[CMSG_SPACE(sizeof(int))];
iov.iov_base = buf;
iov.iov_len = 1;
memset(&msg, 0, sizeof msg);
msg.msg_name = 0;
msg.msg_namelen = 0;
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_control = (caddr_t)cms;
msg.msg_controllen = sizeof cms;
if((n=recvmsg(sock, &msg, 0)) < 0) {
perror("recvmsg failed");
return -1;
}
if(n == 0){
perror("recvmsg returned 0");
return -1;
}
cmsg = CMSG_FIRSTHDR(&msg);
memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
printf("received fd %d from socket %d\n",fd,sock);
return fd;
}
static void SendFileDescriptor(int fildes,int xmit_to_rank)
{
struct msghdr msg;
struct iovec iov;
struct cmsghdr *cmsg = NULL;
char ctrl[CMSG_SPACE(sizeof(int))];
char data = ' ';
memset(&msg, 0, sizeof(struct msghdr));
memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
iov.iov_base = &data;
iov.iov_len = sizeof(data);
sprintf(sock_path,sock_path_fmt,xmit_to_rank);
printf("sending FD %d over socket %d to rank %d AF_UNIX path %s\n",fildes,sock,xmit_to_rank,sock_path);fflush(stdout);
struct sockaddr_un sa_un = { 0 };
sa_un.sun_family = AF_UNIX;
snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
msg.msg_name = (void *)&sa_un;
msg.msg_namelen = sizeof(sa_un);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
msg.msg_controllen = CMSG_SPACE(sizeof(int));
msg.msg_control = ctrl;
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
*((int *) CMSG_DATA(cmsg)) = fildes;
if ( sendmsg(sock, &msg, 0) == -1 ) perror("sendmsg failed");
};
};
int main(int argc, char **argv)
{
int me = fork()?0:1;
UnixSockets::Open(me);
// need MPI barrier
sleep(10);
const char * message = "Hello, World\n";
if( me ) {
int fd = open("foo",O_RDWR|O_CREAT,0666);
if ( fd < 0 ) {
perror("failed to open file");
exit(EXIT_FAILURE);
}
// rank 1 sends ot rank 0
UnixSockets::SendFileDescriptor(fd,0);
close(fd);
} else {
// rank 0 sends receives frmo rank 1
int fd = UnixSockets::RecvFileDescriptor();
write(fd,(const void *)message,strlen(message));
close(fd);
}
}

View File

@ -60,7 +60,7 @@ while test $# -gt 0; do
;;
--cxxflags)
echo @GRID_CXXFLAGS@
echo @GRID_CXXFLAGS@ -I@prefix@/include
;;
--cxx)
@ -72,11 +72,11 @@ while test $# -gt 0; do
;;
--ldflags)
echo @GRID_LDFLAGS@
echo @GRID_LDFLAGS@ -L@prefix@/lib
;;
--libs)
echo @GRID_LIBS@
echo @GRID_LIBS@ -lGrid
;;
--summary)

View File

@ -0,0 +1,44 @@
#!/bin/bash -l
#SBATCH --job-name=bench_lehner
#SBATCH --partition=small-g
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=7
#SBATCH --gpus-per-node=8
#SBATCH --time=00:10:00
#SBATCH --account=project_465000546
#SBATCH --gpu-bind=none
#SBATCH --exclusive
#SBATCH --mem=0
CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
echo $CPU_BIND
cat << EOF > select_gpu
#!/bin/bash
export GPU_MAP=(0 1 2 3 4 5 6 7)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export HIP_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec \$*
EOF
chmod +x ./select_gpu
root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
source ${root}/sourceme.sh
export OMP_NUM_THREADS=7
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
for vol in 16.16.16.64 32.32.32.64 32.32.32.128
do
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
done

View File

@ -0,0 +1,30 @@
spack load c-lime
spack load gmp
spack load mpfr
CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
echo clime X$CLIME
echo gmp X$GMP
echo mpfr X$MPFR
../../configure \
--enable-comms=mpi-auto \
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--enable-simd=GPU \
--enable-accelerator-cshift \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--with-fftw=$FFTW_DIR/.. \
--disable-fermion-reps \
--disable-gparity \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp"

5
systems/Lumi/sourceme.sh Normal file
View File

@ -0,0 +1,5 @@
source ~/spack/share/spack/setup-env.sh
module load CrayEnv LUMI/22.12 partition/G cray-fftw/3.3.10.1 rocm
spack load c-lime
spack load gmp
spack load mpfr

View File

@ -3,8 +3,14 @@ export https_proxy=http://proxy-chain.intel.com:911
export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
module load intel-release
source /opt/intel/oneapi/PVC_setup.sh
module load intel-comp-rt/embargo-ci-neo
#source /opt/intel/oneapi/PVC_setup.sh
#source /opt/intel/oneapi/ATS_setup.sh
#module load intel-nightly/20230331
#module load intel-comp-rt/ci-neo-master/026093
#module load intel/mpich
module load intel/mpich/pvc45.3
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH

View File

@ -0,0 +1,46 @@
#!/bin/bash
#PBS -l select=1:system=sunspot,place=scatter
#PBS -A LatticeQCD_aesp_CNDA
#PBS -l walltime=01:00:00
#PBS -N dwf
#PBS -k doe
HDIR=/home/paboyle/
module use /soft/testing/modulefiles/
module load intel-UMD23.05.25593.11/23.05.25593.11
module load tools/pti-gpu
export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
export PATH=$HDIR/tools/bin:$PATH
export TZ='/usr/share/zoneinfo/US/Central'
export OMP_PROC_BIND=spread
export OMP_NUM_THREADS=3
unset OMP_PLACES
cd $PBS_O_WORKDIR
qsub jobscript.pbs
echo Jobid: $PBS_JOBID
echo Running on host `hostname`
echo Running on nodes `cat $PBS_NODEFILE`
echo NODES
cat $PBS_NODEFILE
NNODES=`wc -l < $PBS_NODEFILE`
NRANKS=12 # Number of MPI ranks per node
NDEPTH=4 # Number of hardware threads per rank, spacing between MPI ranks on a node
NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
NTOTRANKS=$(( NNODES * NRANKS ))
echo "NUM_NODES=${NNODES} TOTAL_RANKS=${NTOTRANKS} RANKS_PER_NODE=${NRANKS} THREADS_PER_RANK=${OMP_NUM_THREADS}"
echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
./gpu_tile_compact.sh \
./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32"

View File

@ -0,0 +1,52 @@
#!/bin/bash
display_help() {
echo " Will map gpu tile to rank in compact and then round-robin fashion"
echo " Usage (only work for one node of ATS/PVC):"
echo " mpiexec --np N gpu_tile_compact.sh ./a.out"
echo
echo " Example 3 GPU of 2 Tiles with 7 Ranks:"
echo " 0 Rank 0.0"
echo " 1 Rank 0.1"
echo " 2 Rank 1.0"
echo " 3 Rank 1.1"
echo " 4 Rank 2.0"
echo " 5 Rank 2.1"
echo " 6 Rank 0.0"
echo
echo " Hacked together by apl@anl.gov, please contact if bug found"
exit 1
}
#This give the exact GPU count i915 knows about and I use udev to only enumerate the devices with physical presence.
#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices")
num_gpu=6
num_tile=2
if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then
display_help
fi
gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu ))
tile_id=$((PALS_LOCAL_RANKID % num_tile))
unset EnableWalkerPartition
export EnableImplicitScaling=0
export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
export ZE_AFFINITY_MASK=$gpu_id.$tile_id
export ONEAPI_DEVICE_FILTER=gpu,level_zero
export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
if [ $PALS_LOCAL_RANKID = 0 ]
then
onetrace --chrome-device-timeline "$@"
# "$@"
else
"$@"
fi

View File

@ -0,0 +1,16 @@
TOOLS=$HOME/tools
../../configure \
--enable-simd=GPU \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-accelerator-cshift \
--disable-gparity \
--disable-fermion-reps \
--enable-shm=nvlink \
--enable-accelerator=sycl \
--enable-unified=no \
MPICXX=mpicxx \
CXX=icpx \
LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \
CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"

View File

@ -1,2 +1,4 @@
CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi --enable-unified=yes
BREW=/opt/local/
MPICXX=mpicxx CXX=c++-12 ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug

View File

@ -115,6 +115,7 @@ int main(int argc, char ** argv)
if (SE->_permute & 0x2 ) { permute(check[i],tmp,1); tmp=check[i];}
if (SE->_permute & 0x4 ) { permute(check[i],tmp,2); tmp=check[i];}
if (SE->_permute & 0x8 ) { permute(check[i],tmp,3); tmp=check[i];}
// std::cout<<GridLogMessage<<"stencil["<<i<<"] "<< check[i]<< " perm "<<(uint32_t)SE->_permute <<std::endl;
}
Real nrmC = norm2(Check);
@ -138,18 +139,17 @@ int main(int argc, char ** argv)
ddiff = check -bar;
diff =norm2(ddiff);
if ( diff > 0){
std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
<<") " <<check<<" vs "<<bar<<std::endl;
std::cout <<"Diff at Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
<<") stencil " <<check<<" vs cshift "<<bar<<std::endl;
}
}}}}
if (nrm > 1.0e-4) {
autoView( check , Check, CpuRead);
autoView( bar , Bar, CpuRead);
for(int i=0;i<check.size();i++){
std::cout << i<<" Check "<<check[i]<< "\n"<<i<<" Bar "<<bar[i]<<std::endl;
std::cout << i<<" ERROR Check \n"<<check[i]<< "\n"<<i<<" Bar \n"<<bar[i]<<std::endl;
}
}
if (nrm > 1.0e-4) exit(-1);

307
tests/core/Test_fft_pf.cc Normal file
View File

@ -0,0 +1,307 @@
/*************************************************************************************
grid` physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGRID(&GRID);
ComplexD ci(0.0,1.0);
std::vector<int> seeds({1,2,3,4});
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds); // naughty seeding
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(seeds);
LatticeGaugeFieldD Umu(&GRID);
SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
////////////////////////////////////////////////////
// PF prop
////////////////////////////////////////////////////
LatticeFermionD src(&GRID);
gaussian(pRNG,src);
#if 1
Coordinate point(4,0);
src=Zero();
SpinColourVectorD ferm; gaussian(sRNG,ferm);
pokeSite(ferm,src,point);
#endif
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48+1;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
bool fiveD = false; //calculate 4d free propagator
std::cout << " Free propagator " <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Import
////////////////////////////////////////////////////////////////////////
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
Dov.ImportPhysicalFermionSource (src,src5);
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
Dov.ExportPhysicalFermionSolution(result5,result4);
// From DWF4d.pdf :
//
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
result4 = result4 * scale;
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
DumpSliceNorm("Src",src);
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
diff = result4- ref;
DumpSliceNorm("diff ",diff);
}
////////////////////////////////////////////////////
// Dwf prop
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Domain wall physical field source; need D_minus
////////////////////////////////////////////////////////////////////////
/*
chi_5[0] = chiralProjectPlus(chi);
chi_5[Ls-1]= chiralProjectMinus(chi);
*/
tmp = (src + G5*src)*0.5; InsertSlice(tmp,src5, 0,sdir);
tmp = (src - G5*src)*0.5; InsertSlice(tmp,src5,Ls-1,sdir);
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Dminus(src5,tmp5);
src5=tmp5;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonCayleyTanhFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-16,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
/*
psi = chiralProjectMinus(psi_5[0]);
psi += chiralProjectPlus(psi_5[Ls-1]);
*/
ExtractSlice(tmp,result5,0 ,sdir); result4 = (tmp-G5*tmp)*0.5;
ExtractSlice(tmp,result5,Ls-1,sdir); result4 = result4+(tmp+G5*tmp)*0.5;
std::cout << " Taking difference" <<std::endl;
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
DumpSliceNorm("diff",diff);
}
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
LatticeFermionD tmp(&GRID);
LatticeFermionD ref(&GRID);
LatticeFermionD diff(&GRID);
const int Ls=48+1;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
bool fiveD = false; //calculate 4d free propagator
std::cout << " Free propagator " <<std::endl;
Dov.FreePropagator(src,ref,mass) ;
std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
Gamma G5(Gamma::Algebra::Gamma5);
LatticeFermionD src5(FGrid); src5=Zero();
LatticeFermionD tmp5(FGrid);
LatticeFermionD result5(FGrid); result5=Zero();
LatticeFermionD result4(&GRID);
const int sdir=0;
////////////////////////////////////////////////////////////////////////
// Import
////////////////////////////////////////////////////////////////////////
std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
Dov.ImportPhysicalFermionSource (src,src5);
std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
////////////////////////////////////////////////////////////////////////
// Conjugate gradient on normal equations system
////////////////////////////////////////////////////////////////////////
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
Dov.ExportPhysicalFermionSolution(result5,result4);
// From DWF4d.pdf :
//
// Dov_pf = 2/(1-m) D_cayley_ovlap [ Page 43 ]
// Dinv_cayley_ovlap = 2/(1-m) Dinv_pf
// Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) => 2/(1-m)^2 Dinv_pf - 1/(1-m) * src [ Eq.2.67 ]
RealD scale = 2.0/(1.0-mass)/(1.0-mass);
result4 = result4 * scale;
result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
DumpSliceNorm("Src",src);
DumpSliceNorm("Grid",result4);
DumpSliceNorm("Fourier",ref);
std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
std::cout << "Dov ref "<<norm2(ref)<<std::endl;
diff = result4- ref;
DumpSliceNorm("diff ",diff);
}
Grid_finalize();
}

View File

@ -63,7 +63,9 @@ int main(int argc, char** argv) {
std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
// guard as this code fails to compile for Nc != 3
#if (Nc == 3)
#if 1
std::cout << " Printing Adjoint Generators"<< std::endl;
SU2Adjoint::printGenerators();
SU2::testGenerators();
@ -148,10 +150,33 @@ int main(int argc, char** argv) {
typename AdjointRep<Nc>::LatticeMatrix Vrmu = peekLorentz(Vr,mu);
pokeLorentz(UrVr,Urmu*Vrmu, mu);
}
typedef typename SU_Adjoint<Nc>::AMatrix AdjointMatrix;
typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl;
std::cout << GridLogMessage << "****************************************** " << std::endl;
std::cout << GridLogMessage << " MAP BETWEEN FUNDAMENTAL AND ADJOINT CHECK " << std::endl;
std::cout << GridLogMessage << "****************************************** " << std::endl;
for(int a=0;a<Nc*Nc-1;a++){
for(int b=0;b<Nc*Nc-1;b++){
for(int c=0;c<Nc*Nc-1;c++){
ColourMatrix Ta;
ColourMatrix Tb;
ColourMatrix Tc;
SU3::generator(a, Ta);
SU3::generator(b, Tb);
SU3::generator(c, Tc);
AdjointMatrix TRa;
SU3Adjoint::generator(a,TRa);
Complex tr1 = trace ( Tc * ( Ta*Tb-Tb*Ta)); // i/2 fabc
Complex tr2 = TRa()()(b,c) * Complex(0,1);
std::cout << " 2 Tr( Tc[Ta,Tb]) " << 2.0*tr1<<std::endl;
std::cout << " - TRa_bc " << tr2<<std::endl;
assert(abs( (2.0*tr1-tr2) ) < 1.0e-7);
std::cout << "------------------"<<std::endl;
}}}
// Check correspondence of algebra and group transformations
// Create a random vector
SU3::LatticeAlgebraVector h_adj(grid);

View File

@ -0,0 +1,188 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_iwasaki_action_newstaple.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
////////////////////////////////////////////////////////////////////////
// PlaqPlusRectangleActoin
////////////////////////////////////////////////////////////////////////
template<class Gimpl>
class PlaqPlusRectangleActionOrig : public Action<typename Gimpl::GaugeField> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
private:
RealD c_plaq;
RealD c_rect;
public:
PlaqPlusRectangleActionOrig(RealD b,RealD c): c_plaq(b),c_rect(c){};
virtual std::string action_name(){return "PlaqPlusRectangleActionOrig";}
virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name() <<"] c_plaq: " << c_plaq << std::endl;
sstream << GridLogMessage << "["<<action_name() <<"] c_rect: " << c_rect << std::endl;
return sstream.str();
}
virtual RealD S(const GaugeField &U) {
RealD vol = U.Grid()->gSites();
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
RealD rect = WilsonLoops<Gimpl>::avgRectangle(U);
RealD action=c_plaq*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5
+c_rect*(1.0 -rect)*(Nd*(Nd-1.0))*vol;
return action;
};
virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) {
//extend Ta to include Lorentz indexes
RealD factor_p = c_plaq/RealD(Nc)*0.5;
RealD factor_r = c_rect/RealD(Nc)*0.5;
GridBase *grid = Umu.Grid();
std::vector<GaugeLinkField> U (Nd,grid);
std::vector<GaugeLinkField> U2(Nd,grid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu],U[mu],mu);
}
GaugeLinkField dSdU_mu(grid);
GaugeLinkField staple(grid);
for (int mu=0; mu < Nd; mu++){
// Staple in direction mu
WilsonLoops<Gimpl>::Staple(staple,Umu,mu);
dSdU_mu = Ta(U[mu]*staple)*factor_p;
WilsonLoops<Gimpl>::RectStaple(Umu,staple,U2,U,mu);
dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}
};
};
// Convenience for common physically defined cases.
//
// RBC c1 parameterisation is not really RBC but don't have good
// reference and we are happy to change name if prior use of this plaq coeff
// parameterisation is made known to us.
template<class Gimpl>
class RBCGaugeActionOrig : public PlaqPlusRectangleActionOrig<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
RBCGaugeActionOrig(RealD beta,RealD c1) : PlaqPlusRectangleActionOrig<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {};
virtual std::string action_name(){return "RBCGaugeActionOrig";}
};
template<class Gimpl>
class IwasakiGaugeActionOrig : public RBCGaugeActionOrig<Gimpl> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
IwasakiGaugeActionOrig(RealD beta) : RBCGaugeActionOrig<Gimpl>(beta,-0.331) {};
virtual std::string action_name(){return "IwasakiGaugeActionOrig";}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
std::cout << " mpi "<<mpi_layout<<std::endl;
std::cout << " simd "<<simd_layout<<std::endl;
std::cout << " latt "<<latt_size<<std::endl;
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&GRID);
SU<Nc>::HotConfiguration(pRNG,U);
//#define PRD
#ifdef PRD
typedef PeriodicGimplD Gimpl;
#else
typedef ConjugateGimplD Gimpl;
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
Gimpl::setDirections(conj_dirs);
#endif
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
GaugeLorentz derivOrig(&GRID), derivNew(&GRID);
double beta = 2.13;
IwasakiGaugeActionOrig<Gimpl> action_orig(beta);
IwasakiGaugeAction<Gimpl> action_new(beta);
double torig=0, tnew=0;
int ntest = 10;
for(int i=0;i<ntest;i++){
double t0 = usecond();
action_orig.deriv(U, derivOrig);
double t1 = usecond();
action_new.deriv(U, derivNew);
double t2 = usecond();
GaugeLorentz diff = derivOrig - derivNew;
double n = norm2(diff);
std::cout << GridLogMessage << "Difference " << n << " (expect 0)" << std::endl;
assert(n<1e-10);
std::cout << GridLogMessage << "Timings orig: " << (t1-t0)/1000 << "ms, new: " << (t2-t1)/1000 << "ms" << std::endl;
torig += (t1-t0)/1000; tnew += (t2-t1)/1000;
}
std::cout << GridLogMessage << "Avg timings " << ntest << " iterations: orig:" << torig/ntest << "ms, new:" << tnew/ntest << "ms" << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,94 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_optimized_staple_gaugebc.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
using namespace std;
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
std::cout << " mpi "<<mpi_layout<<std::endl;
std::cout << " simd "<<simd_layout<<std::endl;
std::cout << " latt "<<latt_size<<std::endl;
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&GRID);
SU<Nc>::HotConfiguration(pRNG,U);
//#define PRD
#ifdef PRD
typedef PeriodicGimplD Gimpl;
#else
typedef ConjugateGimplD Gimpl;
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
Gimpl::setDirections(conj_dirs);
#endif
typedef typename WilsonLoops<Gimpl>::GaugeMat GaugeMat;
typedef typename WilsonLoops<Gimpl>::GaugeLorentz GaugeLorentz;
int count = 0;
double torig=0, topt=0;
std::vector<GaugeMat> Umu(Nd,&GRID), U2(Nd,&GRID);
for(int mu=0;mu<Nd;mu++){
Umu[mu] = PeekIndex<LorentzIndex>(U,mu);
WilsonLoops<Gimpl>::RectStapleDouble(U2[mu], Umu[mu], mu);
}
std::cout << GridLogMessage << "Checking optimized vs unoptimized RectStaple" << std::endl;
for(int mu=0;mu<Nd;mu++){
GaugeMat staple_orig(&GRID), staple_opt(&GRID), staple_U2(&GRID);
double t0 = usecond();
WilsonLoops<Gimpl>::RectStapleUnoptimised(staple_orig,U,mu);
double t1 = usecond();
WilsonLoops<Gimpl>::RectStapleOptimised(staple_opt, U2, Umu, mu);
double t2 = usecond();
torig += t1-t0; topt += t2-t1;
++count;
GaugeMat diff = staple_orig - staple_opt;
double n = norm2(diff);
std::cout << GridLogMessage << mu << " " << n << std::endl;
assert(n<1e-10);
}
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, optimized: " << topt/1000/count << "ms" << std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,184 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_padded_cell.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
using namespace std;
using namespace Grid;
template<class vobj> void gpermute(vobj & inout,int perm){
vobj tmp=inout;
if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;}
if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;}
if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
std::cout << " mpi "<<mpi_layout<<std::endl;
std::cout << " simd "<<simd_layout<<std::endl;
std::cout << " latt "<<latt_size<<std::endl;
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField Umu(&GRID);
SU<Nc>::HotConfiguration(pRNG,Umu);
Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
LatticeComplex trplaq(&GRID);
std::vector<LatticeColourMatrix> U(Nd, Umu.Grid());
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
std::cout << GridLogMessage << " Average plaquette "<<plaq<<std::endl;
LatticeComplex cplaq(&GRID); cplaq=Zero();
/////////////////////////////////////////////////
// Create a padded cell of extra padding depth=1
/////////////////////////////////////////////////
int depth = 1;
PaddedCell Ghost(depth,&GRID);
LatticeGaugeField Ughost = Ghost.Exchange(Umu);
///////////////////////////////////////////////////////////////////
// Temporary debug Hack for single rank sim:
// Check the contents of the cell are periodcally replicated
// In future ONLY pad those dimensions that are not local to node
///////////////////////////////////////////////////////////////////
#if 0
{
double diff=0;
double n=0;
{
autoView( Ug_v , Ughost, CpuRead);
autoView( Ul_v , Umu , CpuRead);
for(int x=0;x<latt_size[0]+2;x++){
for(int y=0;y<latt_size[1]+2;y++){
for(int z=0;z<latt_size[2]+2;z++){
for(int t=0;t<latt_size[3]+2;t++){
int lx=(x-1+latt_size[0])%latt_size[0];
int ly=(y-1+latt_size[1])%latt_size[1];
int lz=(z-1+latt_size[2])%latt_size[2];
int lt=(t-1+latt_size[3])%latt_size[3];
Coordinate gcoor({x,y,z,t});
Coordinate lcoor({lx,ly,lz,lt});
LorentzColourMatrix g;
LorentzColourMatrix l;
peekLocalSite(g,Ug_v,gcoor);
peekLocalSite(l,Ul_v,lcoor);
g=g-l;
assert(norm2(g)==0);
diff = diff + norm2(g);
n = n + norm2(l);
}}}}
}
std::cout << "padded field check diff "<< diff <<" / "<< n<<std::endl;
std::cout << norm2(Ughost)<< " " << norm2(Umu)<<std::endl;
}
#endif
///// Array for the site plaquette
GridBase *GhostGrid = Ughost.Grid();
LatticeComplex gplaq(GhostGrid);
std::vector<Coordinate> shifts;
for(int mu=0;mu<Nd;mu++){
for(int nu=mu+1;nu<Nd;nu++){
// Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x)
Coordinate shift_0(Nd,0);
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
shifts.push_back(shift_0);
shifts.push_back(shift_mu);
shifts.push_back(shift_nu);
shifts.push_back(shift_0);
}
}
GeneralLocalStencil gStencil(GhostGrid,shifts);
gplaq=Zero();
{
autoView( gp_v , gplaq, CpuWrite);
autoView( t_v , trplaq, CpuRead);
autoView( U_v , Ughost, CpuRead);
for(int ss=0;ss<gp_v.size();ss++){
int s=0;
for(int mu=0;mu<Nd;mu++){
for(int nu=mu+1;nu<Nd;nu++){
auto SE0 = gStencil.GetEntry(s+0,ss);
auto SE1 = gStencil.GetEntry(s+1,ss);
auto SE2 = gStencil.GetEntry(s+2,ss);
auto SE3 = gStencil.GetEntry(s+3,ss);
int o0 = SE0->_offset;
int o1 = SE1->_offset;
int o2 = SE2->_offset;
int o3 = SE3->_offset;
auto U0 = U_v[o0](mu);
auto U1 = U_v[o1](nu);
auto U2 = adj(U_v[o2](mu));
auto U3 = adj(U_v[o3](nu));
gpermute(U0,SE0->_permute);
gpermute(U1,SE1->_permute);
gpermute(U2,SE2->_permute);
gpermute(U3,SE3->_permute);
gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 );
s=s+4;
}
}
}
}
cplaq = Ghost.Extract(gplaq);
RealD vol = cplaq.Grid()->gSites();
RealD faces = (Nd * (Nd-1))/2;
auto p = TensorRemove(sum(cplaq));
auto result = p.real()/vol/faces/Nc;
std::cout << GridLogMessage << " Average plaquette via padded cell "<<result<<std::endl;
std::cout << GridLogMessage << " Diff "<<result-plaq<<std::endl;
assert(fabs(result-plaq)<1.0e-8);
Grid_finalize();
}

View File

@ -0,0 +1,580 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_padded_cell_staple.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
using namespace std;
using namespace Grid;
template <class Gimpl> class WilsonLoopsTest : public Gimpl {
public:
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
//Original implementation
static void StapleOrig(GaugeMat &staple, const GaugeLorentz &Umu, int mu,
int nu) {
GridBase *grid = Umu.Grid();
std::vector<GaugeMat> U(Nd, grid);
for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d);
}
staple = Zero();
if (nu != mu) {
// mu
// ^
// |__> nu
// __
// |
// __|
//
//Forward: Out(x) = Link(x)*field(x+mu)
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
//ShiftStaple: Link(x) = Link(x+mu)
//tmp1 = U^dag_nu(x-nu)
//tmp2 = U^dag_mu(x-mu) tmp1(x-mu) = U^dag_mu(x-mu) U^dag_nu(x-nu-mu)
//tmp3 = U_nu(x) tmp2(x+nu) = U_nu(x)U^dag_mu(x-mu+nu) U^dag_nu(x-mu)
//tmp4 = tmp(x+mu) = U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
staple += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))),
mu);
// __
// |
// |__
//
//
//tmp1 = U_mu^dag(x-mu) U_nu(x-mu)
//tmp2 = U_nu^dag(x-nu) tmp1(x-nu) = U_nu^dag(x-nu) U_mu^dag(x-mu-nu) U_nu(x-mu-nu)
//tmp3 = tmp2(x+mu) = U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
staple += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(U[nu], nu,
Gimpl::CovShiftBackward(U[mu], mu, U[nu])),
mu);
}
}
static void StaplePadded(GaugeMat &staple, const GaugeLorentz &U, int mu,
int nu) {
if(nu==mu){
staple = Zero();
return;
}
double peek = 0, construct = 0, exchange = 0, coord = 0, stencil =0, kernel = 0, extract = 0, total = 0;
double tstart = usecond();
double t=tstart;
PaddedCell Ghost(1, (GridCartesian*)U.Grid());
construct += usecond() - t;
t=usecond();
GaugeMat U_mu = PeekIndex<LorentzIndex>(U, mu);
GaugeMat U_nu = PeekIndex<LorentzIndex>(U, nu);
peek += usecond() - t;
t=usecond();
CshiftImplGauge<Gimpl> cshift_impl;
GaugeMat Ug_mu = Ghost.Exchange(U_mu, cshift_impl);
GaugeMat Ug_nu = Ghost.Exchange(U_nu, cshift_impl);
exchange += usecond() - t;
GridBase *ggrid = Ug_mu.Grid();
GaugeMat gStaple(ggrid);
t=usecond();
Coordinate shift_0(Nd,0);
Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
Coordinate shift_mnu(Nd,0); shift_mnu[nu]=-1;
Coordinate shift_mnu_pmu(Nd,0); shift_mnu_pmu[nu]=-1; shift_mnu_pmu[mu]=1;
std::vector<Coordinate> shifts;
//U_nu(x+mu)U^dag_mu(x+nu) U^dag_nu(x)
shifts.push_back(shift_0);
shifts.push_back(shift_nu);
shifts.push_back(shift_mu);
//U_nu^dag(x-nu+mu) U_mu^dag(x-nu) U_nu(x-nu)
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu);
shifts.push_back(shift_mnu_pmu);
coord += usecond()-t;
t=usecond();
GeneralLocalStencil gStencil(ggrid,shifts);
stencil += usecond() -t;
t=usecond();
{
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
autoView( Ug_mu_v , Ug_mu, AcceleratorRead);
autoView( Ug_nu_v , Ug_nu, AcceleratorRead);
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
GeneralStencilEntry const* e = gStencil_v.GetEntry(0,ss);
auto Udag_nu_x = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(1,ss);
auto Udag_mu_xpnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(2,ss);
auto U_nu_xpmu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
auto stencil_ss = U_nu_xpmu * Udag_mu_xpnu * Udag_nu_x;
e = gStencil_v.GetEntry(3,ss);
auto U_nu_xmnu = coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(4,ss);
auto Udag_mu_xmnu = adj(coalescedReadGeneralPermute(Ug_mu_v[e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(5,ss);
auto Udag_nu_xmnu_pmu = adj(coalescedReadGeneralPermute(Ug_nu_v[e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + Udag_nu_xmnu_pmu * Udag_mu_xmnu * U_nu_xmnu;
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
} //ensure views are all closed!
kernel += usecond() - t;
t=usecond();
staple = Ghost.Extract(gStaple);
extract += usecond()-t;
total += usecond() - tstart;
std::cout << GridLogMessage << "StaplePadded timings peek:" << peek << " construct:" << construct << " exchange:" << exchange << " coord:" << coord << " stencil:" << stencil << " kernel:" << kernel << " extract:" << extract << " total:" << total << std::endl;
}
static void RectStapleOrig(GaugeMat &Stap, const GaugeLorentz &Umu,
int mu) {
GridBase *grid = Umu.Grid();
std::vector<GaugeMat> U(Nd, grid);
for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d);
}
Stap = Zero();
for (int nu = 0; nu < Nd; nu++) {
if (nu != mu) {
// __ ___
// | __ |
//
//tmp1 = U_nu^dag(x-nu)
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu) U_nu^dag(x-nu-mu)
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu) U_mu^dag(x-2mu) U_nu^dag(x-nu-2mu)
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu) U_mu^dag(x-2mu+nu) U_nu^dag(x-2mu)
//tmp5 = U_mu(x)tmp4(x+mu) = U_mu(x)U_nu(x+mu)U_mu^dag(x+nu) U_mu^dag(x-mu+nu) U_nu^dag(x-mu)
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[mu], mu,
Gimpl::CovShiftForward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu,
Gimpl::CovShiftBackward(
U[mu], mu,
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
mu);
// __
// |__ __ |
//tmp1 = U^dag_mu(x-mu)U_nu(x-mu)
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)
//tmp3 = U^dag_nu(x-nu)tmp2(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)
//tmp4 = U_mu(x)tmp3(x+mu) = U_mu(x)U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[mu], mu,
Gimpl::CovShiftBackward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])))),
mu);
// __
// |__ __ |
//Forward: Out(x) = Link(x)*field(x+mu)
//Backward: Out(x) = Link^dag(x-mu)*field(x-mu)
//ShiftStaple: Link(x) = Link(x+mu)
//tmp1 = U_nu(x)U_mu(x+nu)
//tmp2 = U^dag_mu(x-mu)tmp1(x-mu) = U^dag_mu(x-mu)U_nu(x-mu)U_mu(x+nu-mu)
//tmp3 = U^dag_mu(x-mu)tmp2(x-mu) = U^dag_mu(x-mu)U^dag_mu(x-2mu)U_nu(x-2mu)U_mu(x+nu-2mu)
//tmp4 = U^dag_nu(x-nu)tmp3(x-nu) = U^dag_nu(x-nu)U^dag_mu(x-mu-nu)U^dag_mu(x-2mu-nu)U_nu(x-2mu-nu)U_mu(x-2mu)
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[mu])))),
mu);
// __ ___
// |__ |
//tmp1 = U_nu^dag(x-nu)U_mu(x-nu)
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_mu(x-mu-nu)
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_mu^dag(x-2mu)U_nu^dag(x-2mu-nu)U_mu(x-2mu-nu)
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_mu^dag(x-2mu+nu)U_nu^dag(x-2mu)U_mu(x-2mu)
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftBackward(U[nu], nu, U[mu])))),
mu);
// --
// | |
//
// | |
//tmp1 = U_nu^dag(x-nu)
//tmp2 = U_nu^dag(x-nu)tmp1(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)
//tmp3 = U_mu^dag(x-mu)tmp2(x-mu) = U_mu^dag(x-mu)U_nu^dag(x-mu-nu)U_nu^dag(x-mu-2nu)
//tmp4 = U_nu(x)tmp3(x+nu) = U_nu(x)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_nu^dag(x-mu-nu)
//tmp5 = U_nu(x)tmp4(x+nu) = U_nu(x)U_nu(x+nu)U_mu^dag(x-mu+2nu)U_nu^dag(x-mu+nu)U_nu^dag(x-mu)
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[nu], nu,
Gimpl::CovShiftForward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu,
Gimpl::CovShiftBackward(
U[nu], nu,
Gimpl::CovShiftIdentityBackward(U[nu], nu))))),
mu);
// | |
//
// | |
// --
//tmp1 = U_nu(x)U_nu(x+nu)
//tmp2 = U_mu^dag(x-mu)tmp1(x-mu) = U_mu^dag(x-mu)U_nu(x-mu)U_nu(x-mu+nu)
//tmp3 = U_nu^dag(x-nu)tmp2(x-nu) = U_nu^dag(x-nu)U_mu^dag(x-mu-nu)U_nu(x-mu-nu)U_nu(x-mu)
//tmp4 = U_nu^dag(x-nu)tmp3(x-nu) = U_nu^dag(x-nu)U_nu^dag(x-2nu)U_mu^dag(x-mu-2nu)U_nu(x-mu-2nu)U_nu(x-mu-nu)
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
Stap += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftForward(U[nu], nu, U[nu])))),
mu);
}
}
}
static void RectStaplePadded(GaugeMat &Stap, const GaugeLorentz &U,
int mu) {
PaddedCell Ghost(2,(GridCartesian*)U.Grid());
GridBase *ggrid = Ghost.grids.back();
CshiftImplGauge<Gimpl> cshift_impl;
std::vector<GaugeMat> Ug_dirs(Nd,ggrid);
for(int i=0;i<Nd;i++) Ug_dirs[i] = Ghost.Exchange(PeekIndex<LorentzIndex>(U, i), cshift_impl);
GaugeMat gStaple(ggrid);
std::vector<Coordinate> shifts;
for (int nu = 0; nu < Nd; nu++) {
if (nu != mu) {
auto genShift = [&](int mushift,int nushift){
Coordinate out(Nd,0); out[mu]=mushift; out[nu]=nushift; return out;
};
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+2,0));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
shifts.push_back(genShift(+2,-1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(-1,-1));
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(+1,-1));
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,0));
shifts.push_back(genShift(-1,+1));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(+1,0));
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
shifts.push_back(genShift(0,0));
shifts.push_back(genShift(0,+1));
shifts.push_back(genShift(0,+2));
shifts.push_back(genShift(+1,+1));
shifts.push_back(genShift(+1,0));
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
shifts.push_back(genShift(0,-1));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(0,-2));
shifts.push_back(genShift(+1,-2));
shifts.push_back(genShift(+1,-1));
}
}
size_t nshift = shifts.size();
GeneralLocalStencil gStencil(ggrid,shifts);
{
autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View();
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType);
GaugeViewType* Ug_dirs_v_host = (GaugeViewType*)malloc(vsize);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i] = Ug_dirs[i].View(AcceleratorRead);
GaugeViewType* Ug_dirs_v = (GaugeViewType*)acceleratorAllocDevice(vsize);
acceleratorCopyToDevice(Ug_dirs_v_host,Ug_dirs_v,vsize);
accelerator_for(ss, ggrid->oSites(), ggrid->Nsimd(), {
decltype(coalescedRead(Ug_dirs_v[0][0])) stencil_ss;
stencil_ss = Zero();
int s=0;
for(int nu=0;nu<Nd;nu++){
if(nu != mu){
//tmp6 = tmp5(x+mu) = U_mu(x+mu)U_nu(x+2mu)U_mu^dag(x+nu+mu) U_mu^dag(x+nu) U_nu^dag(x)
GeneralStencilEntry const* e = gStencil_v.GetEntry(s++,ss);
auto U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
auto U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
auto U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_mu(x+mu)U^dag_nu(x-nu+2mu)U^dag_mu(x-nu+mu)U^dag_mu(x-nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U^dag_nu(x-nu+mu)U^dag_mu(x-nu)U^dag_mu(x-mu-nu)U_nu(x-mu-nu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu(x+mu)U_mu^dag(x+nu)U_mu^dag(x-mu+nu)U_nu^dag(x-mu)U_mu(x-mu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp6 = tmp5(x+mu) = U_nu(x+mu)U_nu(x+mu+nu)U_mu^dag(x+2nu)U_nu^dag(x+nu)U_nu^dag(x)
e = gStencil_v.GetEntry(s++,ss);
U0 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U1 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U4 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
//tmp5 = tmp4(x+mu) = U_nu^dag(x+mu-nu)U_nu^dag(x+mu-2nu)U_mu^dag(x-2nu)U_nu(x-2nu)U_nu(x-nu)
e = gStencil_v.GetEntry(s++,ss);
U0 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U1 = coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd);
e = gStencil_v.GetEntry(s++,ss);
U2 = adj(coalescedReadGeneralPermute(Ug_dirs_v[mu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U3 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
e = gStencil_v.GetEntry(s++,ss);
U4 = adj(coalescedReadGeneralPermute(Ug_dirs_v[nu][e->_offset], e->_permute, Nd));
stencil_ss = stencil_ss + U4*U3*U2*U1*U0;
}
}
assert(s==nshift);
coalescedWrite(gStaple_v[ss],stencil_ss);
}
);
for(int i=0;i<Nd;i++) Ug_dirs_v_host[i].ViewClose();
free(Ug_dirs_v_host);
acceleratorFreeDevice(Ug_dirs_v);
}
Stap = Ghost.Extract(gStaple);
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
std::cout << " mpi "<<mpi_layout<<std::endl;
std::cout << " simd "<<simd_layout<<std::endl;
std::cout << " latt "<<latt_size<<std::endl;
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&GRID);
pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&GRID);
SU<Nc>::HotConfiguration(pRNG,U);
//typedef PeriodicGimplD Gimpl;
typedef ConjugateGimplD Gimpl;
std::vector<int> conj_dirs(Nd,0); conj_dirs[0]=1; conj_dirs[3]=1;
Gimpl::setDirections(conj_dirs);
typedef typename WilsonLoopsTest<Gimpl>::GaugeMat GaugeMat;
typedef typename WilsonLoopsTest<Gimpl>::GaugeLorentz GaugeLorentz;
std::cout << GridLogMessage << "Checking Staple" << std::endl;
int count = 0;
double torig=0, tpadded=0;
for(int mu=0;mu<Nd;mu++){
for(int nu=0;nu<Nd;nu++){
if(mu != nu){
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
double t0 = usecond();
WilsonLoopsTest<Gimpl>::StapleOrig(staple_orig,U,mu,nu);
double t1 = usecond();
WilsonLoopsTest<Gimpl>::StaplePadded(staple_padded,U,mu,nu);
double t2 = usecond();
torig += t1-t0; tpadded += t2-t1;
++count;
GaugeMat diff = staple_orig - staple_padded;
double n = norm2(diff);
std::cout << GridLogMessage << mu << " " << nu << " " << n << std::endl;
assert(n<1e-10);
}
}
}
std::cout << GridLogMessage << "Staple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
count=0; torig=tpadded=0;
std::cout << GridLogMessage << "Checking RectStaple" << std::endl;
for(int mu=0;mu<Nd;mu++){
GaugeMat staple_orig(&GRID), staple_padded(&GRID);
double t0 = usecond();
WilsonLoopsTest<Gimpl>::RectStapleOrig(staple_orig,U,mu);
double t1 = usecond();
WilsonLoopsTest<Gimpl>::RectStaplePadded(staple_padded,U,mu);
double t2 = usecond();
torig += t1-t0; tpadded += t2-t1;
++count;
GaugeMat diff = staple_orig - staple_padded;
double n = norm2(diff);
std::cout << GridLogMessage << mu << " " << n << std::endl;
assert(n<1e-10);
}
std::cout << GridLogMessage << "RectStaple timings orig: " << torig/1000/count << "ms, padded: " << tpadded/1000/count << "ms" << std::endl;
Grid_finalize();
}

View File

@ -476,7 +476,9 @@ int main (int argc, char ** argv)
// ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
//////////////////// One flavour boundary det ////////////////////
/*
RationalActionParams OFRp; // Up/down
int SP_iters = 3000;
OFRp.lo = 6.0e-5;
OFRp.hi = 90.0;
OFRp.inv_pow = 2;
@ -489,7 +491,7 @@ int main (int argc, char ** argv)
// OFRp.degree = 16;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
/*
*/
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 4.0e-5;
OFRp.hi = 90.0;
@ -499,7 +501,6 @@ int main (int argc, char ** argv)
OFRp.degree = 18;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
*/
std::vector<RealD> ActionTolByPole({
1.0e-7,1.0e-8,1.0e-8,1.0e-8,
1.0e-8,1.0e-8,1.0e-8,1.0e-8,

219
tests/forces/Test_fthmc.cc Normal file
View File

@ -0,0 +1,219 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_fthmc.cc
Copyright (C) 2022
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
using namespace std;
using namespace Grid;
typedef MobiusFermionD FermionAction;
typedef WilsonImplD FimplD;
typedef WilsonImplD FermionImplPolicy;
template<class Gimpl>
void ForceTest(Action<LatticeGaugeField> &action,ConfigurationBase<LatticeGaugeField> & smU,MomentumFilterBase<LatticeGaugeField> &Filter)
{
LatticeGaugeField U = smU.get_U(false); // unsmeared config
GridBase *UGrid = U.Grid();
std::vector<int> seeds({1,2,3,5});
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
LatticeColourMatrix Pmu(UGrid);
LatticeGaugeField P(UGrid);
LatticeGaugeField UdSdU(UGrid);
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
std::cout << GridLogMessage << " Force test for "<<action.action_name()<<std::endl;
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
RealD eps=0.01;
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << " Refresh "<<action.action_name()<<std::endl;
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
Gimpl::generate_momenta(P,sRNG,RNG4);
// Filter.applyFilter(P);
action.refresh(smU,sRNG,RNG4);
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
RealD S1 = action.S(smU);
Gimpl::update_field(P,U,eps);
smU.set_Field(U);
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << " Derivative "<<action.action_name()<<std::endl;
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
action.deriv(smU,UdSdU);
UdSdU = Ta(UdSdU);
// Filter.applyFilter(UdSdU);
DumpSliceNorm("Force",UdSdU,Nd-1);
Gimpl::update_field(P,U,eps);
smU.set_Field(U);
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
RealD S2 = action.S(smU);
// Use the derivative
LatticeComplex dS(UGrid); dS = Zero();
for(int mu=0;mu<Nd;mu++){
auto UdSdUmu = PeekIndex<LorentzIndex>(UdSdU,mu);
Pmu= PeekIndex<LorentzIndex>(P,mu);
dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*HMC_MOMENTUM_DENOMINATOR;
}
ComplexD dSpred = sum(dS);
RealD diff = S2-S1-dSpred.real();
std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
std::cout<< GridLogMessage << "S1 : "<< S1 <<std::endl;
std::cout<< GridLogMessage << "S2 : "<< S2 <<std::endl;
std::cout<< GridLogMessage << "dS : "<< S2-S1 <<std::endl;
std::cout<< GridLogMessage << "dSpred : "<< dSpred.real() <<std::endl;
std::cout<< GridLogMessage << "diff : "<< diff<<std::endl;
std::cout<< GridLogMessage << "*********************************************************"<<std::endl;
// assert(diff<1.0);
std::cout<< GridLogMessage << "Done" <<std::endl;
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::cout << std::setprecision(14);
Coordinate latt_size = GridDefaultLatt();
Coordinate mpi_layout = GridDefaultMpi();
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi_layout,shm);
const int Ls=12;
const int Nt = latt_size[3];
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
///////////////////// Gauge Field and Gauge Forces ////////////////////////////
LatticeGaugeField U(UGrid);
#if 0
FieldMetaData header;
std::string file("./ckpoint_lat.2000");
NerscIO::readConfiguration(U,header,file);
#else
std::vector<int> seeds({1,2,3,4,5,6,7,8});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
SU<Nc>::HotConfiguration(RNG4,U);
#endif
WilsonGaugeActionR PlaqAction(6.0);
IwasakiGaugeActionR RectAction(2.13);
PlaqAction.is_smeared = true;
RectAction.is_smeared = true;
////////////////////////////////////
// Fermion Action
////////////////////////////////////
RealD mass=0.01;
RealD pvmass=1.0;
RealD M5=1.8;
RealD b=1.5;
RealD c=0.5;
// Double versions
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params);
FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params);
double StoppingCondition = 1.0e-8;
double MaxCGIterations = 50000;
ConjugateGradient<LatticeFermion> CG(StoppingCondition,MaxCGIterations);
TwoFlavourRatioPseudoFermionAction<FimplD> Nf2(PVPeriodic, DdwfPeriodic,CG,CG);
Nf2.is_smeared = true;
////////////////////////////////////////////////
// Plaquette only FTHMC smearer
////////////////////////////////////////////////
double rho = 0.1;
Smear_Stout<PeriodicGimplR> Smearer(rho);
SmearedConfigurationMasked<PeriodicGimplR> SmartConfig(UGrid,2*Nd,Smearer);
SmearedConfiguration<PeriodicGimplR> StoutConfig(UGrid,1,Smearer);
JacobianAction<PeriodicGimplR> Jacobian(&SmartConfig);
////////////////////////////////////////////////
// Run some tests
////////////////////////////////////////////////
MomentumFilterNone<LatticeGaugeField> FilterNone;
std::cout << " ********* FIELD TRANSFORM SMEARING ***** "<<std::endl;
SmartConfig.set_Field(U);
ForceTest<GimplTypesR>(PlaqAction,SmartConfig,FilterNone);
SmartConfig.set_Field(U);
ForceTest<GimplTypesR>(RectAction,SmartConfig,FilterNone);
SmartConfig.set_Field(U);
ForceTest<GimplTypesR>(Jacobian,SmartConfig,FilterNone);
SmartConfig.set_Field(U);
ForceTest<GimplTypesR>(Nf2,SmartConfig,FilterNone);
std::cout << " ********* STOUT SMEARING ***** "<<std::endl;
StoutConfig.set_Field(U);
ForceTest<GimplTypesR>(PlaqAction,StoutConfig,FilterNone);
StoutConfig.set_Field(U);
ForceTest<GimplTypesR>(RectAction,StoutConfig,FilterNone);
StoutConfig.set_Field(U);
ForceTest<GimplTypesR>(Nf2,StoutConfig,FilterNone);
Grid_finalize();
}