mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-26 09:39:34 +00:00 
			
		
		
		
	Compare commits
	
		
			51 Commits
		
	
	
		
			hotfix/nvc
			...
			ffd7301649
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | ffd7301649 | ||
|  | d2a8494044 | ||
|  | 0982e0d19b | ||
|  | 3badbfc3c1 | ||
|  | 5465961e30 | ||
|  | 4835fd1a87 | ||
|  | 6533c25814 | ||
|  | 1b2914ec09 | ||
|  | 519f795066 | ||
|  | 4240ad5ca8 | ||
|  | d418347d86 | ||
|  | 29a4bfe5e5 | ||
|  | 9955bf9daf | ||
|  | 876c8f4478 | ||
|  | 9c8750f261 | ||
|  | 91efd08179 | ||
|  | 9953511b65 | ||
|  | 025fa9991a | ||
|  | e8c60c355b | ||
|  | 6c9c7f9d85 | ||
|  | f534523ede | ||
|  | 1b8a834beb | ||
|  | 3aa43e6065 | ||
|  | 78ac4044ff | ||
|  | 119c3db47f | ||
|  | 21bbdb8fc2 | ||
|  | 739bd7572c | ||
|  | 074627a5bd | ||
|  | 6a23b2c599 | ||
|  | bd891fb3f5 | ||
|  | 3984265851 | ||
|  | 45361d188f | ||
|  | 80c9d77e02 | ||
|  | 3aff64dddb | ||
|  | b4f2ca81ff | ||
|  | d1dea5f840 | ||
|  | 54f8b84d16 | ||
|  | da503fef0e | ||
|  | 4a6802098a | ||
|  | f9b41a84d2 | ||
| 5d7e0d18b9 | |||
|  | 86dac5ff4f | ||
|  | 4a382fad3f | ||
|  | cc753670d9 | ||
|  | cc9d88ea1c | ||
|  | b281b0166e | ||
|  | 6a21f694ff | ||
| 39214702f6 | |||
| 3e4614c63a | |||
|  | ccd21f96ff | ||
|  | 4b90cb8888 | 
							
								
								
									
										54
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,54 @@ | ||||
| name: Bug report | ||||
| description: Report a bug. | ||||
| title: "<insert title>" | ||||
| labels: [bug] | ||||
|  | ||||
| body: | ||||
|   - type: markdown | ||||
|     attributes: | ||||
|       value: > | ||||
|         Thank you for taking the time to file a bug report. | ||||
|         Please check that the code is pointing to the HEAD of develop | ||||
|         or any commit in master which is tagged with a version number. | ||||
|  | ||||
|   - type: textarea | ||||
|     attributes: | ||||
|       label: "Describe the issue:" | ||||
|       description: > | ||||
|         Describe the issue and any previous attempt to solve it. | ||||
|     validations: | ||||
|       required: true | ||||
|  | ||||
|   - type: textarea | ||||
|     attributes: | ||||
|       label: "Code example:" | ||||
|       description: > | ||||
|         If relevant, show how to reproduce the issue using a minimal working | ||||
|         example. | ||||
|       placeholder: | | ||||
|         << your code here >> | ||||
|       render: shell | ||||
|     validations: | ||||
|       required: false | ||||
|  | ||||
|   - type: textarea | ||||
|     attributes: | ||||
|       label: "Target platform:" | ||||
|       description: > | ||||
|         Give a description of the target platform (CPU, network, compiler). | ||||
|         Please give the full CPU part description, using for example | ||||
|         `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) | ||||
|         or `sysctl machdep.cpu.brand_string` (macOS) and the full output | ||||
|         the `--version` option of your compiler. | ||||
|     validations: | ||||
|       required: true | ||||
|  | ||||
|   - type: textarea | ||||
|     attributes: | ||||
|       label: "Configure options:" | ||||
|       description: > | ||||
|         Please give the exact configure command used and attach | ||||
|         `config.log`, `grid.config.summary` and the output of `make V=1`. | ||||
|       render: shell | ||||
|     validations: | ||||
|       required: true | ||||
| @@ -542,6 +542,7 @@ public: | ||||
|       (*this)(in[i], out[i]); | ||||
|     } | ||||
|   } | ||||
|   virtual ~LinearFunction(){}; | ||||
| }; | ||||
|  | ||||
| template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { | ||||
|   | ||||
| @@ -166,16 +166,16 @@ public: | ||||
|       rsqf[s] =rsq[s]; | ||||
|       std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl; | ||||
|       //      ps_d[s] = src_d; | ||||
|       precisionChangeFast(ps_f[s],src_d); | ||||
|       precisionChange(ps_f[s],src_d); | ||||
|     } | ||||
|     // r and p for primary | ||||
|     p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys | ||||
|     r_d = p_d; | ||||
|      | ||||
|     //MdagM+m[0] | ||||
|     precisionChangeFast(p_f,p_d); | ||||
|     precisionChange(p_f,p_d); | ||||
|     Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) | ||||
|     precisionChangeFast(tmp_d,mmp_f); | ||||
|     precisionChange(tmp_d,mmp_f); | ||||
|     Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) | ||||
|     tmp_d = tmp_d - mmp_d; | ||||
|     std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl; | ||||
| @@ -204,7 +204,7 @@ public: | ||||
|    | ||||
|     for(int s=0;s<nshift;s++) { | ||||
|       axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d); | ||||
|       precisionChangeFast(psi_f[s],psi_d[s]); | ||||
|       precisionChange(psi_f[s],psi_d[s]); | ||||
|     } | ||||
|    | ||||
|     /////////////////////////////////////// | ||||
| @@ -225,7 +225,7 @@ public: | ||||
|       AXPYTimer.Stop(); | ||||
|  | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChangeFast(r_f, r_d); | ||||
|       precisionChange(r_f, r_d); | ||||
|       PrecChangeTimer.Stop(); | ||||
|  | ||||
|       AXPYTimer.Start(); | ||||
| @@ -243,13 +243,13 @@ public: | ||||
|  | ||||
|       cp=c; | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChangeFast(p_f, p_d); //get back single prec search direction for linop | ||||
|       precisionChange(p_f, p_d); //get back single prec search direction for linop | ||||
|       PrecChangeTimer.Stop(); | ||||
|       MatrixTimer.Start();   | ||||
|       Linop_f.HermOp(p_f,mmp_f); | ||||
|       MatrixTimer.Stop();   | ||||
|       PrecChangeTimer.Start(); | ||||
|       precisionChangeFast(mmp_d, mmp_f); // From Float to Double | ||||
|       precisionChange(mmp_d, mmp_f); // From Float to Double | ||||
|       PrecChangeTimer.Stop(); | ||||
|  | ||||
|       d=real(innerProduct(p_d,mmp_d));     | ||||
| @@ -311,7 +311,7 @@ public: | ||||
| 	SolverTimer.Stop(); | ||||
|  | ||||
| 	for(int s=0;s<nshift;s++){ | ||||
| 	  precisionChangeFast(psi_d[s],psi_f[s]); | ||||
| 	  precisionChange(psi_d[s],psi_f[s]); | ||||
| 	} | ||||
|  | ||||
| 	 | ||||
|   | ||||
| @@ -211,7 +211,7 @@ public: | ||||
|     Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp) | ||||
|     tmp_d = tmp_d - mmp_d; | ||||
|     std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl; | ||||
|     //    assert(norm2(tmp_d)< 1.0e-4); | ||||
|     assert(norm2(tmp_d)< 1.0); | ||||
|  | ||||
|     axpy(mmp_d,mass[0],p_d,mmp_d); | ||||
|     RealD rn = norm2(p_d); | ||||
|   | ||||
| @@ -27,9 +27,10 @@ Author: Christoph Lehner <christoph@lhnr.de> | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #define Mheader "SharedMemoryMpi: " | ||||
|  | ||||
| #include <Grid/GridCore.h> | ||||
| #include <pwd.h> | ||||
| #include <syscall.h> | ||||
|  | ||||
| #ifdef GRID_CUDA | ||||
| #include <cuda_runtime_api.h> | ||||
| @@ -39,11 +40,118 @@ Author: Christoph Lehner <christoph@lhnr.de> | ||||
| #endif | ||||
| #ifdef GRID_SYCL | ||||
| #define GRID_SYCL_LEVEL_ZERO_IPC | ||||
| #include <syscall.h> | ||||
| #define SHM_SOCKETS  | ||||
| #endif | ||||
|  | ||||
| #include <sys/socket.h> | ||||
| #include <sys/un.h> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid);  | ||||
|  | ||||
| #ifdef SHM_SOCKETS | ||||
|  | ||||
| /* | ||||
|  * Barbaric extra intranode communication route in case we need sockets to pass FDs | ||||
|  * Forced by level_zero not being nicely designed | ||||
|  */ | ||||
| static int sock; | ||||
| static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d"; | ||||
| static char sock_path[256]; | ||||
| class UnixSockets { | ||||
| public: | ||||
|   static void Open(int rank) | ||||
|   { | ||||
|     int errnum; | ||||
|  | ||||
|     sock = socket(AF_UNIX, SOCK_DGRAM, 0);  assert(sock>0); | ||||
|  | ||||
|     struct sockaddr_un sa_un = { 0 }; | ||||
|     sa_un.sun_family = AF_UNIX; | ||||
|     snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank); | ||||
|     unlink(sa_un.sun_path); | ||||
|     if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) { | ||||
|       perror("bind failure"); | ||||
|       exit(EXIT_FAILURE); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   static int RecvFileDescriptor(void) | ||||
|   { | ||||
|     int n; | ||||
|     int fd; | ||||
|     char buf[1]; | ||||
|     struct iovec iov; | ||||
|     struct msghdr msg; | ||||
|     struct cmsghdr *cmsg; | ||||
|     char cms[CMSG_SPACE(sizeof(int))]; | ||||
|  | ||||
|     iov.iov_base = buf; | ||||
|     iov.iov_len = 1; | ||||
|  | ||||
|     memset(&msg, 0, sizeof msg); | ||||
|     msg.msg_name = 0; | ||||
|     msg.msg_namelen = 0; | ||||
|     msg.msg_iov = &iov; | ||||
|     msg.msg_iovlen = 1; | ||||
|  | ||||
|     msg.msg_control = (caddr_t)cms; | ||||
|     msg.msg_controllen = sizeof cms; | ||||
|  | ||||
|     if((n=recvmsg(sock, &msg, 0)) < 0) { | ||||
|       perror("recvmsg failed"); | ||||
|       return -1; | ||||
|     } | ||||
|     if(n == 0){ | ||||
|       perror("recvmsg returned 0"); | ||||
|       return -1; | ||||
|     } | ||||
|     cmsg = CMSG_FIRSTHDR(&msg); | ||||
|  | ||||
|     memmove(&fd, CMSG_DATA(cmsg), sizeof(int)); | ||||
|  | ||||
|     return fd; | ||||
|   } | ||||
|  | ||||
|   static void SendFileDescriptor(int fildes,int xmit_to_rank) | ||||
|   { | ||||
|     struct msghdr msg; | ||||
|     struct iovec iov; | ||||
|     struct cmsghdr *cmsg = NULL; | ||||
|     char ctrl[CMSG_SPACE(sizeof(int))]; | ||||
|     char data = ' '; | ||||
|  | ||||
|     memset(&msg, 0, sizeof(struct msghdr)); | ||||
|     memset(ctrl, 0, CMSG_SPACE(sizeof(int))); | ||||
|     iov.iov_base = &data; | ||||
|     iov.iov_len = sizeof(data); | ||||
|      | ||||
|     sprintf(sock_path,sock_path_fmt,xmit_to_rank); | ||||
|      | ||||
|     struct sockaddr_un sa_un = { 0 }; | ||||
|     sa_un.sun_family = AF_UNIX; | ||||
|     snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank); | ||||
|  | ||||
|     msg.msg_name = (void *)&sa_un; | ||||
|     msg.msg_namelen = sizeof(sa_un); | ||||
|     msg.msg_iov = &iov; | ||||
|     msg.msg_iovlen = 1; | ||||
|     msg.msg_controllen =  CMSG_SPACE(sizeof(int)); | ||||
|     msg.msg_control = ctrl; | ||||
|  | ||||
|     cmsg = CMSG_FIRSTHDR(&msg); | ||||
|     cmsg->cmsg_level = SOL_SOCKET; | ||||
|     cmsg->cmsg_type = SCM_RIGHTS; | ||||
|     cmsg->cmsg_len = CMSG_LEN(sizeof(int)); | ||||
|  | ||||
|     *((int *) CMSG_DATA(cmsg)) = fildes; | ||||
|  | ||||
|     sendmsg(sock, &msg, 0); | ||||
|   }; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid);  | ||||
| #define header "SharedMemoryMpi: " | ||||
| /*Construct from an MPI communicator*/ | ||||
| void GlobalSharedMemory::Init(Grid_MPI_Comm comm) | ||||
| { | ||||
| @@ -66,8 +174,8 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) | ||||
|   MPI_Comm_size(WorldShmComm     ,&WorldShmSize); | ||||
|  | ||||
|   if ( WorldRank == 0) { | ||||
|     std::cout << header " World communicator of size " <<WorldSize << std::endl;   | ||||
|     std::cout << header " Node  communicator of size " <<WorldShmSize << std::endl; | ||||
|     std::cout << Mheader " World communicator of size " <<WorldSize << std::endl;   | ||||
|     std::cout << Mheader " Node  communicator of size " <<WorldShmSize << std::endl; | ||||
|   } | ||||
|   // WorldShmComm, WorldShmSize, WorldShmRank | ||||
|  | ||||
| @@ -344,7 +452,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce | ||||
| #ifdef GRID_MPI3_SHMGET | ||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| { | ||||
|   std::cout << header "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl; | ||||
|   std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl; | ||||
|   assert(_ShmSetup==1); | ||||
|   assert(_ShmAlloc==0); | ||||
|  | ||||
| @@ -429,7 +537,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     exit(EXIT_FAILURE);   | ||||
|   } | ||||
|  | ||||
|   std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||
|   std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||
| 	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; | ||||
|  | ||||
|   SharedMemoryZero(ShmCommBuf,bytes); | ||||
| @@ -472,7 +580,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     exit(EXIT_FAILURE);   | ||||
|   } | ||||
|   if ( WorldRank == 0 ){ | ||||
|     std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||
|     std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes  | ||||
| 	      << "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl; | ||||
|   } | ||||
|   SharedMemoryZero(ShmCommBuf,bytes); | ||||
| @@ -480,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Loop over ranks/gpu's on our node | ||||
|   /////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #ifdef SHM_SOCKETS | ||||
|   UnixSockets::Open(WorldShmRank); | ||||
| #endif | ||||
|   for(int r=0;r<WorldShmSize;r++){ | ||||
|  | ||||
|     MPI_Barrier(WorldShmComm); | ||||
|  | ||||
| #ifndef GRID_MPI3_SHM_NONE | ||||
|     ////////////////////////////////////////////////// | ||||
|     // If it is me, pass around the IPC access key | ||||
| @@ -489,7 +602,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     void * thisBuf = ShmCommBuf; | ||||
|     if(!Stencil_force_mpi) { | ||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||
|     typedef struct { int fd; pid_t pid ; } clone_mem_t; | ||||
|     typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t; | ||||
|  | ||||
|     auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device()); | ||||
|     auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context()); | ||||
| @@ -500,13 +613,21 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     if ( r==WorldShmRank ) {  | ||||
|       auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle); | ||||
|       if ( err != ZE_RESULT_SUCCESS ) { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
| 	exit(EXIT_FAILURE); | ||||
|       } else { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; | ||||
|       } | ||||
|       memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int)); | ||||
|       handle.pid = getpid(); | ||||
|       memcpy((void *)&handle.ze,(void *)&ihandle,sizeof(ihandle)); | ||||
| #ifdef SHM_SOCKETS | ||||
|       for(int rr=0;rr<WorldShmSize;rr++){ | ||||
| 	if(rr!=r){ | ||||
| 	  UnixSockets::SendFileDescriptor(handle.fd,rr); | ||||
| 	} | ||||
|       } | ||||
| #endif | ||||
|     } | ||||
| #endif | ||||
| #ifdef GRID_CUDA | ||||
| @@ -534,6 +655,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     // Share this IPC handle across the Shm Comm | ||||
|     ////////////////////////////////////////////////// | ||||
|     {  | ||||
|       MPI_Barrier(WorldShmComm); | ||||
|       int ierr=MPI_Bcast(&handle, | ||||
| 			 sizeof(handle), | ||||
| 			 MPI_BYTE, | ||||
| @@ -549,6 +671,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| #ifdef GRID_SYCL_LEVEL_ZERO_IPC | ||||
|     if ( r!=WorldShmRank ) { | ||||
|       thisBuf = nullptr; | ||||
|       int myfd; | ||||
| #ifdef SHM_SOCKETS | ||||
|       myfd=UnixSockets::RecvFileDescriptor(); | ||||
| #else | ||||
|       std::cout<<"mapping seeking remote pid/fd " | ||||
| 	       <<handle.pid<<"/" | ||||
| 	       <<handle.fd<<std::endl; | ||||
| @@ -556,16 +682,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|       int pidfd = syscall(SYS_pidfd_open,handle.pid,0); | ||||
|       std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n"; | ||||
|       //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0); | ||||
|       int myfd  = syscall(438,pidfd,handle.fd,0); | ||||
|  | ||||
|       std::cout<<"Using IpcHandle myfd "<<myfd<<"\n"; | ||||
|        | ||||
|       myfd  = syscall(438,pidfd,handle.fd,0); | ||||
|       int err_t = errno; | ||||
|       if (myfd < 0) { | ||||
|         fprintf(stderr,"pidfd_getfd returned %d errno was %d\n", myfd,err_t); fflush(stderr); | ||||
| 	perror("pidfd_getfd failed "); | ||||
| 	assert(0); | ||||
|       } | ||||
| #endif | ||||
|       std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n"; | ||||
|       memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle)); | ||||
|       memcpy((void *)&ihandle,(void *)&myfd,sizeof(int)); | ||||
|  | ||||
|       auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf); | ||||
|       if ( err != ZE_RESULT_SUCCESS ) { | ||||
| 	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||
| 	std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl; | ||||
| 	std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;  | ||||
| 	exit(EXIT_FAILURE); | ||||
|       } else { | ||||
| 	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl; | ||||
| @@ -600,6 +732,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| #else | ||||
|     WorldShmCommBufs[r] = ShmCommBuf; | ||||
| #endif | ||||
|     MPI_Barrier(WorldShmComm); | ||||
|   } | ||||
|  | ||||
|   _ShmAllocBytes=bytes; | ||||
| @@ -611,7 +744,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| #ifdef GRID_MPI3_SHMMMAP | ||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| { | ||||
|   std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl; | ||||
|   std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl; | ||||
|   assert(_ShmSetup==1); | ||||
|   assert(_ShmAlloc==0); | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -648,7 +781,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
|     assert(((uint64_t)ptr&0x3F)==0); | ||||
|     close(fd); | ||||
|     WorldShmCommBufs[r] =ptr; | ||||
|     //    std::cout << header "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|     //    std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl; | ||||
|   } | ||||
|   _ShmAlloc=1; | ||||
|   _ShmAllocBytes  = bytes; | ||||
| @@ -658,7 +791,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| #ifdef GRID_MPI3_SHM_NONE | ||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| { | ||||
|   std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl; | ||||
|   std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl; | ||||
|   assert(_ShmSetup==1); | ||||
|   assert(_ShmAlloc==0); | ||||
|   ////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -705,7 +838,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| //////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) | ||||
| {  | ||||
|   std::cout << header "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl; | ||||
|   std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl; | ||||
|   assert(_ShmSetup==1); | ||||
|   assert(_ShmAlloc==0);  | ||||
|   MPI_Barrier(WorldShmComm); | ||||
|   | ||||
| @@ -707,9 +707,9 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro | ||||
|   Coordinate ist = Tg->_istride; | ||||
|   Coordinate ost = Tg->_ostride; | ||||
|  | ||||
|   autoView( t_v , To, AcceleratorWrite); | ||||
|   autoView( f_v , From, AcceleratorRead); | ||||
|   accelerator_for(idx,Fg->lSites(),1,{ | ||||
|   autoView( t_v , To, CpuWrite); | ||||
|   autoView( f_v , From, CpuRead); | ||||
|   thread_for(idx,Fg->lSites(),{ | ||||
|     sobj s; | ||||
|     Coordinate Fcoor(nd); | ||||
|     Coordinate Tcoor(nd); | ||||
| @@ -722,15 +722,20 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro | ||||
|       Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d]; | ||||
|     } | ||||
|     if (in_region) { | ||||
|       Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); | ||||
|       Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); | ||||
|       Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); | ||||
|       Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); | ||||
|       vector_type * fp = (vector_type *)&f_v[odx_f]; | ||||
|       vector_type * tp = (vector_type *)&t_v[odx_t]; | ||||
| #if 0       | ||||
|       Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); // inner index from | ||||
|       Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); // inner index to | ||||
|       Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); // outer index from | ||||
|       Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); // outer index to | ||||
|       scalar_type * fp = (scalar_type *)&f_v[odx_f]; | ||||
|       scalar_type * tp = (scalar_type *)&t_v[odx_t]; | ||||
|       for(int w=0;w<words;w++){ | ||||
| 	tp[w].putlane(fp[w].getlane(idx_f),idx_t); | ||||
|       } | ||||
| #else | ||||
|     peekLocalSite(s,f_v,Fcoor); | ||||
|     pokeLocalSite(s,t_v,Tcoor); | ||||
| #endif | ||||
|     } | ||||
|   }); | ||||
| } | ||||
|   | ||||
							
								
								
									
										136
									
								
								Grid/lattice/PaddedCell.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								Grid/lattice/PaddedCell.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,136 @@ | ||||
| /************************************************************************************* | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/lattice/PaddedCell.h | ||||
|  | ||||
|     Copyright (C) 2019 | ||||
|  | ||||
| Author: Peter Boyle pboyle@bnl.gov | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| class PaddedCell { | ||||
| public: | ||||
|   GridCartesian * unpadded_grid; | ||||
|   int dims; | ||||
|   int depth; | ||||
|   std::vector<GridCartesian *> grids; | ||||
|   ~PaddedCell() | ||||
|   { | ||||
|     DeleteGrids(); | ||||
|   } | ||||
|   PaddedCell(int _depth,GridCartesian *_grid) | ||||
|   { | ||||
|     unpadded_grid = _grid; | ||||
|     depth=_depth; | ||||
|     dims=_grid->Nd(); | ||||
|     AllocateGrids(); | ||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||
|     for(int d=0;d<dims;d++){ | ||||
|       assert(local[d]>=depth); | ||||
|     } | ||||
|   } | ||||
|   void DeleteGrids(void) | ||||
|   { | ||||
|     for(int d=0;d<grids.size();d++){ | ||||
|       delete grids[d]; | ||||
|     } | ||||
|     grids.resize(0); | ||||
|   }; | ||||
|   void AllocateGrids(void) | ||||
|   { | ||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||
|     Coordinate simd      =unpadded_grid->_simd_layout; | ||||
|     Coordinate processors=unpadded_grid->_processors; | ||||
|     Coordinate plocal    =unpadded_grid->LocalDimensions(); | ||||
|     Coordinate global(dims); | ||||
|  | ||||
|     // expand up one dim at a time | ||||
|     for(int d=0;d<dims;d++){ | ||||
|  | ||||
|       plocal[d] += 2*depth;  | ||||
|  | ||||
|       for(int d=0;d<dims;d++){ | ||||
| 	global[d] = plocal[d]*processors[d]; | ||||
|       } | ||||
|  | ||||
|       grids.push_back(new GridCartesian(global,simd,processors)); | ||||
|     } | ||||
|   }; | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Extract(Lattice<vobj> &in) | ||||
|   { | ||||
|     Lattice<vobj> out(unpadded_grid); | ||||
|  | ||||
|     Coordinate local     =unpadded_grid->LocalDimensions(); | ||||
|     Coordinate fll(dims,depth); // depends on the MPI spread | ||||
|     Coordinate tll(dims,0); // depends on the MPI spread | ||||
|     localCopyRegion(in,out,fll,tll,local); | ||||
|     return out; | ||||
|   } | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Exchange(Lattice<vobj> &in) | ||||
|   { | ||||
|     GridBase *old_grid = in.Grid(); | ||||
|     int dims = old_grid->Nd(); | ||||
|     Lattice<vobj> tmp = in; | ||||
|     for(int d=0;d<dims;d++){ | ||||
|       tmp = Expand(d,tmp); // rvalue && assignment | ||||
|     } | ||||
|     return tmp; | ||||
|   } | ||||
|   // expand up one dim at a time | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Expand(int dim,Lattice<vobj> &in) | ||||
|   { | ||||
|     GridBase *old_grid = in.Grid(); | ||||
|     GridCartesian *new_grid = grids[dim];//These are new grids | ||||
|     Lattice<vobj>  padded(new_grid); | ||||
|     Lattice<vobj> shifted(old_grid);     | ||||
|     Coordinate local     =old_grid->LocalDimensions(); | ||||
|     Coordinate plocal    =new_grid->LocalDimensions(); | ||||
|     if(dim==0) conformable(old_grid,unpadded_grid); | ||||
|     else       conformable(old_grid,grids[dim-1]); | ||||
|  | ||||
|     std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl; | ||||
|     // Middle bit | ||||
|     for(int x=0;x<local[dim];x++){ | ||||
|       InsertSliceLocal(in,padded,x,depth+x,dim); | ||||
|     } | ||||
|     // High bit | ||||
|     shifted = Cshift(in,dim,depth); | ||||
|     for(int x=0;x<depth;x++){ | ||||
|       InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim); | ||||
|     } | ||||
|     // Low bit | ||||
|     shifted = Cshift(in,dim,-depth); | ||||
|     for(int x=0;x<depth;x++){ | ||||
|       InsertSliceLocal(shifted,padded,x,x,dim); | ||||
|     } | ||||
|     return padded; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|   | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| @@ -104,6 +104,7 @@ template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iSca | ||||
| template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ; | ||||
| template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >; | ||||
| template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ; | ||||
| template<typename vtype> using iLorentzComplex            = iVector<iScalar<iScalar<vtype> >, Nd > ; | ||||
| template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ; | ||||
| template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >; | ||||
| template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >; | ||||
| @@ -178,6 +179,15 @@ typedef iLorentzColourMatrix<vComplexF>  vLorentzColourMatrixF; | ||||
| typedef iLorentzColourMatrix<vComplexD>  vLorentzColourMatrixD; | ||||
| typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2; | ||||
|  | ||||
| // LorentzComplex | ||||
| typedef iLorentzComplex<Complex  > LorentzComplex; | ||||
| typedef iLorentzComplex<ComplexF > LorentzComplexF; | ||||
| typedef iLorentzComplex<ComplexD > LorentzComplexD; | ||||
|  | ||||
| typedef iLorentzComplex<vComplex > vLorentzComplex; | ||||
| typedef iLorentzComplex<vComplexF> vLorentzComplexF; | ||||
| typedef iLorentzComplex<vComplexD> vLorentzComplexD; | ||||
|  | ||||
| // DoubleStored gauge field | ||||
| typedef iDoubleStoredColourMatrix<Complex  > DoubleStoredColourMatrix; | ||||
| typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF; | ||||
| @@ -307,6 +317,10 @@ typedef Lattice<vLorentzColourMatrixF>  LatticeLorentzColourMatrixF; | ||||
| typedef Lattice<vLorentzColourMatrixD>  LatticeLorentzColourMatrixD; | ||||
| typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2; | ||||
|  | ||||
| typedef Lattice<vLorentzComplex>  LatticeLorentzComplex; | ||||
| typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF; | ||||
| typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD; | ||||
|  | ||||
| // DoubleStored gauge field | ||||
| typedef Lattice<vDoubleStoredColourMatrix>   LatticeDoubleStoredColourMatrix; | ||||
| typedef Lattice<vDoubleStoredColourMatrixF>  LatticeDoubleStoredColourMatrixF; | ||||
|   | ||||
| @@ -34,10 +34,24 @@ directory | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| /////////////////////////////////// | ||||
| // Smart configuration base class | ||||
| /////////////////////////////////// | ||||
| template< class Field > | ||||
| class ConfigurationBase | ||||
| { | ||||
| public: | ||||
|   ConfigurationBase() {} | ||||
|   virtual ~ConfigurationBase() {} | ||||
|   virtual void set_Field(Field& U) =0; | ||||
|   virtual void smeared_force(Field&) const = 0; | ||||
|   virtual Field& get_SmearedU() =0; | ||||
|   virtual Field &get_U(bool smeared = false) = 0; | ||||
| }; | ||||
|  | ||||
| template <class GaugeField > | ||||
| class Action  | ||||
| { | ||||
|  | ||||
| public: | ||||
|   bool is_smeared = false; | ||||
|   RealD deriv_norm_sum; | ||||
| @@ -77,11 +91,39 @@ public: | ||||
|   void refresh_timer_stop(void)  { refresh_us+=usecond(); } | ||||
|   void S_timer_start(void)       { S_us-=usecond(); } | ||||
|   void S_timer_stop(void)        { S_us+=usecond(); } | ||||
|   ///////////////////////////// | ||||
|   // Heatbath? | ||||
|   ///////////////////////////// | ||||
|   virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions | ||||
|   virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action | ||||
|   virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ;  // if the refresh computes the action, can cache it. Alternately refreshAndAction() ? | ||||
|   virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0;        // evaluate the action derivative | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   // virtual smeared interface through configuration container | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) | ||||
|   { | ||||
|     refresh(U.get_U(is_smeared),sRNG,pRNG); | ||||
|   } | ||||
|   virtual RealD S(ConfigurationBase<GaugeField>& U) | ||||
|   { | ||||
|     return S(U.get_U(is_smeared)); | ||||
|   } | ||||
|   virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)  | ||||
|   { | ||||
|     return Sinitial(U.get_U(is_smeared)); | ||||
|   } | ||||
|   virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU) | ||||
|   { | ||||
|     deriv(U.get_U(is_smeared),dSdU);  | ||||
|     if ( is_smeared ) { | ||||
|       U.smeared_force(dSdU); | ||||
|     } | ||||
|   } | ||||
|   /////////////////////////////// | ||||
|   // Logging | ||||
|   /////////////////////////////// | ||||
|   virtual std::string action_name()    = 0;                             // return the action name | ||||
|   virtual std::string LogParameters()  = 0;                             // prints action parameters | ||||
|   virtual ~Action(){} | ||||
|   | ||||
| @@ -30,6 +30,8 @@ directory | ||||
| #ifndef QCD_ACTION_CORE | ||||
| #define QCD_ACTION_CORE | ||||
|  | ||||
| #include <Grid/qcd/action/gauge/GaugeImplementations.h> | ||||
|  | ||||
| #include <Grid/qcd/action/ActionBase.h> | ||||
| NAMESPACE_CHECK(ActionBase); | ||||
| #include <Grid/qcd/action/ActionSet.h> | ||||
|   | ||||
| @@ -507,6 +507,7 @@ public: | ||||
|     } | ||||
|     this->face_table_computed=1; | ||||
|     assert(this->u_comm_offset==this->_unified_buffer_size); | ||||
|     accelerator_barrier(); | ||||
|   } | ||||
|  | ||||
| }; | ||||
|   | ||||
| @@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | ||||
|   ///////////////////////////// | ||||
|   { | ||||
|     GRID_TRACE("Gather"); | ||||
|     st.HaloExchangeOptGather(in,compressor); | ||||
|     accelerator_barrier(); | ||||
|     st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine | ||||
|   } | ||||
|    | ||||
|   std::vector<std::vector<CommsRequest_t> > requests; | ||||
|   | ||||
| @@ -428,9 +428,10 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S | ||||
|   auto ptr = &st.surface_list[0];					\ | ||||
|   accelerator_forNB( ss, sz, Simd::Nsimd(), {				\ | ||||
|       int sF = ptr[ss];							\ | ||||
|       int sU = ss/Ls;							\ | ||||
|       int sU = sF/Ls;							\ | ||||
|       WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\ | ||||
|     });									 | ||||
|     });									\ | ||||
|   accelerator_barrier(); | ||||
|  | ||||
| #define ASM_CALL(A)							\ | ||||
|   thread_for( sss, Nsite, {						\ | ||||
| @@ -474,9 +475,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteInt);    return;} | ||||
| #endif | ||||
|    } else if( exterior ) { | ||||
|      // dependent on result of merge | ||||
|      acceleratorFenceComputeStream(); | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteExt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt);    return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt);    return;} | ||||
| #ifndef GRID_CUDA | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteExt);    return;} | ||||
| #endif | ||||
| @@ -506,9 +508,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagInt);     return;} | ||||
| #endif | ||||
|    } else if( exterior ) { | ||||
|      // Dependent on result of merge | ||||
|      acceleratorFenceComputeStream(); | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt);    return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;} | ||||
|      if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt);    return;} | ||||
| #ifndef GRID_CUDA | ||||
|      if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagExt);     return;} | ||||
| #endif | ||||
|   | ||||
| @@ -53,9 +53,10 @@ NAMESPACE_BEGIN(Grid); | ||||
|       Integer ReliableUpdateFreq; | ||||
|     protected: | ||||
|  | ||||
|       //Action evaluation | ||||
|       //Allow derived classes to override the multishift CG | ||||
|       virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){ | ||||
| #if 0 | ||||
| #if 1 | ||||
| 	SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD); | ||||
| 	ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx); | ||||
| 	msCG(schurOp,in, out); | ||||
| @@ -70,9 +71,10 @@ NAMESPACE_BEGIN(Grid); | ||||
| 	msCG(schurOpD, in, out); | ||||
| #endif | ||||
|       } | ||||
|       //Force evaluation | ||||
|       virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){ | ||||
| 	SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD); | ||||
| 	SchurDifferentiableOperator<ImplF>  schurOpF (numerator ? NumOpF  : DenOpF); | ||||
| 	SchurDifferentiableOperator<ImplF>  schurOpF(numerator ? NumOpF  : DenOpF); | ||||
|  | ||||
| 	FermionFieldD inD(NumOpD.FermionRedBlackGrid()); | ||||
| 	FermionFieldD outD(NumOpD.FermionRedBlackGrid()); | ||||
| @@ -84,20 +86,15 @@ NAMESPACE_BEGIN(Grid); | ||||
|       virtual void ImportGauge(const typename ImplD::GaugeField &Ud){ | ||||
|  | ||||
| 	typename ImplF::GaugeField Uf(NumOpF.GaugeGrid()); | ||||
| 	typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid()); | ||||
| 	precisionChange(Uf, Ud); | ||||
| 	precisionChange(Ud2, Ud); | ||||
|  | ||||
| 	std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl; | ||||
| 	std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl; | ||||
| 	 | ||||
| 	NumOpD.ImportGauge(Ud); | ||||
| 	DenOpD.ImportGauge(Ud); | ||||
|  | ||||
| 	NumOpF.ImportGauge(Uf); | ||||
| 	DenOpF.ImportGauge(Uf); | ||||
|  | ||||
| 	NumOpD.ImportGauge(Ud2); | ||||
| 	DenOpD.ImportGauge(Ud2); | ||||
|       } | ||||
|        | ||||
|     public: | ||||
|   | ||||
| @@ -207,20 +207,27 @@ NAMESPACE_BEGIN(Grid); | ||||
|         //X = (Mdag M)^-1 V^dag phi | ||||
|         //Y = (Mdag)^-1 V^dag  phi | ||||
|         Vpc.MpcDag(PhiOdd,Y);          // Y= Vdag phi | ||||
| 	std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl; | ||||
|         X=Zero(); | ||||
|         DerivativeSolver(Mpc,Y,X);     // X= (MdagM)^-1 Vdag phi | ||||
| 	std::cout << GridLogMessage <<" X "<<norm2(X)<<std::endl; | ||||
|         Mpc.Mpc(X,Y);                  // Y=  Mdag^-1 Vdag phi | ||||
| 	std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl; | ||||
|  | ||||
|         // phi^dag V (Mdag M)^-1 dV^dag  phi | ||||
|         Vpc.MpcDagDeriv(force , X, PhiOdd );   dSdU = force; | ||||
| 	std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl; | ||||
|    | ||||
|         // phi^dag dV (Mdag M)^-1 V^dag  phi | ||||
|         Vpc.MpcDeriv(force , PhiOdd, X );      dSdU = dSdU+force; | ||||
| 	std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl; | ||||
|  | ||||
|         //    -    phi^dag V (Mdag M)^-1 Mdag dM   (Mdag M)^-1 V^dag  phi | ||||
|         //    -    phi^dag V (Mdag M)^-1 dMdag M   (Mdag M)^-1 V^dag  phi | ||||
|         Mpc.MpcDeriv(force,Y,X);              dSdU = dSdU-force; | ||||
| 	std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl; | ||||
|         Mpc.MpcDagDeriv(force,X,Y);           dSdU = dSdU-force; | ||||
| 	std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl; | ||||
|  | ||||
|         // FIXME No force contribution from EvenEven assumed here | ||||
|         // Needs a fix for clover. | ||||
|   | ||||
| @@ -7,26 +7,27 @@ | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|  | ||||
| //trivial class for no smearing | ||||
| template< class Impl > | ||||
| class NoSmearing | ||||
| class NoSmearing : public ConfigurationBase<typename Impl::Field> | ||||
| { | ||||
| public: | ||||
|   INHERIT_FIELD_TYPES(Impl); | ||||
|  | ||||
|   Field* ThinField; | ||||
|   Field* ThinLinks; | ||||
|  | ||||
|   NoSmearing(): ThinField(NULL) {} | ||||
|   NoSmearing(): ThinLinks(NULL) {} | ||||
|  | ||||
|   void set_Field(Field& U) { ThinField = &U; } | ||||
|   void set_Field(Field& U) { ThinLinks = &U; } | ||||
|  | ||||
|   void smeared_force(Field&) const {} | ||||
|  | ||||
|   Field& get_SmearedU() { return *ThinField; } | ||||
|   Field& get_SmearedU() { return *ThinLinks; } | ||||
|  | ||||
|   Field &get_U(bool smeared = false) | ||||
|   { | ||||
|     return *ThinField; | ||||
|     return *ThinLinks; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| @@ -42,19 +43,24 @@ public: | ||||
|   It stores a list of smeared configurations. | ||||
| */ | ||||
| template <class Gimpl> | ||||
| class SmearedConfiguration | ||||
| class SmearedConfiguration : public ConfigurationBase<typename Gimpl::Field> | ||||
| { | ||||
| public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
| private: | ||||
| protected: | ||||
|   const unsigned int smearingLevels; | ||||
|   Smear_Stout<Gimpl> *StoutSmearing; | ||||
|   std::vector<GaugeField> SmearedSet; | ||||
| public: | ||||
|   GaugeField*  ThinLinks; /* Pointer to the thin links configuration */ // move to base??? | ||||
| protected: | ||||
|    | ||||
|   // Member functions | ||||
|   //==================================================================== | ||||
|   void fill_smearedSet(GaugeField &U) | ||||
|  | ||||
|   // Overridden in masked version | ||||
|   virtual void fill_smearedSet(GaugeField &U) | ||||
|   { | ||||
|     ThinLinks = &U;  // attach the smearing routine to the field U | ||||
|  | ||||
| @@ -82,8 +88,9 @@ private: | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   //==================================================================== | ||||
|   GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, | ||||
|  | ||||
|   //overridden in masked verson | ||||
|   virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, | ||||
| 					  const GaugeField& GaugeK) const  | ||||
|   { | ||||
|     GridBase* grid = GaugeK.Grid(); | ||||
| @@ -213,8 +220,6 @@ private: | ||||
|  | ||||
|   //==================================================================== | ||||
| public: | ||||
|   GaugeField* | ||||
|       ThinLinks; /* Pointer to the thin links configuration */ | ||||
|  | ||||
|   /* Standard constructor */ | ||||
|   SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear, | ||||
|   | ||||
							
								
								
									
										776
									
								
								Grid/qcd/smearing/GaugeConfigurationMasked.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										776
									
								
								Grid/qcd/smearing/GaugeConfigurationMasked.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,776 @@ | ||||
| /*! | ||||
|   @file GaugeConfiguration.h | ||||
|   @brief Declares the GaugeConfiguration class | ||||
| */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| /*! | ||||
|   @brief Smeared configuration masked container | ||||
|   Modified for a multi-subset smearing (aka Luscher Flowed HMC) | ||||
| */ | ||||
| template <class Gimpl> | ||||
| class SmearedConfigurationMasked : public SmearedConfiguration<Gimpl> | ||||
| { | ||||
| public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
| private: | ||||
|   // These live in base class | ||||
|   //  const unsigned int smearingLevels; | ||||
|   //  Smear_Stout<Gimpl> *StoutSmearing; | ||||
|   //  std::vector<GaugeField> SmearedSet; | ||||
|    | ||||
|   std::vector<LatticeLorentzComplex> masks; | ||||
|  | ||||
|   typedef typename SU3Adjoint::AMatrix AdjMatrix; | ||||
|   typedef typename SU3Adjoint::LatticeAdjMatrix  AdjMatrixField; | ||||
|   typedef typename SU3Adjoint::LatticeAdjVector  AdjVectorField; | ||||
|  | ||||
|   // Adjoint vector to GaugeField force | ||||
|   void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu) | ||||
|   { | ||||
|     Complex ci(0,1); | ||||
|     GaugeLinkField Fdet_pol(Fdet.Grid()); | ||||
|     Fdet_pol=Zero(); | ||||
|     for(int e=0;e<8;e++){ | ||||
|       ColourMatrix te; | ||||
|       SU3::generator(e, te); | ||||
|       auto tmp=peekColour(Fdet_nu,e); | ||||
|       Fdet_pol=Fdet_pol + ci*tmp*te; // but norm of te is different.. why? | ||||
|     } | ||||
|     pokeLorentz(Fdet, Fdet_pol, nu); | ||||
|   } | ||||
|   void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 ) | ||||
|   { | ||||
|     GaugeLinkField UtaU(PlaqL.Grid()); | ||||
|     GaugeLinkField D(PlaqL.Grid()); | ||||
|     AdjMatrixField Dbc(PlaqL.Grid()); | ||||
|     LatticeComplex tmp(PlaqL.Grid()); | ||||
|     const int Ngen = SU3Adjoint::Dimension; | ||||
|     Complex ci(0,1); | ||||
|     ColourMatrix   ta,tb,tc; | ||||
|      | ||||
|     for(int a=0;a<Ngen;a++) { | ||||
|       SU3::generator(a, ta); | ||||
|       // Qlat Tb = 2i Tb^Grid | ||||
|       UtaU= 2.0*ci*adj(PlaqL)*ta*PlaqR; | ||||
|       for(int c=0;c<Ngen;c++) { | ||||
| 	SU3::generator(c, tc); | ||||
| 	D = Ta( (2.0)*ci*tc *UtaU); | ||||
| 	for(int b=0;b<Ngen;b++){ | ||||
| 	  SU3::generator(b, tb); | ||||
| 	  tmp =-trace(ci*tb*D);  | ||||
| 	  PokeIndex<ColourIndex>(Dbc,tmp,b,c);  // Adjoint rep | ||||
| 	} | ||||
|       } | ||||
|       tmp = trace(MpInvJx * Dbc); | ||||
|       PokeIndex<ColourIndex>(Fdet2,tmp,a); | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd) | ||||
|   { | ||||
|     GaugeLinkField Nx(PlaqL.Grid()); | ||||
|     const int Ngen = SU3Adjoint::Dimension; | ||||
|     Complex ci(0,1); | ||||
|     ColourMatrix   tb; | ||||
|     ColourMatrix   tc; | ||||
|     for(int b=0;b<Ngen;b++) { | ||||
|       SU3::generator(b, tb); | ||||
|       Nx = (2.0)*Ta( adj(PlaqL)*ci*tb * PlaqR ); | ||||
|       for(int c=0;c<Ngen;c++) { | ||||
| 	SU3::generator(c, tc); | ||||
| 	auto tmp =closure( -trace(ci*tc*Nx));  | ||||
| 	PokeIndex<ColourIndex>(NxAd,tmp,c,b);  | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   void ApplyMask(GaugeField &U,int smr) | ||||
|   { | ||||
|     LatticeComplex tmp(U.Grid()); | ||||
|     GaugeLinkField Umu(U.Grid()); | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       Umu=PeekIndex<LorentzIndex>(U,mu); | ||||
|       tmp=PeekIndex<LorentzIndex>(masks[smr],mu); | ||||
|       Umu=Umu*tmp; | ||||
|       PokeIndex<LorentzIndex>(U, Umu, mu); | ||||
|     } | ||||
|   } | ||||
| public: | ||||
|  | ||||
|   void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr) | ||||
|   { | ||||
|     GridBase* grid = U.Grid(); | ||||
|     ColourMatrix   tb; | ||||
|     ColourMatrix   tc; | ||||
|     ColourMatrix   ta; | ||||
|     GaugeField C(grid); | ||||
|     GaugeField Umsk(grid); | ||||
|     std::vector<GaugeLinkField> Umu(Nd,grid); | ||||
|     GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon | ||||
|     GaugeLinkField Zx(grid);  // U times Staple, contains factor of epsilon | ||||
|     GaugeLinkField Nxx(grid);  // Nxx fundamental space | ||||
|     GaugeLinkField Utmp(grid); | ||||
|     GaugeLinkField PlaqL(grid); | ||||
|     GaugeLinkField PlaqR(grid); | ||||
|     const int Ngen = SU3Adjoint::Dimension; | ||||
|     AdjMatrix TRb; | ||||
|     ColourMatrix Ident; | ||||
|     LatticeComplex  cplx(grid); | ||||
|      | ||||
|     AdjVectorField  dJdXe_nMpInv(grid);  | ||||
|     AdjVectorField  dJdXe_nMpInv_y(grid);  | ||||
|     AdjMatrixField  MpAd(grid);    // Mprime luchang's notes | ||||
|     AdjMatrixField  MpAdInv(grid); // Mprime inverse | ||||
|     AdjMatrixField  NxxAd(grid);    // Nxx in adjoint space | ||||
|     AdjMatrixField  JxAd(grid);      | ||||
|     AdjMatrixField  ZxAd(grid); | ||||
|     AdjMatrixField  mZxAd(grid); | ||||
|     AdjMatrixField  X(grid); | ||||
|     Complex ci(0,1); | ||||
|  | ||||
|     Ident = ComplexD(1.0); | ||||
|     for(int d=0;d<Nd;d++){ | ||||
|       Umu[d] = peekLorentz(U, d); | ||||
|     } | ||||
|     int mu= (smr/2) %Nd; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Mask the gauge field | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask | ||||
|  | ||||
|     Umsk = U; | ||||
|     ApplyMask(Umsk,smr); | ||||
|     Utmp = peekLorentz(Umsk,mu); | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Retrieve the eps/rho parameter(s) -- could allow all different but not so far | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     double rho=this->StoutSmearing->SmearRho[1]; | ||||
|     int idx=0; | ||||
|     for(int mu=0;mu<4;mu++){ | ||||
|     for(int nu=0;nu<4;nu++){ | ||||
|       if ( mu!=nu) assert(this->StoutSmearing->SmearRho[idx]==rho); | ||||
|       else         assert(this->StoutSmearing->SmearRho[idx]==0.0); | ||||
|       idx++; | ||||
|     }} | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Assemble the N matrix | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Computes ALL the staples -- could compute one only and do it here | ||||
|     this->StoutSmearing->BaseSmear(C, U); | ||||
|     Cmu = peekLorentz(C, mu); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Assemble Luscher exp diff map J matrix  | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Ta so Z lives in Lie algabra | ||||
|     Zx  = Ta(Cmu * adj(Umu[mu])); | ||||
|  | ||||
|     // Move Z to the Adjoint Rep == make_adjoint_representation | ||||
|     ZxAd = Zero(); | ||||
|     for(int b=0;b<8;b++) { | ||||
|       // Adj group sets traceless antihermitian T's -- Guido, really???? | ||||
|       SU3::generator(b, tb);         // Fund group sets traceless hermitian T's | ||||
|       SU3Adjoint::generator(b,TRb); | ||||
|       TRb=-TRb; | ||||
|       cplx = 2.0*trace(ci*tb*Zx); // my convention 1/2 delta ba | ||||
|       ZxAd = ZxAd + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign. | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////// | ||||
|     // J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)! | ||||
|     ////////////////////////////////////// | ||||
|     X=1.0;  | ||||
|     JxAd = X; | ||||
|     mZxAd = (-1.0)*ZxAd;  | ||||
|     RealD kpfac = 1; | ||||
|     for(int k=1;k<12;k++){ | ||||
|       X=X*mZxAd; | ||||
|       kpfac = kpfac /(k+1); | ||||
|       JxAd = JxAd + X * kpfac; | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////// | ||||
|     // dJ(x)/dxe | ||||
|     ////////////////////////////////////// | ||||
|     std::vector<AdjMatrixField>  dJdX;    dJdX.resize(8,grid); | ||||
|     AdjMatrixField tbXn(grid); | ||||
|     AdjMatrixField sumXtbX(grid); | ||||
|     AdjMatrixField t2(grid); | ||||
|     AdjMatrixField dt2(grid); | ||||
|     AdjMatrixField t3(grid); | ||||
|     AdjMatrixField dt3(grid); | ||||
|     AdjMatrixField aunit(grid); | ||||
|     for(int b=0;b<8;b++){ | ||||
|       aunit = ComplexD(1.0); | ||||
|       SU3Adjoint::generator(b, TRb); //dt2 | ||||
|  | ||||
|       X  = (-1.0)*ZxAd;  | ||||
|       t2 = X; | ||||
|       dt2 = TRb; | ||||
|       for (int j = 20; j > 1; --j) { | ||||
| 	t3 = t2*(1.0 / (j + 1))  + aunit; | ||||
| 	dt3 = dt2*(1.0 / (j + 1)); | ||||
| 	t2 = X * t3; | ||||
| 	dt2 = TRb * t3 + X * dt3; | ||||
|       } | ||||
|       dJdX[b] = -dt2;  | ||||
|     } | ||||
|     ///////////////////////////////////////////////////////////////// | ||||
|     // Mask Umu for this link | ||||
|     ///////////////////////////////////////////////////////////////// | ||||
|     PlaqL = Ident; | ||||
|     PlaqR = Utmp*adj(Cmu); | ||||
|     ComputeNxy(PlaqL,PlaqR,NxxAd); | ||||
|      | ||||
|     //////////////////////////// | ||||
|     // Mab | ||||
|     //////////////////////////// | ||||
|     MpAd = Complex(1.0,0.0); | ||||
|     MpAd = MpAd - JxAd * NxxAd; | ||||
|  | ||||
|     ///////////////////////// | ||||
|     // invert the 8x8 | ||||
|     ///////////////////////// | ||||
|     MpAdInv = Inverse(MpAd); | ||||
|      | ||||
|     ///////////////////////////////////////////////////////////////// | ||||
|     // Nxx Mp^-1 | ||||
|     ///////////////////////////////////////////////////////////////// | ||||
|     AdjVectorField  FdetV(grid); | ||||
|     AdjVectorField  Fdet1_nu(grid); | ||||
|     AdjVectorField  Fdet2_nu(grid); | ||||
|     AdjVectorField  Fdet2_mu(grid); | ||||
|     AdjVectorField  Fdet1_mu(grid); | ||||
|  | ||||
|     AdjMatrixField nMpInv(grid); | ||||
|     nMpInv= NxxAd *MpAdInv; | ||||
|  | ||||
|     AdjMatrixField MpInvJx(grid); | ||||
|     AdjMatrixField MpInvJx_nu(grid); | ||||
|     MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor | ||||
|  | ||||
|     Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); | ||||
|     Fdet2_mu=FdetV; | ||||
|     Fdet1_mu=Zero(); | ||||
|      | ||||
|     for(int e =0 ; e<8 ; e++){ | ||||
|       LatticeComplexD tr(grid); | ||||
|       ColourMatrix te; | ||||
|       SU3::generator(e, te); | ||||
|       tr = trace(dJdX[e] * nMpInv); | ||||
|       pokeColour(dJdXe_nMpInv,tr,e); | ||||
|     } | ||||
|     /////////////////////////////// | ||||
|     // Mask it off | ||||
|     /////////////////////////////// | ||||
|     auto tmp=PeekIndex<LorentzIndex>(masks[smr],mu); | ||||
|     dJdXe_nMpInv = dJdXe_nMpInv*tmp; | ||||
|      | ||||
|     //    dJdXe_nMpInv needs to multiply: | ||||
|     //       Nxx_mu (site local)                           (1) | ||||
|     //       Nxy_mu one site forward  in each nu direction (3) | ||||
|     //       Nxy_mu one site backward in each nu direction (3) | ||||
|     //       Nxy_nu 0,0  ; +mu,0; 0,-nu; +mu-nu   [ 3x4 = 12] | ||||
|     // 19 terms. | ||||
|     AdjMatrixField Nxy(grid); | ||||
|  | ||||
|     GaugeField Fdet1(grid); | ||||
|     GaugeField Fdet2(grid); | ||||
|     GaugeLinkField Fdet_pol(grid); // one polarisation | ||||
|  | ||||
|     for(int nu=0;nu<Nd;nu++){ | ||||
|  | ||||
|       if (nu!=mu) { | ||||
| 	///////////////// +ve nu ///////////////// | ||||
| 	//     __ | ||||
| 	//    |  | | ||||
| 	//    x==    // nu polarisation -- clockwise | ||||
|  | ||||
| 	PlaqL=Ident; | ||||
|  | ||||
| 	PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu, | ||||
|  	       Gimpl::CovShiftForward(Umu[mu], mu, | ||||
| 	         Gimpl::CovShiftBackward(Umu[nu], nu, | ||||
| 		   Gimpl::CovShiftIdentityBackward(Utmp, mu)))); | ||||
|  | ||||
| 	dJdXe_nMpInv_y =   dJdXe_nMpInv; | ||||
| 	ComputeNxy(PlaqL,PlaqR,Nxy); | ||||
| 	Fdet1_nu = transpose(Nxy)*dJdXe_nMpInv_y; | ||||
|  | ||||
| 	PlaqR=(-1.0)*PlaqR; | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV); | ||||
| 	Fdet2_nu = FdetV; | ||||
| 	 | ||||
| 	//    x== | ||||
| 	//    |  | | ||||
| 	//    .__|    // nu polarisation -- anticlockwise | ||||
|  | ||||
| 	PlaqR=(rho)*Gimpl::CovShiftForward(Umu[nu], nu, | ||||
| 		      Gimpl::CovShiftBackward(Umu[mu], mu, | ||||
|     	 	        Gimpl::CovShiftIdentityBackward(Umu[nu], nu))); | ||||
|  | ||||
| 	PlaqL=Gimpl::CovShiftIdentityBackward(Utmp, mu); | ||||
|  | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1); | ||||
| 	ComputeNxy(PlaqL, PlaqR,Nxy); | ||||
| 	Fdet1_nu = Fdet1_nu+transpose(Nxy)*dJdXe_nMpInv_y; | ||||
| 	 | ||||
|  | ||||
| 	MpInvJx_nu = Cshift(MpInvJx,mu,-1); | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); | ||||
| 	Fdet2_nu = Fdet2_nu+FdetV; | ||||
| 	 | ||||
| 	///////////////// -ve nu ///////////////// | ||||
| 	//  __ | ||||
| 	// |  | | ||||
| 	// x==          // nu polarisation -- clockwise | ||||
|  | ||||
| 	PlaqL=(rho)* Gimpl::CovShiftForward(Umu[mu], mu, | ||||
| 		       Gimpl::CovShiftForward(Umu[nu], nu, | ||||
| 			 Gimpl::CovShiftIdentityBackward(Utmp, mu))); | ||||
|  | ||||
|         PlaqR = Gimpl::CovShiftIdentityForward(Umu[nu], nu); | ||||
|  | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1); | ||||
| 	ComputeNxy(PlaqL,PlaqR,Nxy); | ||||
| 	Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y; | ||||
|  | ||||
| 	MpInvJx_nu = Cshift(MpInvJx,nu,1); | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); | ||||
| 	Fdet2_nu = Fdet2_nu+FdetV; | ||||
| 	 | ||||
| 	// x== | ||||
| 	// |  | | ||||
| 	// |__|         // nu polarisation | ||||
|  | ||||
| 	PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu, | ||||
|  	        Gimpl::CovShiftIdentityBackward(Utmp, mu)); | ||||
|  | ||||
| 	PlaqR=Gimpl::CovShiftBackward(Umu[mu], mu, | ||||
| 	        Gimpl::CovShiftIdentityForward(Umu[nu], nu)); | ||||
|  | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1); | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv_y,nu,1); | ||||
|  | ||||
| 	ComputeNxy(PlaqL,PlaqR,Nxy); | ||||
| 	Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y; | ||||
|  | ||||
| 	MpInvJx_nu = Cshift(MpInvJx,mu,-1); | ||||
| 	MpInvJx_nu = Cshift(MpInvJx_nu,nu,1); | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); | ||||
| 	Fdet2_nu = Fdet2_nu+FdetV; | ||||
|  | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	// Set up the determinant force contribution in 3x3 algebra basis | ||||
| 	///////////////////////////////////////////////////////////////////// | ||||
| 	InsertForce(Fdet1,Fdet1_nu,nu); | ||||
| 	InsertForce(Fdet2,Fdet2_nu,nu); | ||||
| 	 | ||||
| 	////////////////////////////////////////////////// | ||||
| 	// Parallel direction terms | ||||
| 	////////////////////////////////////////////////// | ||||
|  | ||||
|         //     __ | ||||
| 	//    |  " | ||||
| 	//    |__"x    // mu polarisation | ||||
| 	PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, | ||||
| 		      Gimpl::CovShiftBackward(Umu[nu], nu, | ||||
|    		        Gimpl::CovShiftIdentityBackward(Utmp, mu))); | ||||
|  | ||||
| 	PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu); | ||||
| 	 | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1); | ||||
|  | ||||
| 	ComputeNxy(PlaqL,PlaqR,Nxy); | ||||
| 	Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; | ||||
|  | ||||
| 	MpInvJx_nu = Cshift(MpInvJx,nu,-1); | ||||
|  | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); | ||||
| 	Fdet2_mu = Fdet2_mu+FdetV; | ||||
|  | ||||
| 	//  __ | ||||
| 	// "  | | ||||
| 	// x__|          // mu polarisation | ||||
|  | ||||
| 	PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu, | ||||
| 		       Gimpl::CovShiftForward(Umu[nu], nu, | ||||
| 		 	 Gimpl::CovShiftIdentityBackward(Utmp, mu))); | ||||
|  | ||||
|         PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu); | ||||
|  | ||||
| 	dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1); | ||||
|  | ||||
| 	ComputeNxy(PlaqL,PlaqR,Nxy); | ||||
| 	Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y; | ||||
|  | ||||
| 	MpInvJx_nu = Cshift(MpInvJx,nu,1); | ||||
|  | ||||
| 	Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV); | ||||
| 	Fdet2_mu = Fdet2_mu+FdetV; | ||||
| 	 | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     Fdet1_mu = Fdet1_mu + transpose(NxxAd)*dJdXe_nMpInv; | ||||
|  | ||||
|     InsertForce(Fdet1,Fdet1_mu,mu); | ||||
|     InsertForce(Fdet2,Fdet2_mu,mu); | ||||
|  | ||||
|     force= (-0.5)*( Fdet1 + Fdet2); | ||||
|   } | ||||
|   RealD logDetJacobianLevel(const GaugeField &U,int smr) | ||||
|   { | ||||
|     GridBase* grid = U.Grid(); | ||||
|     GaugeField C(grid); | ||||
|     GaugeLinkField Nb(grid); | ||||
|     GaugeLinkField Z(grid); | ||||
|     GaugeLinkField Umu(grid), Cmu(grid); | ||||
|     ColourMatrix   Tb; | ||||
|     ColourMatrix   Tc; | ||||
|     typedef typename SU3Adjoint::AMatrix AdjMatrix; | ||||
|     typedef typename SU3Adjoint::LatticeAdjMatrix  AdjMatrixField; | ||||
|     typedef typename SU3Adjoint::LatticeAdjVector  AdjVectorField; | ||||
|     const int Ngen = SU3Adjoint::Dimension; | ||||
|     AdjMatrix TRb; | ||||
|     LatticeComplex       cplx(grid);  | ||||
|     AdjVectorField  AlgV(grid);  | ||||
|     AdjMatrixField  Mab(grid); | ||||
|     AdjMatrixField  Ncb(grid); | ||||
|     AdjMatrixField  Jac(grid); | ||||
|     AdjMatrixField  Zac(grid); | ||||
|     AdjMatrixField  mZac(grid); | ||||
|     AdjMatrixField  X(grid); | ||||
|  | ||||
|     int mu= (smr/2) %Nd; | ||||
|  | ||||
|     auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Assemble the N matrix | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Computes ALL the staples -- could compute one only here | ||||
|     this->StoutSmearing->BaseSmear(C, U); | ||||
|     Cmu = peekLorentz(C, mu); | ||||
|     Umu = peekLorentz(U, mu); | ||||
|     Complex ci(0,1); | ||||
|     for(int b=0;b<Ngen;b++) { | ||||
|       SU3::generator(b, Tb); | ||||
|       // Qlat Tb = 2i Tb^Grid | ||||
|       Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu)); | ||||
|       for(int c=0;c<Ngen;c++) { | ||||
| 	SU3::generator(c, Tc); | ||||
| 	auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important | ||||
| 	PokeIndex<ColourIndex>(Ncb,tmp,c,b);  | ||||
|       } | ||||
|     }       | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Assemble Luscher exp diff map J matrix  | ||||
|     ////////////////////////////////////////////////////////////////// | ||||
|     // Ta so Z lives in Lie algabra | ||||
|     Z  = Ta(Cmu * adj(Umu)); | ||||
|  | ||||
|     // Move Z to the Adjoint Rep == make_adjoint_representation | ||||
|     Zac = Zero(); | ||||
|     for(int b=0;b<8;b++) { | ||||
|       // Adj group sets traceless antihermitian T's -- Guido, really???? | ||||
|       // Is the mapping of these the same? Same structure constants | ||||
|       // Might never have been checked. | ||||
|       SU3::generator(b, Tb);         // Fund group sets traceless hermitian T's | ||||
|       SU3Adjoint::generator(b,TRb); | ||||
|       TRb=-TRb; | ||||
|       cplx = 2.0*trace(ci*Tb*Z); // my convention 1/2 delta ba | ||||
|       Zac = Zac + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign. | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////// | ||||
|     // J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)! | ||||
|     ////////////////////////////////////// | ||||
|     X=1.0;  | ||||
|     Jac = X; | ||||
|     mZac = (-1.0)*Zac;  | ||||
|     RealD kpfac = 1; | ||||
|     for(int k=1;k<12;k++){ | ||||
|       X=X*mZac; | ||||
|       kpfac = kpfac /(k+1); | ||||
|       Jac = Jac + X * kpfac; | ||||
|     } | ||||
|  | ||||
|     //////////////////////////// | ||||
|     // Mab | ||||
|     //////////////////////////// | ||||
|     Mab = Complex(1.0,0.0); | ||||
|     Mab = Mab - Jac * Ncb; | ||||
|  | ||||
|     //////////////////////////// | ||||
|     // det | ||||
|     //////////////////////////// | ||||
|     LatticeComplex       det(grid);  | ||||
|     det = Determinant(Mab); | ||||
|  | ||||
|     //////////////////////////// | ||||
|     // ln det | ||||
|     //////////////////////////// | ||||
|     LatticeComplex       ln_det(grid);  | ||||
|     ln_det = log(det); | ||||
|  | ||||
|     //////////////////////////// | ||||
|     // Masked sum | ||||
|     //////////////////////////// | ||||
|     ln_det = ln_det * mask; | ||||
|     Complex result = sum(ln_det); | ||||
|     return result.real(); | ||||
|   } | ||||
| public: | ||||
|   RealD logDetJacobian(void) | ||||
|   { | ||||
|     RealD ln_det = 0; | ||||
|     if (this->smearingLevels > 0) | ||||
|     { | ||||
|       double start = usecond(); | ||||
|       for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { | ||||
| 	ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr); | ||||
|       } | ||||
|       ln_det +=logDetJacobianLevel(*(this->ThinLinks),0); | ||||
|  | ||||
|       double end = usecond(); | ||||
|       double time = (end - start)/ 1e3; | ||||
|       std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl;   | ||||
|     } | ||||
|     return ln_det; | ||||
|   } | ||||
|   void logDetJacobianForce(GaugeField &force) | ||||
|   { | ||||
|     force =Zero(); | ||||
|     GaugeField force_det(force.Grid()); | ||||
|  | ||||
|     if (this->smearingLevels > 0) | ||||
|     { | ||||
|       double start = usecond(); | ||||
|  | ||||
|       GaugeLinkField tmp_mu(force.Grid()); | ||||
|  | ||||
|       for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { | ||||
|  | ||||
| 	// remove U in UdSdU... | ||||
| 	for (int mu = 0; mu < Nd; mu++) { | ||||
| 	  tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu); | ||||
| 	  pokeLorentz(force, tmp_mu, mu); | ||||
| 	} | ||||
| 	 | ||||
|       	// Propagate existing force | ||||
|         force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr); | ||||
|  | ||||
| 	// Add back U in UdSdU... | ||||
| 	for (int mu = 0; mu < Nd; mu++) { | ||||
| 	  tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu); | ||||
| 	  pokeLorentz(force, tmp_mu, mu); | ||||
| 	} | ||||
|     	 | ||||
| 	// Get this levels determinant force | ||||
| 	force_det = Zero(); | ||||
| 	logDetJacobianForceLevel(this->get_smeared_conf(ismr-1),force_det,ismr); | ||||
|  | ||||
| 	// Sum the contributions | ||||
| 	force = force + force_det; | ||||
|       } | ||||
|      | ||||
|       // remove U in UdSdU... | ||||
|       for (int mu = 0; mu < Nd; mu++) { | ||||
| 	tmp_mu = adj(peekLorentz(this->get_smeared_conf(0), mu)) * peekLorentz(force, mu); | ||||
| 	pokeLorentz(force, tmp_mu, mu); | ||||
|       } | ||||
|  | ||||
|       force = this->AnalyticSmearedForce(force, *this->ThinLinks,0); | ||||
|  | ||||
|       for (int mu = 0; mu < Nd; mu++) { | ||||
| 	tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu); | ||||
| 	pokeLorentz(force, tmp_mu, mu); | ||||
|       } | ||||
|  | ||||
|       force_det = Zero(); | ||||
|  | ||||
|       logDetJacobianForceLevel(*this->ThinLinks,force_det,0); | ||||
|  | ||||
|       force = force + force_det; | ||||
|  | ||||
|       force=Ta(force); // Ta | ||||
|        | ||||
|       double end = usecond(); | ||||
|       double time = (end - start)/ 1e3; | ||||
|       std::cout << GridLogMessage << "GaugeConfigurationMasked: lnDetJacobianForce took " << time << " ms" << std::endl;   | ||||
|     }  // if smearingLevels = 0 do nothing | ||||
|   } | ||||
|  | ||||
| private: | ||||
|   //==================================================================== | ||||
|   // Override base clas here to mask it | ||||
|   virtual void fill_smearedSet(GaugeField &U) | ||||
|   { | ||||
|     this->ThinLinks = &U;  // attach the smearing routine to the field U | ||||
|  | ||||
|     // check the pointer is not null | ||||
|     if (this->ThinLinks == NULL) | ||||
|       std::cout << GridLogError << "[SmearedConfigurationMasked] Error in ThinLinks pointer\n"; | ||||
|  | ||||
|     if (this->smearingLevels > 0) | ||||
|     { | ||||
|       std::cout << GridLogMessage << "[SmearedConfigurationMasked] Filling SmearedSet\n"; | ||||
|       GaugeField previous_u(this->ThinLinks->Grid()); | ||||
|  | ||||
|       GaugeField smeared_A(this->ThinLinks->Grid()); | ||||
|       GaugeField smeared_B(this->ThinLinks->Grid()); | ||||
|  | ||||
|       previous_u = *this->ThinLinks; | ||||
|       double start = usecond(); | ||||
|       for (int smearLvl = 0; smearLvl < this->smearingLevels; ++smearLvl) | ||||
|       { | ||||
|         this->StoutSmearing->smear(smeared_A, previous_u); | ||||
| 	ApplyMask(smeared_A,smearLvl); | ||||
| 	smeared_B = previous_u; | ||||
| 	ApplyMask(smeared_B,smearLvl); | ||||
| 	// Replace only the masked portion | ||||
| 	this->SmearedSet[smearLvl] = previous_u-smeared_B + smeared_A; | ||||
|         previous_u = this->SmearedSet[smearLvl]; | ||||
|  | ||||
|         // For debug purposes | ||||
|         RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(previous_u); | ||||
|         std::cout << GridLogMessage << "[SmearedConfigurationMasked] smeared Plaq: " << impl_plaq << std::endl; | ||||
|       } | ||||
|       double end = usecond(); | ||||
|       double time = (end - start)/ 1e3; | ||||
|       std::cout << GridLogMessage << "GaugeConfigurationMasked: Link smearing took " << time << " ms" << std::endl;   | ||||
|     } | ||||
|   } | ||||
|   //==================================================================== | ||||
|   // Override base to add masking | ||||
|   virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, | ||||
| 					  const GaugeField& GaugeK,int level)  | ||||
|   { | ||||
|     GridBase* grid = GaugeK.Grid(); | ||||
|     GaugeField C(grid), SigmaK(grid), iLambda(grid); | ||||
|     GaugeField SigmaKPrimeA(grid); | ||||
|     GaugeField SigmaKPrimeB(grid); | ||||
|     GaugeLinkField iLambda_mu(grid); | ||||
|     GaugeLinkField iQ(grid), e_iQ(grid); | ||||
|     GaugeLinkField SigmaKPrime_mu(grid); | ||||
|     GaugeLinkField GaugeKmu(grid), Cmu(grid); | ||||
|      | ||||
|     this->StoutSmearing->BaseSmear(C, GaugeK); | ||||
|     SigmaK = Zero(); | ||||
|     iLambda = Zero(); | ||||
|  | ||||
|     SigmaKPrimeA = SigmaKPrime; | ||||
|     ApplyMask(SigmaKPrimeA,level); | ||||
|     SigmaKPrimeB = SigmaKPrime - SigmaKPrimeA; | ||||
|      | ||||
|     // Could get away with computing only one polarisation here | ||||
|     // int mu= (smr/2) %Nd; | ||||
|     // SigmaKprime_A has only one component | ||||
|     for (int mu = 0; mu < Nd; mu++) | ||||
|     { | ||||
|       Cmu = peekLorentz(C, mu); | ||||
|       GaugeKmu = peekLorentz(GaugeK, mu); | ||||
|       SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu); | ||||
|       iQ = Ta(Cmu * adj(GaugeKmu)); | ||||
|       this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu); | ||||
|       pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu); | ||||
|       pokeLorentz(iLambda, iLambda_mu, mu); | ||||
|     } | ||||
|     this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK);  // derivative of SmearBase | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////////// | ||||
|     // propagate the rest of the force as identity map, just add back | ||||
|     //////////////////////////////////////////////////////////////////////////////////// | ||||
|     SigmaK = SigmaK+SigmaKPrimeB; | ||||
|  | ||||
|     return SigmaK; | ||||
|   } | ||||
|  | ||||
| public: | ||||
|  | ||||
|   /* Standard constructor */ | ||||
|   SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout<Gimpl>& Stout,bool domask=false) | ||||
|     : SmearedConfiguration<Gimpl>(_UGrid, Nsmear,Stout) | ||||
|   { | ||||
|     if(domask) assert(Nsmear%(2*Nd)==0); // Or multiply by 8?? | ||||
|  | ||||
|     // was resized in base class | ||||
|     assert(this->SmearedSet.size()==Nsmear); | ||||
|      | ||||
|     GridRedBlackCartesian * UrbGrid; | ||||
|     UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(_UGrid); | ||||
|     LatticeComplex one(_UGrid); one = ComplexD(1.0,0.0); | ||||
|     LatticeComplex tmp(_UGrid); | ||||
|  | ||||
|     for (unsigned int i = 0; i < this->smearingLevels; ++i) { | ||||
|  | ||||
|       masks.push_back(*(new LatticeLorentzComplex(_UGrid))); | ||||
|       if (domask) { | ||||
|  | ||||
| 	int mu= (i/2) %Nd; | ||||
| 	int cb= (i%2); | ||||
| 	LatticeComplex tmpcb(UrbGrid); | ||||
| 	 | ||||
| 	masks[i]=Zero(); | ||||
| 	//////////////////// | ||||
| 	// Setup the mask | ||||
| 	//////////////////// | ||||
| 	tmp = Zero(); | ||||
| 	pickCheckerboard(cb,tmpcb,one); | ||||
| 	setCheckerboard(tmp,tmpcb); | ||||
| 	PokeIndex<LorentzIndex>(masks[i],tmp, mu); | ||||
| 	 | ||||
|       } else { | ||||
| 	for(int mu=0;mu<Nd;mu++){ | ||||
| 	  PokeIndex<LorentzIndex>(masks[i],one, mu); | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|     delete UrbGrid; | ||||
|   } | ||||
|    | ||||
|   virtual void smeared_force(GaugeField &SigmaTilde)  | ||||
|   { | ||||
|     if (this->smearingLevels > 0) | ||||
|     { | ||||
|       double start = usecond(); | ||||
|       GaugeField force = SigmaTilde; // actually = U*SigmaTilde | ||||
|       GaugeLinkField tmp_mu(SigmaTilde.Grid()); | ||||
|  | ||||
|       // Remove U from UdSdU | ||||
|       for (int mu = 0; mu < Nd; mu++) | ||||
|       { | ||||
|         // to get just SigmaTilde | ||||
|         tmp_mu = adj(peekLorentz(this->SmearedSet[this->smearingLevels - 1], mu)) * peekLorentz(force, mu); | ||||
|         pokeLorentz(force, tmp_mu, mu); | ||||
|       } | ||||
|  | ||||
|       for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) { | ||||
|         force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1),ismr); | ||||
|       } | ||||
|        | ||||
|       force = this->AnalyticSmearedForce(force, *this->ThinLinks,0); | ||||
|  | ||||
|       // Add U to UdSdU | ||||
|       for (int mu = 0; mu < Nd; mu++) | ||||
|       { | ||||
|         tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu); | ||||
|         pokeLorentz(SigmaTilde, tmp_mu, mu); | ||||
|       } | ||||
|       double end = usecond(); | ||||
|       double time = (end - start)/ 1e3; | ||||
|       std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl;   | ||||
|     }  // if smearingLevels = 0 do nothing | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
							
								
								
									
										91
									
								
								Grid/qcd/smearing/JacobianAction.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								Grid/qcd/smearing/JacobianAction.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h | ||||
|  | ||||
| Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: neo <cossu@post.kek.jp> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| 			   /*  END LEGAL */ | ||||
| #pragma once | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////// | ||||
| // Jacobian Action ..  | ||||
| //////////////////////////////////////////////////////////////////////// | ||||
| template <class Gimpl> | ||||
| class JacobianAction : public Action<typename Gimpl::GaugeField> { | ||||
| public:   | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   SmearedConfigurationMasked<Gimpl> * smearer; | ||||
|   /////////////////////////// constructors | ||||
|   explicit JacobianAction(SmearedConfigurationMasked<Gimpl> * _smearer ) { smearer=_smearer;}; | ||||
|  | ||||
|   virtual std::string action_name() {return "JacobianAction";} | ||||
|  | ||||
|   virtual std::string LogParameters(){ | ||||
|     std::stringstream sstream; | ||||
|     sstream << GridLogMessage << "[JacobianAction] " << std::endl; | ||||
|     return sstream.str(); | ||||
|   } | ||||
|  | ||||
|   ////////////////////////////////// | ||||
|   // Usual cases are not used | ||||
|   ////////////////////////////////// | ||||
|   virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){ assert(0);}; | ||||
|   virtual RealD S(const GaugeField &U) { assert(0); } | ||||
|   virtual void deriv(const GaugeField &U, GaugeField &dSdU) { assert(0);  } | ||||
|  | ||||
|   ////////////////////////////////// | ||||
|   // Functions of smart configs only | ||||
|   ////////////////////////////////// | ||||
|   virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) | ||||
|   { | ||||
|     return; | ||||
|   } | ||||
|   virtual RealD S(ConfigurationBase<GaugeField>& U) | ||||
|   { | ||||
|     // det M = e^{ - ( - logDetM) } | ||||
|     assert( &U == smearer ); | ||||
|     return -smearer->logDetJacobian(); | ||||
|   } | ||||
|   virtual RealD Sinitial(ConfigurationBase<GaugeField>& U)  | ||||
|   { | ||||
|     return S(U); | ||||
|   } | ||||
|   virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU) | ||||
|   { | ||||
|     assert( &U == smearer ); | ||||
|     smearer->logDetJacobianForce(dSdU); | ||||
|   } | ||||
|  | ||||
| private: | ||||
|  }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| @@ -40,7 +40,9 @@ template <class Gimpl> | ||||
| class Smear_Stout : public Smear<Gimpl> { | ||||
|  private: | ||||
|   int OrthogDim = -1; | ||||
| public: | ||||
|   const std::vector<double> SmearRho; | ||||
| private: | ||||
|   // Smear<Gimpl>* ownership semantics: | ||||
|   //    Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here | ||||
|   //    Smear<Gimpl>* created within constructor need to be deleted as part of the destructor | ||||
|   | ||||
| @@ -823,6 +823,35 @@ LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> | ||||
|   return ret; | ||||
| } | ||||
| template<int N> | ||||
| Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu) | ||||
| { | ||||
|   GridBase *grid=Umu.Grid(); | ||||
|   auto lvol = grid->lSites(); | ||||
|   Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid); | ||||
|    | ||||
|   autoView(Umu_v,Umu,CpuRead); | ||||
|   autoView(ret_v,ret,CpuWrite); | ||||
|   thread_for(site,lvol,{ | ||||
|     Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N); | ||||
|     Coordinate lcoor; | ||||
|     grid->LocalIndexToLocalCoor(site, lcoor); | ||||
|     iScalar<iScalar<iMatrix<ComplexD, N> > > Us; | ||||
|     iScalar<iScalar<iMatrix<ComplexD, N> > > Ui; | ||||
|     peekLocalSite(Us, Umu_v, lcoor); | ||||
|     for(int i=0;i<N;i++){ | ||||
|       for(int j=0;j<N;j++){ | ||||
| 	EigenU(i,j) = Us()()(i,j); | ||||
|       }} | ||||
|     Eigen::MatrixXcd EigenUinv = EigenU.inverse(); | ||||
|     for(int i=0;i<N;i++){ | ||||
|       for(int j=0;j<N;j++){ | ||||
| 	Ui()()(i,j) = EigenUinv(i,j); | ||||
|       }} | ||||
|     pokeLocalSite(Ui,ret_v,lcoor); | ||||
|   }); | ||||
|   return ret; | ||||
| } | ||||
| template<int N> | ||||
| static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu) | ||||
| { | ||||
|   Umu      = ProjectOnGroup(Umu); | ||||
|   | ||||
| @@ -51,6 +51,7 @@ public: | ||||
|   typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF; | ||||
|   typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD; | ||||
|  | ||||
|   typedef Lattice<iScalar<iScalar<iVector<vComplex, Dimension> > > >  LatticeAdjVector; | ||||
|  | ||||
|   template <class cplx> | ||||
|   static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) { | ||||
|   | ||||
| @@ -123,7 +123,7 @@ public: | ||||
| 	  } | ||||
| 	  if ( permute_slice ) { | ||||
| 	    int ptype       =grid->PermuteType(d); | ||||
| 	    uint8_t mask    =grid->Nsimd() >> (ptype + 1);		 | ||||
| 	    uint8_t mask    =0x1<<ptype; | ||||
| 	    SE._permute    |= mask; | ||||
| 	  } | ||||
| 	}	 | ||||
|   | ||||
| @@ -339,8 +339,8 @@ public: | ||||
|   // Vectors that live on the symmetric heap in case of SHMEM | ||||
|   // These are used; either SHM objects or refs to the above symmetric heap vectors | ||||
|   // depending on comms target | ||||
|   Vector<cobj *> u_simd_send_buf; | ||||
|   Vector<cobj *> u_simd_recv_buf; | ||||
|   std::vector<cobj *> u_simd_send_buf; | ||||
|   std::vector<cobj *> u_simd_recv_buf; | ||||
|  | ||||
|   int u_comm_offset; | ||||
|   int _unified_buffer_size; | ||||
| @@ -348,7 +348,7 @@ public: | ||||
|   //////////////////////////////////////// | ||||
|   // Stencil query | ||||
|   //////////////////////////////////////// | ||||
| #ifdef SHM_FAST_PATH | ||||
| #if 1 | ||||
|   inline int SameNode(int point) { | ||||
|  | ||||
|     int dimension    = this->_directions[point]; | ||||
| @@ -434,7 +434,6 @@ public: | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
|   void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs) | ||||
|   { | ||||
|     accelerator_barrier(); | ||||
|     for(int i=0;i<Packets.size();i++){ | ||||
|       _grid->StencilSendToRecvFromBegin(MpiReqs, | ||||
| 					Packets[i].send_buf, | ||||
| @@ -452,7 +451,6 @@ public: | ||||
|     else if ( this->fullDirichlet ) DslashLogDirichlet(); | ||||
|     else DslashLogFull(); | ||||
|     acceleratorCopySynchronise(); | ||||
|     // Everyone agrees we are all done | ||||
|     _grid->StencilBarrier();  | ||||
|   } | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
| @@ -541,6 +539,7 @@ public: | ||||
|       compress.Point(point); | ||||
|       HaloGatherDir(source,compress,point,face_idx); | ||||
|     } | ||||
|     accelerator_barrier(); | ||||
|     face_table_computed=1; | ||||
|     assert(u_comm_offset==_unified_buffer_size); | ||||
|  | ||||
| @@ -666,11 +665,9 @@ public: | ||||
|     for(int i=0;i<mm.size();i++){ | ||||
|       decompressor::MergeFace(decompress,mm[i]); | ||||
|     } | ||||
|     if ( mm.size() )    acceleratorFenceComputeStream(); | ||||
|     for(int i=0;i<dd.size();i++){ | ||||
|       decompressor::DecompressFace(decompress,dd[i]); | ||||
|     } | ||||
|     if ( dd.size() )    acceleratorFenceComputeStream(); | ||||
|   } | ||||
|   //////////////////////////////////////// | ||||
|   // Set up routines | ||||
| @@ -708,6 +705,7 @@ public: | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|     std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl; | ||||
|   } | ||||
|   /// Introduce a block structure and switch off comms on boundaries | ||||
|   void DirichletBlock(const Coordinate &dirichlet_block) | ||||
|   | ||||
| @@ -526,7 +526,7 @@ inline void acceleratorFreeCpu  (void *ptr){free(ptr);}; | ||||
| ////////////////////////////////////////////// | ||||
|  | ||||
| #ifdef GRID_SYCL | ||||
| inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();}; | ||||
| inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); }; | ||||
| #else | ||||
| // Ordering within a stream guaranteed on Nvidia & AMD | ||||
| inline void acceleratorFenceComputeStream(void){ }; | ||||
|   | ||||
| @@ -227,7 +227,7 @@ int main(int argc, char **argv) { | ||||
|   //  std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated | ||||
|   //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); | ||||
|  | ||||
|   int SP_iters=10000; | ||||
|   int SP_iters=9000; | ||||
|    | ||||
|   RationalActionParams OFRp; // Up/down | ||||
|   OFRp.lo       = 6.0e-5; | ||||
| @@ -362,12 +362,12 @@ int main(int argc, char **argv) { | ||||
|  | ||||
|   // Probably dominates the force - back to EOFA. | ||||
|   OneFlavourRationalParams SFRp; | ||||
|   SFRp.lo       = 0.25; | ||||
|   SFRp.lo       = 0.1; | ||||
|   SFRp.hi       = 25.0; | ||||
|   SFRp.MaxIter  = 10000; | ||||
|   SFRp.tolerance= 1.0e-5; | ||||
|   SFRp.tolerance= 1.0e-8; | ||||
|   SFRp.mdtolerance= 2.0e-4; | ||||
|   SFRp.degree   = 8; | ||||
|   SFRp.degree   = 12; | ||||
|   SFRp.precision= 50; | ||||
|    | ||||
|   MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); | ||||
|   | ||||
| @@ -245,11 +245,6 @@ int main(int argc, char **argv) { | ||||
|  | ||||
|   GlobalSharedMemory::GetShmDims(mpi,shm); | ||||
|  | ||||
|   Coordinate CommDim(Nd); | ||||
|   for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0; | ||||
|  | ||||
|   Coordinate NonDirichlet(Nd+1,0); | ||||
|  | ||||
|   ////////////////////////// | ||||
|   // Fermion Grids | ||||
|   ////////////////////////// | ||||
| @@ -277,8 +272,6 @@ int main(int argc, char **argv) { | ||||
|   std::vector<Complex> boundary = {1,1,1,-1}; | ||||
|   FermionAction::ImplParams Params(boundary); | ||||
|   FermionActionF::ImplParams ParamsF(boundary); | ||||
|   Params.dirichlet=NonDirichlet; | ||||
|   ParamsF.dirichlet=NonDirichlet; | ||||
|  | ||||
|   //  double StoppingCondition = 1e-14; | ||||
|   //  double MDStoppingCondition = 1e-9; | ||||
| @@ -305,12 +298,12 @@ int main(int argc, char **argv) { | ||||
|  | ||||
|   // Probably dominates the force - back to EOFA. | ||||
|   OneFlavourRationalParams SFRp; | ||||
|   SFRp.lo       = 0.25; | ||||
|   SFRp.hi       = 25.0; | ||||
|   SFRp.lo       = 0.1; | ||||
|   SFRp.hi       = 30.0; | ||||
|   SFRp.MaxIter  = 10000; | ||||
|   SFRp.tolerance= 1.0e-5; | ||||
|   SFRp.mdtolerance= 2.0e-4; | ||||
|   SFRp.degree   = 8; | ||||
|   SFRp.tolerance= 1.0e-8; | ||||
|   SFRp.mdtolerance= 2.0e-6; | ||||
|   SFRp.degree   = 10; | ||||
|   SFRp.precision= 50; | ||||
|    | ||||
|   MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); | ||||
| @@ -370,19 +363,17 @@ int main(int argc, char **argv) { | ||||
|   //////////////////////////////////// | ||||
|   std::vector<Real> light_den; | ||||
|   std::vector<Real> light_num; | ||||
|   std::vector<int> dirichlet_den; | ||||
|   std::vector<int> dirichlet_num; | ||||
|  | ||||
|   int n_hasenbusch = hasenbusch.size(); | ||||
|   light_den.push_back(light_mass);  dirichlet_den.push_back(0); | ||||
|   light_den.push_back(light_mass);  | ||||
|   for(int h=0;h<n_hasenbusch;h++){ | ||||
|     light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(0); | ||||
|     light_den.push_back(hasenbusch[h]); | ||||
|   } | ||||
|  | ||||
|   for(int h=0;h<n_hasenbusch;h++){ | ||||
|     light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(0); | ||||
|     light_num.push_back(hasenbusch[h]); | ||||
|   } | ||||
|   light_num.push_back(pv_mass);  dirichlet_num.push_back(0); | ||||
|   light_num.push_back(pv_mass); | ||||
|  | ||||
|   std::vector<FermionAction *> Numerators; | ||||
|   std::vector<FermionAction *> Denominators; | ||||
| @@ -408,9 +399,7 @@ int main(int argc, char **argv) { | ||||
|     std::cout << GridLogMessage | ||||
| 	      << " 2f quotient Action "; | ||||
|     std::cout << "det D("<<light_den[h]<<")"; | ||||
|     if ( dirichlet_den[h] ) std::cout << "^dirichlet    "; | ||||
|     std::cout << "/ det D("<<light_num[h]<<")"; | ||||
|     if ( dirichlet_num[h] ) std::cout << "^dirichlet    "; | ||||
|     std::cout << std::endl; | ||||
|  | ||||
|     FermionAction::ImplParams ParamsNum(boundary); | ||||
| @@ -418,21 +407,11 @@ int main(int argc, char **argv) { | ||||
|     FermionActionF::ImplParams ParamsDenF(boundary); | ||||
|     FermionActionF::ImplParams ParamsNumF(boundary); | ||||
|      | ||||
|     ParamsNum.dirichlet = NonDirichlet; | ||||
|     ParamsDen.dirichlet = NonDirichlet; | ||||
|  | ||||
|     ParamsNum.partialDirichlet = 0; | ||||
|     ParamsDen.partialDirichlet = 0; | ||||
|      | ||||
|     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum)); | ||||
|     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen)); | ||||
|  | ||||
|     ParamsDenF.dirichlet = ParamsDen.dirichlet; | ||||
|     ParamsDenF.partialDirichlet = ParamsDen.partialDirichlet; | ||||
|     DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF)); | ||||
|  | ||||
|     ParamsNumF.dirichlet = ParamsNum.dirichlet; | ||||
|     ParamsNumF.partialDirichlet = ParamsNum.partialDirichlet; | ||||
|     NumeratorsF.push_back  (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF)); | ||||
|  | ||||
|     LinOpD.push_back(new LinearOperatorD(*Denominators[h])); | ||||
| @@ -469,7 +448,6 @@ int main(int argc, char **argv) { | ||||
|   // Gauge action | ||||
|   ///////////////////////////////////////////////////////////// | ||||
|   Level3.push_back(&GaugeAction); | ||||
|   //  TheHMC.TheAction.push_back(Level1); | ||||
|   TheHMC.TheAction.push_back(Level2); | ||||
|   TheHMC.TheAction.push_back(Level3); | ||||
|   std::cout << GridLogMessage << " Action complete "<< std::endl; | ||||
|   | ||||
| @@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet) | ||||
|  | ||||
|   err = r_eo-result; | ||||
|   n2e= norm2(err); | ||||
|   std::cout<<GridLogMessage << "norm diff   "<< n2e<< "  Line "<<__LINE__ <<std::endl; | ||||
|   std::cout<<GridLogMessage << "norm diff   "<< n2e<<std::endl; | ||||
|   assert(n2e<1.0e-4); | ||||
|  | ||||
|   pickCheckerboard(Even,src_e,err); | ||||
|   | ||||
							
								
								
									
										90
									
								
								documentation/David_notes.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								documentation/David_notes.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| Branch: develop | ||||
|  | ||||
| Files: | ||||
|  | ||||
| Grid/lattice/PaddedCell.h -- Halo exchange | ||||
| tests/Test_general_stencil.cc -- test local off axis stencil addressing | ||||
| tests/debug/Test_padded_cell.cc -- test PaddedCell halo exchange and the General local stencil  by computing ALL plaquettes on lattice | ||||
|  | ||||
| Functionality: | ||||
|  | ||||
| -- extend a lattice field: | ||||
| Grid/lattice/PaddedCell.h | ||||
|  | ||||
| // Constructor | ||||
|   PaddedCell(int _depth,GridCartesian *_grid) | ||||
|  | ||||
| // Expand a field "in" to depth "d" | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Exchange(Lattice<vobj> &in) | ||||
|    | ||||
| // Take the "apple core" of in to a smaller local volume | ||||
|   template<class vobj> | ||||
|   inline Lattice<vobj> Extract(Lattice<vobj> &in) | ||||
|  | ||||
| -- Plaquette test: | ||||
| tests/debug/Test_padded_cell.cc | ||||
|   ///////////////////////////////////////////////// | ||||
|   // Create a padded cell of extra padding depth=1 | ||||
|   ///////////////////////////////////////////////// | ||||
|   int depth = 1; | ||||
|   PaddedCell Ghost(depth,&GRID); | ||||
|   LatticeGaugeField Ughost = Ghost.Exchange(Umu); | ||||
|  | ||||
| ///// Array for the site plaquette | ||||
|   GridBase *GhostGrid = Ughost.Grid(); | ||||
|   LatticeComplex gplaq(GhostGrid);  | ||||
|  | ||||
|   std::vector<Coordinate> shifts; | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     for(int nu=mu+1;nu<Nd;nu++){ | ||||
|    | ||||
|       //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x) | ||||
|       Coordinate shift_0(Nd,0); | ||||
|       Coordinate shift_mu(Nd,0); shift_mu[mu]=1; | ||||
|       Coordinate shift_nu(Nd,0); shift_nu[nu]=1; | ||||
|       shifts.push_back(shift_0); | ||||
|       shifts.push_back(shift_mu); | ||||
|       shifts.push_back(shift_nu); | ||||
|       shifts.push_back(shift_0); | ||||
|     } | ||||
|   } | ||||
|   GeneralLocalStencil gStencil(GhostGrid,shifts); | ||||
|  | ||||
|   gplaq=Zero(); | ||||
|   { | ||||
|     autoView( gp_v , gplaq, CpuWrite); | ||||
|     autoView( t_v , trplaq, CpuRead); | ||||
|     autoView( U_v , Ughost, CpuRead); | ||||
|     for(int ss=0;ss<gp_v.size();ss++){ | ||||
|       int s=0; | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	for(int nu=mu+1;nu<Nd;nu++){ | ||||
|  | ||||
| 	  auto SE0 = gStencil.GetEntry(s+0,ss); | ||||
| 	  auto SE1 = gStencil.GetEntry(s+1,ss); | ||||
| 	  auto SE2 = gStencil.GetEntry(s+2,ss); | ||||
| 	  auto SE3 = gStencil.GetEntry(s+3,ss); | ||||
| 	 | ||||
| 	  int o0 = SE0->_offset; | ||||
| 	  int o1 = SE1->_offset; | ||||
| 	  int o2 = SE2->_offset; | ||||
| 	  int o3 = SE3->_offset; | ||||
| 	   | ||||
| 	  auto U0 = U_v[o0](mu); | ||||
| 	  auto U1 = U_v[o1](nu); | ||||
| 	  auto U2 = adj(U_v[o2](mu)); | ||||
| 	  auto U3 = adj(U_v[o3](nu)); | ||||
|  | ||||
| 	  gpermute(U0,SE0->_permute); | ||||
| 	  gpermute(U1,SE1->_permute); | ||||
| 	  gpermute(U2,SE2->_permute); | ||||
| 	  gpermute(U3,SE3->_permute); | ||||
| 	   | ||||
| 	  gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 ); | ||||
| 	  s=s+4; | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   cplaq = Ghost.Extract(gplaq); | ||||
							
								
								
									
										133
									
								
								examples/socket_grid.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								examples/socket_grid.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| #include <sys/socket.h> | ||||
| #include <sys/un.h> | ||||
| #include <unistd.h> | ||||
| #include <stdio.h> | ||||
| #include <err.h> | ||||
| #include <fcntl.h> | ||||
| #include <assert.h> | ||||
| #include <string.h> | ||||
| #include <stdlib.h> | ||||
|  | ||||
| static int sock; | ||||
| static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d"; | ||||
| static char sock_path[256]; | ||||
|  | ||||
| class UnixSockets { | ||||
| public: | ||||
|   static void Open(int rank) | ||||
|   { | ||||
|     int errnum; | ||||
|  | ||||
|     sock = socket(AF_UNIX, SOCK_DGRAM, 0);  assert(sock>0); | ||||
|     printf("allocated socket %d\n",sock); | ||||
|  | ||||
|     struct sockaddr_un sa_un = { 0 }; | ||||
|     sa_un.sun_family = AF_UNIX; | ||||
|     snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank); | ||||
|     unlink(sa_un.sun_path); | ||||
|     if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) { | ||||
|       perror("bind failure"); | ||||
|       exit(EXIT_FAILURE); | ||||
|     } | ||||
|     printf("bound socket %d to %s\n",sock,sa_un.sun_path); | ||||
|   } | ||||
|  | ||||
|   static int RecvFileDescriptor(void) | ||||
|   { | ||||
|     int n; | ||||
|     int fd; | ||||
|     char buf[1]; | ||||
|     struct iovec iov; | ||||
|     struct msghdr msg; | ||||
|     struct cmsghdr *cmsg; | ||||
|     char cms[CMSG_SPACE(sizeof(int))]; | ||||
|  | ||||
|     iov.iov_base = buf; | ||||
|     iov.iov_len = 1; | ||||
|  | ||||
|     memset(&msg, 0, sizeof msg); | ||||
|     msg.msg_name = 0; | ||||
|     msg.msg_namelen = 0; | ||||
|     msg.msg_iov = &iov; | ||||
|     msg.msg_iovlen = 1; | ||||
|  | ||||
|     msg.msg_control = (caddr_t)cms; | ||||
|     msg.msg_controllen = sizeof cms; | ||||
|  | ||||
|     if((n=recvmsg(sock, &msg, 0)) < 0) { | ||||
|       perror("recvmsg failed"); | ||||
|       return -1; | ||||
|     } | ||||
|     if(n == 0){ | ||||
|       perror("recvmsg returned 0"); | ||||
|       return -1; | ||||
|     } | ||||
|     cmsg = CMSG_FIRSTHDR(&msg); | ||||
|     memmove(&fd, CMSG_DATA(cmsg), sizeof(int)); | ||||
|     printf("received fd %d from socket %d\n",fd,sock); | ||||
|     return fd; | ||||
|   } | ||||
|  | ||||
|   static void SendFileDescriptor(int fildes,int xmit_to_rank) | ||||
|   { | ||||
|     struct msghdr msg; | ||||
|     struct iovec iov; | ||||
|     struct cmsghdr *cmsg = NULL; | ||||
|     char ctrl[CMSG_SPACE(sizeof(int))]; | ||||
|     char data = ' '; | ||||
|  | ||||
|     memset(&msg, 0, sizeof(struct msghdr)); | ||||
|     memset(ctrl, 0, CMSG_SPACE(sizeof(int))); | ||||
|     iov.iov_base = &data; | ||||
|     iov.iov_len = sizeof(data); | ||||
|      | ||||
|     sprintf(sock_path,sock_path_fmt,xmit_to_rank); | ||||
|     printf("sending FD %d over socket %d to rank %d AF_UNIX path %s\n",fildes,sock,xmit_to_rank,sock_path);fflush(stdout); | ||||
|      | ||||
|     struct sockaddr_un sa_un = { 0 }; | ||||
|     sa_un.sun_family = AF_UNIX; | ||||
|     snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank); | ||||
|  | ||||
|     msg.msg_name = (void *)&sa_un; | ||||
|     msg.msg_namelen = sizeof(sa_un); | ||||
|     msg.msg_iov = &iov; | ||||
|     msg.msg_iovlen = 1; | ||||
|     msg.msg_controllen =  CMSG_SPACE(sizeof(int)); | ||||
|     msg.msg_control = ctrl; | ||||
|  | ||||
|     cmsg = CMSG_FIRSTHDR(&msg); | ||||
|     cmsg->cmsg_level = SOL_SOCKET; | ||||
|     cmsg->cmsg_type = SCM_RIGHTS; | ||||
|     cmsg->cmsg_len = CMSG_LEN(sizeof(int)); | ||||
|  | ||||
|     *((int *) CMSG_DATA(cmsg)) = fildes; | ||||
|  | ||||
|     if ( sendmsg(sock, &msg, 0) == -1 ) perror("sendmsg failed"); | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| int main(int argc, char **argv) | ||||
| { | ||||
|   int me = fork()?0:1; | ||||
|    | ||||
|   UnixSockets::Open(me); | ||||
|    | ||||
|   // need MPI barrier | ||||
|   sleep(10); | ||||
|   const char * message = "Hello, World\n"; | ||||
|   if( me ) { | ||||
|     int fd = open("foo",O_RDWR|O_CREAT,0666); | ||||
|     if ( fd < 0 ) { | ||||
|       perror("failed to open file"); | ||||
|       exit(EXIT_FAILURE); | ||||
|     } | ||||
|     // rank 1 sends ot rank 0 | ||||
|     UnixSockets::SendFileDescriptor(fd,0); | ||||
|     close(fd); | ||||
|   } else { | ||||
|     // rank 0 sends receives frmo rank 1 | ||||
|     int fd = UnixSockets::RecvFileDescriptor(); | ||||
|     write(fd,(const void *)message,strlen(message)); | ||||
|     close(fd); | ||||
|   } | ||||
| } | ||||
| @@ -60,7 +60,7 @@ while test $# -gt 0; do | ||||
|     ;; | ||||
|      | ||||
|     --cxxflags) | ||||
|       echo @GRID_CXXFLAGS@ | ||||
|       echo @GRID_CXXFLAGS@ -I@prefix@/include | ||||
|     ;; | ||||
|      | ||||
|     --cxx) | ||||
| @@ -72,11 +72,11 @@ while test $# -gt 0; do | ||||
|     ;; | ||||
|      | ||||
|     --ldflags) | ||||
|       echo @GRID_LDFLAGS@ | ||||
|       echo @GRID_LDFLAGS@ -L@prefix@/lib | ||||
|     ;; | ||||
|      | ||||
|     --libs) | ||||
|       echo @GRID_LIBS@ | ||||
|       echo @GRID_LIBS@ -lGrid | ||||
|     ;; | ||||
|      | ||||
|     --summary) | ||||
|   | ||||
							
								
								
									
										32
									
								
								systems/Lumi/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								systems/Lumi/config-command
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
| spack load c-lime | ||||
| spack load gmp | ||||
| spack load mpfr | ||||
| CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-` | ||||
| GMP=`spack find --paths gmp | grep gmp | cut -c 12-` | ||||
| MPFR=`spack find --paths mpfr | grep mpfr | cut -c 12-` | ||||
| echo clime $CLIME | ||||
| echo gmp $GMP | ||||
| echo mpfr $MPFR | ||||
|  | ||||
| ../../configure --enable-comms=mpi-auto \ | ||||
| --with-lime=$CLIME \ | ||||
| --enable-unified=no \ | ||||
| --enable-shm=nvlink \ | ||||
| --enable-tracing=timer \ | ||||
| --enable-accelerator=hip \ | ||||
| --enable-gen-simd-width=64 \ | ||||
| --enable-simd=GPU \ | ||||
| --disable-accelerator-cshift \ | ||||
| --with-gmp=$OLCF_GMP_ROOT \ | ||||
| --with-fftw=$FFTW_DIR/.. \ | ||||
| --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ | ||||
| --disable-fermion-reps \ | ||||
| --disable-gparity \ | ||||
| CXX=hipcc MPICXX=mpicxx \ | ||||
| CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -std=c++14 -I${MPICH_DIR}/include -L/lib64 --amdgpu-target=gfx90a" \ | ||||
|  LDFLAGS="-L/lib64 -L/opt/rocm-5.2.0/lib/ -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 --amdgpu-target=gfx90a " | ||||
|  | ||||
|  | ||||
| #--enable-simd=GPU-RRII \ | ||||
|  | ||||
|  | ||||
							
								
								
									
										1
									
								
								systems/Lumi/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								systems/Lumi/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| module load CrayEnv LUMI/22.12 partition/G  cray-fftw/3.3.10.1 | ||||
| @@ -3,8 +3,14 @@ export https_proxy=http://proxy-chain.intel.com:911 | ||||
| export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH | ||||
|  | ||||
| module load intel-release | ||||
| source /opt/intel/oneapi/PVC_setup.sh | ||||
| module load intel-comp-rt/embargo-ci-neo | ||||
|  | ||||
| #source /opt/intel/oneapi/PVC_setup.sh | ||||
| #source /opt/intel/oneapi/ATS_setup.sh | ||||
| #module load intel-nightly/20230331 | ||||
| #module load intel-comp-rt/ci-neo-master/026093 | ||||
|  | ||||
| #module load intel/mpich | ||||
| module load intel/mpich/pvc45.3 | ||||
| export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH | ||||
|  | ||||
|   | ||||
| @@ -1,2 +1,4 @@ | ||||
| CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi --enable-unified=yes | ||||
| BREW=/opt/local/ | ||||
| MPICXX=mpicxx CXX=c++-12 ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -115,6 +115,7 @@ int main(int argc, char ** argv) | ||||
| 	  if (SE->_permute & 0x2 ) { permute(check[i],tmp,1); tmp=check[i];} | ||||
| 	  if (SE->_permute & 0x4 ) { permute(check[i],tmp,2); tmp=check[i];} | ||||
| 	  if (SE->_permute & 0x8 ) { permute(check[i],tmp,3); tmp=check[i];} | ||||
| 	  //	  std::cout<<GridLogMessage<<"stencil["<<i<<"] "<< check[i]<< " perm "<<(uint32_t)SE->_permute <<std::endl; | ||||
| 	} | ||||
|  | ||||
| 	Real nrmC = norm2(Check); | ||||
| @@ -138,18 +139,17 @@ int main(int argc, char ** argv) | ||||
| 	  ddiff = check -bar; | ||||
| 	  diff =norm2(ddiff); | ||||
| 	  if ( diff > 0){ | ||||
| 	    std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3] | ||||
| 		      <<") " <<check<<" vs "<<bar<<std::endl; | ||||
| 	    std::cout <<"Diff at Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3] | ||||
| 		      <<") stencil " <<check<<" vs cshift "<<bar<<std::endl; | ||||
| 	  } | ||||
|  | ||||
|  | ||||
| 	}}}} | ||||
|  | ||||
| 	if (nrm > 1.0e-4) { | ||||
| 	  autoView( check , Check, CpuRead); | ||||
| 	  autoView(   bar ,   Bar, CpuRead); | ||||
| 	  for(int i=0;i<check.size();i++){ | ||||
| 	    std::cout << i<<" Check "<<check[i]<< "\n"<<i<<" Bar "<<bar[i]<<std::endl; | ||||
| 	    std::cout << i<<" ERROR Check \n"<<check[i]<< "\n"<<i<<" Bar \n"<<bar[i]<<std::endl; | ||||
| 	  } | ||||
| 	} | ||||
| 	if (nrm > 1.0e-4) exit(-1); | ||||
|   | ||||
| @@ -63,7 +63,9 @@ int main(int argc, char** argv) { | ||||
|   std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; | ||||
|  | ||||
|   // guard as this code fails to compile for Nc != 3 | ||||
| #if (Nc == 3) | ||||
| #if 1 | ||||
|  | ||||
|   std::cout << " Printing  Adjoint Generators"<< std::endl; | ||||
|      | ||||
|   SU2Adjoint::printGenerators(); | ||||
|   SU2::testGenerators(); | ||||
| @@ -149,9 +151,32 @@ int main(int argc, char** argv) { | ||||
|     pokeLorentz(UrVr,Urmu*Vrmu, mu); | ||||
|   } | ||||
|  | ||||
|   typedef typename SU_Adjoint<Nc>::AMatrix AdjointMatrix; | ||||
|   typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr; | ||||
|   std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl; | ||||
|  | ||||
|   std::cout << GridLogMessage << "****************************************** " << std::endl; | ||||
|   std::cout << GridLogMessage << " MAP BETWEEN FUNDAMENTAL AND ADJOINT CHECK " << std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************** " << std::endl; | ||||
|   for(int a=0;a<Nc*Nc-1;a++){ | ||||
|   for(int b=0;b<Nc*Nc-1;b++){ | ||||
|   for(int c=0;c<Nc*Nc-1;c++){ | ||||
|     ColourMatrix Ta; | ||||
|     ColourMatrix Tb; | ||||
|     ColourMatrix Tc; | ||||
|     SU3::generator(a, Ta); | ||||
|     SU3::generator(b, Tb); | ||||
|     SU3::generator(c, Tc); | ||||
|     AdjointMatrix TRa; | ||||
|     SU3Adjoint::generator(a,TRa); | ||||
|     Complex tr1 = trace ( Tc * ( Ta*Tb-Tb*Ta)); // i/2 fabc | ||||
|     Complex tr2 = TRa()()(b,c) * Complex(0,1); | ||||
|     std::cout << " 2 Tr( Tc[Ta,Tb]) " << 2.0*tr1<<std::endl; | ||||
|     std::cout << " - TRa_bc " << tr2<<std::endl; | ||||
|     assert(abs( (2.0*tr1-tr2) ) < 1.0e-7); | ||||
|     std::cout << "------------------"<<std::endl; | ||||
|   }}} | ||||
|    | ||||
|   // Check correspondence of algebra and group transformations | ||||
|   // Create a random vector | ||||
|   SU3::LatticeAlgebraVector h_adj(grid); | ||||
|   | ||||
							
								
								
									
										184
									
								
								tests/debug/Test_padded_cell.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								tests/debug/Test_padded_cell.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,184 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_padded_cell.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/lattice/PaddedCell.h> | ||||
| #include <Grid/stencil/GeneralLocalStencil.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
| template<class vobj> void gpermute(vobj & inout,int perm){ | ||||
|   vobj tmp=inout; | ||||
|   if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} | ||||
|   if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;} | ||||
|   if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} | ||||
|   if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} | ||||
| } | ||||
|    | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   Coordinate latt_size  = GridDefaultLatt(); | ||||
|   Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd()); | ||||
|   Coordinate mpi_layout = GridDefaultMpi(); | ||||
|   std::cout << " mpi "<<mpi_layout<<std::endl; | ||||
|   std::cout << " simd "<<simd_layout<<std::endl; | ||||
|   std::cout << " latt "<<latt_size<<std::endl; | ||||
|   GridCartesian GRID(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|   GridParallelRNG   pRNG(&GRID); | ||||
|   pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|   LatticeGaugeField Umu(&GRID); | ||||
|  | ||||
|   SU<Nc>::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   LatticeComplex trplaq(&GRID); | ||||
|  | ||||
|   std::vector<LatticeColourMatrix> U(Nd, Umu.Grid()); | ||||
|   for (int mu = 0; mu < Nd; mu++) { | ||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|   } | ||||
|  | ||||
|   std::cout << GridLogMessage << " Average plaquette "<<plaq<<std::endl; | ||||
|  | ||||
|   LatticeComplex cplaq(&GRID); cplaq=Zero(); | ||||
|  | ||||
|   ///////////////////////////////////////////////// | ||||
|   // Create a padded cell of extra padding depth=1 | ||||
|   ///////////////////////////////////////////////// | ||||
|   int depth = 1; | ||||
|   PaddedCell Ghost(depth,&GRID); | ||||
|   LatticeGaugeField Ughost = Ghost.Exchange(Umu); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////////// | ||||
|   // Temporary debug Hack for single rank sim: | ||||
|   // Check the contents of the cell are periodcally replicated | ||||
|   // In future ONLY pad those dimensions that are not local to node | ||||
|   /////////////////////////////////////////////////////////////////// | ||||
| #if 0 | ||||
|   { | ||||
|     double diff=0; | ||||
|     double n=0; | ||||
|   { | ||||
|     autoView( Ug_v , Ughost, CpuRead); | ||||
|     autoView( Ul_v , Umu   , CpuRead); | ||||
|   for(int x=0;x<latt_size[0]+2;x++){ | ||||
|   for(int y=0;y<latt_size[1]+2;y++){ | ||||
|   for(int z=0;z<latt_size[2]+2;z++){ | ||||
|   for(int t=0;t<latt_size[3]+2;t++){ | ||||
|     int lx=(x-1+latt_size[0])%latt_size[0]; | ||||
|     int ly=(y-1+latt_size[1])%latt_size[1]; | ||||
|     int lz=(z-1+latt_size[2])%latt_size[2]; | ||||
|     int lt=(t-1+latt_size[3])%latt_size[3]; | ||||
|     Coordinate gcoor({x,y,z,t}); | ||||
|     Coordinate lcoor({lx,ly,lz,lt}); | ||||
|     LorentzColourMatrix g; | ||||
|     LorentzColourMatrix l; | ||||
|     peekLocalSite(g,Ug_v,gcoor); | ||||
|     peekLocalSite(l,Ul_v,lcoor); | ||||
|     g=g-l; | ||||
|     assert(norm2(g)==0); | ||||
|     diff = diff + norm2(g); | ||||
|     n = n + norm2(l); | ||||
|   }}}} | ||||
|   } | ||||
|   std::cout << "padded field check diff "<< diff <<" / "<< n<<std::endl; | ||||
|   std::cout << norm2(Ughost)<< " " << norm2(Umu)<<std::endl; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   ///// Array for the site plaquette | ||||
|   GridBase *GhostGrid = Ughost.Grid(); | ||||
|   LatticeComplex gplaq(GhostGrid);  | ||||
|    | ||||
|   std::vector<Coordinate> shifts; | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     for(int nu=mu+1;nu<Nd;nu++){ | ||||
|    | ||||
|       //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x) | ||||
|       Coordinate shift_0(Nd,0); | ||||
|       Coordinate shift_mu(Nd,0); shift_mu[mu]=1; | ||||
|       Coordinate shift_nu(Nd,0); shift_nu[nu]=1; | ||||
|       shifts.push_back(shift_0); | ||||
|       shifts.push_back(shift_mu); | ||||
|       shifts.push_back(shift_nu); | ||||
|       shifts.push_back(shift_0); | ||||
|     } | ||||
|   } | ||||
|   GeneralLocalStencil gStencil(GhostGrid,shifts); | ||||
|  | ||||
|   gplaq=Zero(); | ||||
|   { | ||||
|     autoView( gp_v , gplaq, CpuWrite); | ||||
|     autoView( t_v , trplaq, CpuRead); | ||||
|     autoView( U_v , Ughost, CpuRead); | ||||
|     for(int ss=0;ss<gp_v.size();ss++){ | ||||
|       int s=0; | ||||
|       for(int mu=0;mu<Nd;mu++){ | ||||
| 	for(int nu=mu+1;nu<Nd;nu++){ | ||||
|  | ||||
| 	  auto SE0 = gStencil.GetEntry(s+0,ss); | ||||
| 	  auto SE1 = gStencil.GetEntry(s+1,ss); | ||||
| 	  auto SE2 = gStencil.GetEntry(s+2,ss); | ||||
| 	  auto SE3 = gStencil.GetEntry(s+3,ss); | ||||
| 	 | ||||
| 	  int o0 = SE0->_offset; | ||||
| 	  int o1 = SE1->_offset; | ||||
| 	  int o2 = SE2->_offset; | ||||
| 	  int o3 = SE3->_offset; | ||||
| 	   | ||||
| 	  auto U0 = U_v[o0](mu); | ||||
| 	  auto U1 = U_v[o1](nu); | ||||
| 	  auto U2 = adj(U_v[o2](mu)); | ||||
| 	  auto U3 = adj(U_v[o3](nu)); | ||||
|  | ||||
| 	  gpermute(U0,SE0->_permute); | ||||
| 	  gpermute(U1,SE1->_permute); | ||||
| 	  gpermute(U2,SE2->_permute); | ||||
| 	  gpermute(U3,SE3->_permute); | ||||
| 	   | ||||
| 	  gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 ); | ||||
| 	  s=s+4; | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   cplaq = Ghost.Extract(gplaq); | ||||
|   RealD vol = cplaq.Grid()->gSites(); | ||||
|   RealD faces = (Nd * (Nd-1))/2; | ||||
|   auto p = TensorRemove(sum(cplaq)); | ||||
|   auto result = p.real()/vol/faces/Nc; | ||||
|  | ||||
|   std::cout << GridLogMessage << " Average plaquette via padded cell "<<result<<std::endl; | ||||
|   std::cout << GridLogMessage << " Diff "<<result-plaq<<std::endl; | ||||
|    | ||||
|   assert(fabs(result-plaq)<1.0e-8); | ||||
|   Grid_finalize(); | ||||
| } | ||||
| @@ -476,7 +476,9 @@ int main (int argc, char ** argv) | ||||
|   //  ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter); | ||||
|  | ||||
|   //////////////////// One flavour boundary det  //////////////////// | ||||
|   /* | ||||
|   RationalActionParams OFRp; // Up/down | ||||
|   int SP_iters = 3000; | ||||
|   OFRp.lo       = 6.0e-5; | ||||
|   OFRp.hi       = 90.0; | ||||
|   OFRp.inv_pow  = 2; | ||||
| @@ -489,7 +491,7 @@ int main (int argc, char ** argv) | ||||
|   //  OFRp.degree   = 16; | ||||
|   OFRp.precision= 80; | ||||
|   OFRp.BoundsCheckFreq=0; | ||||
|   /* | ||||
|   */ | ||||
|   OneFlavourRationalParams OFRp; // Up/down | ||||
|   OFRp.lo       = 4.0e-5; | ||||
|   OFRp.hi       = 90.0; | ||||
| @@ -499,7 +501,6 @@ int main (int argc, char ** argv) | ||||
|   OFRp.degree   = 18; | ||||
|   OFRp.precision= 80; | ||||
|   OFRp.BoundsCheckFreq=0; | ||||
|   */ | ||||
|   std::vector<RealD> ActionTolByPole({ | ||||
|       1.0e-7,1.0e-8,1.0e-8,1.0e-8, | ||||
|       1.0e-8,1.0e-8,1.0e-8,1.0e-8, | ||||
|   | ||||
							
								
								
									
										170
									
								
								tests/forces/Test_fthmc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								tests/forces/Test_fthmc.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_fthmc.cc | ||||
|  | ||||
|     Copyright (C) 2022 | ||||
|  | ||||
| Author: Peter Boyle <pboyle@bnl.gov> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
| #include <Grid/qcd/smearing/GaugeConfigurationMasked.h> | ||||
| #include <Grid/qcd/smearing/JacobianAction.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
|  | ||||
|  | ||||
| template<class Gimpl> | ||||
| void ForceTest(Action<LatticeGaugeField> &action,SmearedConfigurationMasked<PeriodicGimplR> & smU,MomentumFilterBase<LatticeGaugeField> &Filter) | ||||
| { | ||||
|   LatticeGaugeField U = smU.get_U(false); // unsmeared config | ||||
|   GridBase *UGrid = U.Grid(); | ||||
|  | ||||
|   std::vector<int> seeds({1,2,3,5}); | ||||
|   GridSerialRNG            sRNG;         sRNG.SeedFixedIntegers(seeds); | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds); | ||||
|  | ||||
|   LatticeColourMatrix Pmu(UGrid);  | ||||
|   LatticeGaugeField P(UGrid);  | ||||
|   LatticeGaugeField UdSdU(UGrid);  | ||||
|  | ||||
|   std::cout << GridLogMessage << "*********************************************************"<<std::endl; | ||||
|   std::cout << GridLogMessage << " Force test for "<<action.action_name()<<std::endl; | ||||
|   std::cout << GridLogMessage << "*********************************************************"<<std::endl; | ||||
|    | ||||
|   RealD eps=0.005; | ||||
|  | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   std::cout << GridLogMessage << " Refresh "<<action.action_name()<<std::endl; | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|    | ||||
|   Gimpl::generate_momenta(P,sRNG,RNG4); | ||||
|   Filter.applyFilter(P); | ||||
|  | ||||
|   action.refresh(smU,sRNG,RNG4); | ||||
|  | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl; | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|  | ||||
|   RealD S1 = action.S(smU); | ||||
|  | ||||
|   Gimpl::update_field(P,U,eps); | ||||
|   smU.set_Field(U); | ||||
|  | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   std::cout << GridLogMessage << " Derivative "<<action.action_name()<<std::endl; | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   action.deriv(smU,UdSdU); | ||||
|   UdSdU = Ta(UdSdU); | ||||
|   Filter.applyFilter(UdSdU); | ||||
|  | ||||
|   DumpSliceNorm("Force",UdSdU,Nd-1); | ||||
|    | ||||
|   Gimpl::update_field(P,U,eps); | ||||
|   smU.set_Field(U); | ||||
|  | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl; | ||||
|   std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|    | ||||
|   RealD S2 = action.S(smU); | ||||
|  | ||||
|   // Use the derivative | ||||
|   LatticeComplex dS(UGrid); dS = Zero(); | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     auto UdSdUmu = PeekIndex<LorentzIndex>(UdSdU,mu); | ||||
|     Pmu= PeekIndex<LorentzIndex>(P,mu); | ||||
|     dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0; | ||||
|   } | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|   RealD diff =  S2-S1-dSpred.real(); | ||||
|  | ||||
|   std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl; | ||||
|   std::cout<< GridLogMessage << "S1 : "<< S1    <<std::endl; | ||||
|   std::cout<< GridLogMessage << "S2 : "<< S2    <<std::endl; | ||||
|   std::cout<< GridLogMessage << "dS : "<< S2-S1 <<std::endl; | ||||
|   std::cout<< GridLogMessage << "dSpred : "<< dSpred.real() <<std::endl; | ||||
|   std::cout<< GridLogMessage << "diff : "<< diff<<std::endl; | ||||
|   std::cout<< GridLogMessage << "*********************************************************"<<std::endl; | ||||
|   //  assert(diff<1.0); | ||||
|   std::cout<< GridLogMessage << "Done" <<std::endl; | ||||
|   std::cout << GridLogMessage << "*********************************************************"<<std::endl; | ||||
| } | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::cout << std::setprecision(14); | ||||
|   Coordinate latt_size   = GridDefaultLatt(); | ||||
|   Coordinate mpi_layout  = GridDefaultMpi(); | ||||
|   Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   Coordinate shm; | ||||
|   GlobalSharedMemory::GetShmDims(mpi_layout,shm); | ||||
|  | ||||
|   const int Ls=12; | ||||
|   const int Nt = latt_size[3]; | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|  | ||||
|   ///////////////////// Gauge Field and Gauge Forces //////////////////////////// | ||||
|   LatticeGaugeField U(UGrid); | ||||
|  | ||||
| #if  0 | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ckpoint_lat.2000"); | ||||
|   NerscIO::readConfiguration(U,header,file); | ||||
| #else | ||||
|   std::vector<int> seeds({1,2,3,4,5,6,7,8}); | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds); | ||||
|   SU<Nc>::HotConfiguration(RNG4,U); | ||||
| #endif | ||||
|  | ||||
|    | ||||
|   RealD beta=6.0; | ||||
|   WilsonGaugeActionR  PlaqAction(beta); | ||||
|   IwasakiGaugeActionR RectAction(beta); | ||||
|  | ||||
|  | ||||
|   //////////////////////////////////////////////// | ||||
|   // Plaquette only FTHMC smearer | ||||
|   //////////////////////////////////////////////// | ||||
|   double rho = 0.1; | ||||
|   Smear_Stout<PeriodicGimplR> Smearer(rho); | ||||
|   SmearedConfigurationMasked<PeriodicGimplR> SmartConfig(UGrid,2*Nd,Smearer,true); | ||||
|  | ||||
|   JacobianAction<PeriodicGimplR> Jacobian(&SmartConfig); | ||||
|    | ||||
|   //////////////////////////////////////////////// | ||||
|   // Run some tests | ||||
|   //////////////////////////////////////////////// | ||||
|   MomentumFilterNone<LatticeGaugeField> FilterNone; | ||||
|   SmartConfig.set_Field(U); | ||||
|   ForceTest<GimplTypesR>(PlaqAction,SmartConfig,FilterNone); | ||||
|   SmartConfig.set_Field(U); | ||||
|   ForceTest<GimplTypesR>(RectAction,SmartConfig,FilterNone); | ||||
|   SmartConfig.set_Field(U); | ||||
|   ForceTest<GimplTypesR>(Jacobian,SmartConfig,FilterNone); | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
		Reference in New Issue
	
	Block a user