mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 05:24:32 +00:00 
			
		
		
		
	Compare commits
	
		
			94 Commits
		
	
	
		
			master
			...
			8f84bbed1b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					8f84bbed1b | ||
| 
						 | 
					b8a7004365 | ||
| 
						 | 
					994512048e | ||
| 78bae9417c | |||
| dd170ead01 | |||
| 014704856f | |||
| 
						 | 
					ee92e08edb | ||
| 
						 | 
					c1dcee9328 | ||
| 
						 | 
					6b150961fe | ||
| 
						 | 
					5bafcaedfa | ||
| 
						 | 
					bfeceae708 | ||
| 
						 | 
					eacb66591f | ||
| 
						 | 
					fadaa85626 | ||
| 
						 | 
					02a5b0d786 | ||
| 
						 | 
					0e2141442a | ||
| 
						 | 
					769eb0eecb | ||
| 85e35c4da1 | |||
| 
						 | 
					d72e914cf0 | ||
| 
						 | 
					3b5254e2d5 | ||
| 
						 | 
					f1c358b596 | ||
| 
						 | 
					c0ef210265 | ||
| 
						 | 
					e3e1cc1962 | ||
| 
						 | 
					723eadbb5c | ||
| 
						 | 
					e24637ec1e | ||
| 
						 | 
					8b01ff4ce7 | ||
| 
						 | 
					588197c487 | ||
| 
						 | 
					1352bad2e4 | ||
| 
						 | 
					ffd7301649 | ||
| 
						 | 
					d2a8494044 | ||
| 
						 | 
					0982e0d19b | ||
| 
						 | 
					3badbfc3c1 | ||
| 
						 | 
					5465961e30 | ||
| 477b794bc5 | |||
| 
						 | 
					4835fd1a87 | ||
| 
						 | 
					6533c25814 | ||
| 
						 | 
					1b2914ec09 | ||
| 
						 | 
					519f795066 | ||
| 
						 | 
					4240ad5ca8 | ||
| 
						 | 
					d418347d86 | ||
| 
						 | 
					29a4bfe5e5 | ||
| 
						 | 
					9955bf9daf | ||
| 
						 | 
					876c8f4478 | ||
| 
						 | 
					9c8750f261 | ||
| 
						 | 
					91efd08179 | ||
| 
						 | 
					9953511b65 | ||
| 
						 | 
					025fa9991a | ||
| 
						 | 
					e8c60c355b | ||
| 
						 | 
					6c9c7f9d85 | ||
| 
						 | 
					f534523ede | ||
| 
						 | 
					1b8a834beb | ||
| 
						 | 
					3aa43e6065 | ||
| 
						 | 
					78ac4044ff | ||
| 
						 | 
					119c3db47f | ||
| 
						 | 
					21bbdb8fc2 | ||
| 
						 | 
					739bd7572c | ||
| 
						 | 
					074627a5bd | ||
| 
						 | 
					6a23b2c599 | ||
| 
						 | 
					bd891fb3f5 | ||
| 
						 | 
					3984265851 | ||
| 
						 | 
					45361d188f | ||
| 
						 | 
					80c9d77e02 | ||
| 
						 | 
					3aff64dddb | ||
| 
						 | 
					b4f2ca81ff | ||
| 
						 | 
					d1dea5f840 | ||
| 
						 | 
					54f8b84d16 | ||
| 
						 | 
					da503fef0e | ||
| 
						 | 
					4a6802098a | ||
| 
						 | 
					f9b41a84d2 | ||
| 5d7e0d18b9 | |||
| 9e64387933 | |||
| 983b681d46 | |||
| 4072408b6f | |||
| bd76b47fbf | |||
| 18ce23aa75 | |||
| 
						 | 
					ffa7fe0cc2 | ||
| 
						 | 
					6b979f0a69 | ||
| 
						 | 
					86dac5ff4f | ||
| 
						 | 
					4a382fad3f | ||
| 
						 | 
					cc753670d9 | ||
| 
						 | 
					cc9d88ea1c | ||
| 
						 | 
					b281b0166e | ||
| 
						 | 
					6a21f694ff | ||
| 
						 | 
					fc4db5e963 | ||
| 
						 | 
					6252ffaf76 | ||
| 
						 | 
					af64c1c6b6 | ||
| 
						 | 
					866f48391a | ||
| 
						 | 
					a4df527d74 | ||
| 
						 | 
					5764d21161 | ||
| 
						 | 
					496d04cd85 | ||
| 39214702f6 | |||
| 3e4614c63a | |||
| 
						 | 
					ccd21f96ff | ||
| 
						 | 
					4b90cb8888 | ||
| 32e6d58356 | 
							
								
								
									
										54
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								.github/ISSUE_TEMPLATE/bug-report.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,54 @@
 | 
				
			|||||||
 | 
					name: Bug report
 | 
				
			||||||
 | 
					description: Report a bug.
 | 
				
			||||||
 | 
					title: "<insert title>"
 | 
				
			||||||
 | 
					labels: [bug]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					body:
 | 
				
			||||||
 | 
					  - type: markdown
 | 
				
			||||||
 | 
					    attributes:
 | 
				
			||||||
 | 
					      value: >
 | 
				
			||||||
 | 
					        Thank you for taking the time to file a bug report.
 | 
				
			||||||
 | 
					        Please check that the code is pointing to the HEAD of develop
 | 
				
			||||||
 | 
					        or any commit in master which is tagged with a version number.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - type: textarea
 | 
				
			||||||
 | 
					    attributes:
 | 
				
			||||||
 | 
					      label: "Describe the issue:"
 | 
				
			||||||
 | 
					      description: >
 | 
				
			||||||
 | 
					        Describe the issue and any previous attempt to solve it.
 | 
				
			||||||
 | 
					    validations:
 | 
				
			||||||
 | 
					      required: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - type: textarea
 | 
				
			||||||
 | 
					    attributes:
 | 
				
			||||||
 | 
					      label: "Code example:"
 | 
				
			||||||
 | 
					      description: >
 | 
				
			||||||
 | 
					        If relevant, show how to reproduce the issue using a minimal working
 | 
				
			||||||
 | 
					        example.
 | 
				
			||||||
 | 
					      placeholder: |
 | 
				
			||||||
 | 
					        << your code here >>
 | 
				
			||||||
 | 
					      render: shell
 | 
				
			||||||
 | 
					    validations:
 | 
				
			||||||
 | 
					      required: false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - type: textarea
 | 
				
			||||||
 | 
					    attributes:
 | 
				
			||||||
 | 
					      label: "Target platform:"
 | 
				
			||||||
 | 
					      description: >
 | 
				
			||||||
 | 
					        Give a description of the target platform (CPU, network, compiler).
 | 
				
			||||||
 | 
					        Please give the full CPU part description, using for example
 | 
				
			||||||
 | 
					        `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
 | 
				
			||||||
 | 
					        or `sysctl machdep.cpu.brand_string` (macOS) and the full output
 | 
				
			||||||
 | 
					        the `--version` option of your compiler.
 | 
				
			||||||
 | 
					    validations:
 | 
				
			||||||
 | 
					      required: true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  - type: textarea
 | 
				
			||||||
 | 
					    attributes:
 | 
				
			||||||
 | 
					      label: "Configure options:"
 | 
				
			||||||
 | 
					      description: >
 | 
				
			||||||
 | 
					        Please give the exact configure command used and attach
 | 
				
			||||||
 | 
					        `config.log`, `grid.config.summary` and the output of `make V=1`.
 | 
				
			||||||
 | 
					      render: shell
 | 
				
			||||||
 | 
					    validations:
 | 
				
			||||||
 | 
					      required: true
 | 
				
			||||||
@@ -542,6 +542,7 @@ public:
 | 
				
			|||||||
      (*this)(in[i], out[i]);
 | 
					      (*this)(in[i], out[i]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  virtual ~LinearFunction(){};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
 | 
					template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -166,16 +166,16 @@ public:
 | 
				
			|||||||
      rsqf[s] =rsq[s];
 | 
					      rsqf[s] =rsq[s];
 | 
				
			||||||
      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
 | 
					      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
 | 
				
			||||||
      //      ps_d[s] = src_d;
 | 
					      //      ps_d[s] = src_d;
 | 
				
			||||||
      precisionChangeFast(ps_f[s],src_d);
 | 
					      precisionChange(ps_f[s],src_d);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // r and p for primary
 | 
					    // r and p for primary
 | 
				
			||||||
    p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
 | 
					    p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
 | 
				
			||||||
    r_d = p_d;
 | 
					    r_d = p_d;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    //MdagM+m[0]
 | 
					    //MdagM+m[0]
 | 
				
			||||||
    precisionChangeFast(p_f,p_d);
 | 
					    precisionChange(p_f,p_d);
 | 
				
			||||||
    Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
					    Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
				
			||||||
    precisionChangeFast(tmp_d,mmp_f);
 | 
					    precisionChange(tmp_d,mmp_f);
 | 
				
			||||||
    Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
					    Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
				
			||||||
    tmp_d = tmp_d - mmp_d;
 | 
					    tmp_d = tmp_d - mmp_d;
 | 
				
			||||||
    std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
 | 
					    std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
 | 
				
			||||||
@@ -204,7 +204,7 @@ public:
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
    for(int s=0;s<nshift;s++) {
 | 
					    for(int s=0;s<nshift;s++) {
 | 
				
			||||||
      axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
 | 
					      axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
 | 
				
			||||||
      precisionChangeFast(psi_f[s],psi_d[s]);
 | 
					      precisionChange(psi_f[s],psi_d[s]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
    ///////////////////////////////////////
 | 
					    ///////////////////////////////////////
 | 
				
			||||||
@@ -225,7 +225,7 @@ public:
 | 
				
			|||||||
      AXPYTimer.Stop();
 | 
					      AXPYTimer.Stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      PrecChangeTimer.Start();
 | 
					      PrecChangeTimer.Start();
 | 
				
			||||||
      precisionChangeFast(r_f, r_d);
 | 
					      precisionChange(r_f, r_d);
 | 
				
			||||||
      PrecChangeTimer.Stop();
 | 
					      PrecChangeTimer.Stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      AXPYTimer.Start();
 | 
					      AXPYTimer.Start();
 | 
				
			||||||
@@ -243,13 +243,13 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      cp=c;
 | 
					      cp=c;
 | 
				
			||||||
      PrecChangeTimer.Start();
 | 
					      PrecChangeTimer.Start();
 | 
				
			||||||
      precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
 | 
					      precisionChange(p_f, p_d); //get back single prec search direction for linop
 | 
				
			||||||
      PrecChangeTimer.Stop();
 | 
					      PrecChangeTimer.Stop();
 | 
				
			||||||
      MatrixTimer.Start();  
 | 
					      MatrixTimer.Start();  
 | 
				
			||||||
      Linop_f.HermOp(p_f,mmp_f);
 | 
					      Linop_f.HermOp(p_f,mmp_f);
 | 
				
			||||||
      MatrixTimer.Stop();  
 | 
					      MatrixTimer.Stop();  
 | 
				
			||||||
      PrecChangeTimer.Start();
 | 
					      PrecChangeTimer.Start();
 | 
				
			||||||
      precisionChangeFast(mmp_d, mmp_f); // From Float to Double
 | 
					      precisionChange(mmp_d, mmp_f); // From Float to Double
 | 
				
			||||||
      PrecChangeTimer.Stop();
 | 
					      PrecChangeTimer.Stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      d=real(innerProduct(p_d,mmp_d));    
 | 
					      d=real(innerProduct(p_d,mmp_d));    
 | 
				
			||||||
@@ -311,7 +311,7 @@ public:
 | 
				
			|||||||
	SolverTimer.Stop();
 | 
						SolverTimer.Stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for(int s=0;s<nshift;s++){
 | 
						for(int s=0;s<nshift;s++){
 | 
				
			||||||
	  precisionChangeFast(psi_d[s],psi_f[s]);
 | 
						  precisionChange(psi_d[s],psi_f[s]);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -211,7 +211,7 @@ public:
 | 
				
			|||||||
    Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
					    Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
 | 
				
			||||||
    tmp_d = tmp_d - mmp_d;
 | 
					    tmp_d = tmp_d - mmp_d;
 | 
				
			||||||
    std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
 | 
					    std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
 | 
				
			||||||
    //    assert(norm2(tmp_d)< 1.0e-4);
 | 
					    assert(norm2(tmp_d)< 1.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    axpy(mmp_d,mass[0],p_d,mmp_d);
 | 
					    axpy(mmp_d,mass[0],p_d,mmp_d);
 | 
				
			||||||
    RealD rn = norm2(p_d);
 | 
					    RealD rn = norm2(p_d);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -419,14 +419,15 @@ until convergence
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if ( Nconv < Nstop )
 | 
					      if ( Nconv < Nstop ) {
 | 
				
			||||||
	std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
 | 
						std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
 | 
				
			||||||
 | 
						std::cout << GridLogIRL << "returning Nstop vectors, the last "<< Nstop-Nconv << "of which might meet convergence criterion only approximately" <<std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      eval=eval2;
 | 
					      eval=eval2;
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      //Keep only converged
 | 
					      //Keep only converged
 | 
				
			||||||
      eval.resize(Nconv);// Nstop?
 | 
					      eval.resize(Nstop);// was Nconv
 | 
				
			||||||
      evec.resize(Nconv,grid);// Nstop?
 | 
					      evec.resize(Nstop,grid);// was Nconv
 | 
				
			||||||
      basisSortInPlace(evec,eval,reverse);
 | 
					      basisSortInPlace(evec,eval,reverse);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
 | 
				
			|||||||
  uint64_t LruBytes1=0;
 | 
					  uint64_t LruBytes1=0;
 | 
				
			||||||
  uint64_t LruBytes2=0;
 | 
					  uint64_t LruBytes2=0;
 | 
				
			||||||
  uint64_t LruCnt=0;
 | 
					  uint64_t LruCnt=0;
 | 
				
			||||||
  uint64_t LockedBytes=0;
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
 | 
					  std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
 | 
				
			||||||
  for(auto it=LRU.begin();it!=LRU.end();it++){
 | 
					  for(auto it=LRU.begin();it!=LRU.end();it++){
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -128,7 +128,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 | 
				
			|||||||
							 int recv_from_rank,int dor,
 | 
												 int recv_from_rank,int dor,
 | 
				
			||||||
							 int xbytes,int rbytes, int dir)
 | 
												 int xbytes,int rbytes, int dir)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return 2.0*bytes;
 | 
					  return xbytes+rbytes;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
 | 
					void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -91,6 +91,59 @@ void *SharedMemory::ShmBufferSelf(void)
 | 
				
			|||||||
  //std::cerr << "ShmBufferSelf "<<ShmRank<<" "<<std::hex<< ShmCommBufs[ShmRank] <<std::dec<<std::endl;
 | 
					  //std::cerr << "ShmBufferSelf "<<ShmRank<<" "<<std::hex<< ShmCommBufs[ShmRank] <<std::dec<<std::endl;
 | 
				
			||||||
  return ShmCommBufs[ShmRank];
 | 
					  return ShmCommBufs[ShmRank];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					static inline int divides(int a,int b)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return ( b == ( (b/a)*a ) );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Allow user to configure through environment variable
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
 | 
				
			||||||
 | 
					  if ( str ) {
 | 
				
			||||||
 | 
					    std::vector<int> IntShmDims;
 | 
				
			||||||
 | 
					    GridCmdOptionIntVector(std::string(str),IntShmDims);
 | 
				
			||||||
 | 
					    assert(IntShmDims.size() == WorldDims.size());
 | 
				
			||||||
 | 
					    long ShmSize = 1;
 | 
				
			||||||
 | 
					    for (int dim=0;dim<WorldDims.size();dim++) {
 | 
				
			||||||
 | 
					      ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
 | 
				
			||||||
 | 
					      assert(divides(ShmDims[dim],WorldDims[dim]));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    assert(ShmSize == WorldShmSize);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Powers of 2,3,5 only in prime decomposition for now
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int ndimension = WorldDims.size();
 | 
				
			||||||
 | 
					  ShmDims=Coordinate(ndimension,1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> primes({2,3,5});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int dim = 0;
 | 
				
			||||||
 | 
					  int last_dim = ndimension - 1;
 | 
				
			||||||
 | 
					  int AutoShmSize = 1;
 | 
				
			||||||
 | 
					  while(AutoShmSize != WorldShmSize) {
 | 
				
			||||||
 | 
					    int p;
 | 
				
			||||||
 | 
					    for(p=0;p<primes.size();p++) {
 | 
				
			||||||
 | 
					      int prime=primes[p];
 | 
				
			||||||
 | 
					      if ( divides(prime,WorldDims[dim]/ShmDims[dim])
 | 
				
			||||||
 | 
					        && divides(prime,WorldShmSize/AutoShmSize)  ) {
 | 
				
			||||||
 | 
					  AutoShmSize*=prime;
 | 
				
			||||||
 | 
					  ShmDims[dim]*=prime;
 | 
				
			||||||
 | 
					  last_dim = dim;
 | 
				
			||||||
 | 
					  break;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (p == primes.size() && last_dim == dim) {
 | 
				
			||||||
 | 
					      std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
 | 
				
			||||||
 | 
					      exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    dim=(dim+1) %ndimension;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid); 
 | 
					NAMESPACE_END(Grid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -27,9 +27,10 @@ Author: Christoph Lehner <christoph@lhnr.de>
 | 
				
			|||||||
*************************************************************************************/
 | 
					*************************************************************************************/
 | 
				
			||||||
/*  END LEGAL */
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define Mheader "SharedMemoryMpi: "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <Grid/GridCore.h>
 | 
					#include <Grid/GridCore.h>
 | 
				
			||||||
#include <pwd.h>
 | 
					#include <pwd.h>
 | 
				
			||||||
#include <syscall.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef GRID_CUDA
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
#include <cuda_runtime_api.h>
 | 
					#include <cuda_runtime_api.h>
 | 
				
			||||||
@@ -39,11 +40,118 @@ Author: Christoph Lehner <christoph@lhnr.de>
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_SYCL
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
					#define GRID_SYCL_LEVEL_ZERO_IPC
 | 
				
			||||||
 | 
					#include <syscall.h>
 | 
				
			||||||
 | 
					#define SHM_SOCKETS 
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <sys/socket.h>
 | 
				
			||||||
 | 
					#include <sys/un.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef SHM_SOCKETS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Barbaric extra intranode communication route in case we need sockets to pass FDs
 | 
				
			||||||
 | 
					 * Forced by level_zero not being nicely designed
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int sock;
 | 
				
			||||||
 | 
					static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
 | 
				
			||||||
 | 
					static char sock_path[256];
 | 
				
			||||||
 | 
					class UnixSockets {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  static void Open(int rank)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int errnum;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sock = socket(AF_UNIX, SOCK_DGRAM, 0);  assert(sock>0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct sockaddr_un sa_un = { 0 };
 | 
				
			||||||
 | 
					    sa_un.sun_family = AF_UNIX;
 | 
				
			||||||
 | 
					    snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
 | 
				
			||||||
 | 
					    unlink(sa_un.sun_path);
 | 
				
			||||||
 | 
					    if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
 | 
				
			||||||
 | 
					      perror("bind failure");
 | 
				
			||||||
 | 
					      exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static int RecvFileDescriptor(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int n;
 | 
				
			||||||
 | 
					    int fd;
 | 
				
			||||||
 | 
					    char buf[1];
 | 
				
			||||||
 | 
					    struct iovec iov;
 | 
				
			||||||
 | 
					    struct msghdr msg;
 | 
				
			||||||
 | 
					    struct cmsghdr *cmsg;
 | 
				
			||||||
 | 
					    char cms[CMSG_SPACE(sizeof(int))];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    iov.iov_base = buf;
 | 
				
			||||||
 | 
					    iov.iov_len = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    memset(&msg, 0, sizeof msg);
 | 
				
			||||||
 | 
					    msg.msg_name = 0;
 | 
				
			||||||
 | 
					    msg.msg_namelen = 0;
 | 
				
			||||||
 | 
					    msg.msg_iov = &iov;
 | 
				
			||||||
 | 
					    msg.msg_iovlen = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg.msg_control = (caddr_t)cms;
 | 
				
			||||||
 | 
					    msg.msg_controllen = sizeof cms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if((n=recvmsg(sock, &msg, 0)) < 0) {
 | 
				
			||||||
 | 
					      perror("recvmsg failed");
 | 
				
			||||||
 | 
					      return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(n == 0){
 | 
				
			||||||
 | 
					      perror("recvmsg returned 0");
 | 
				
			||||||
 | 
					      return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    cmsg = CMSG_FIRSTHDR(&msg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return fd;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void SendFileDescriptor(int fildes,int xmit_to_rank)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    struct msghdr msg;
 | 
				
			||||||
 | 
					    struct iovec iov;
 | 
				
			||||||
 | 
					    struct cmsghdr *cmsg = NULL;
 | 
				
			||||||
 | 
					    char ctrl[CMSG_SPACE(sizeof(int))];
 | 
				
			||||||
 | 
					    char data = ' ';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    memset(&msg, 0, sizeof(struct msghdr));
 | 
				
			||||||
 | 
					    memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
 | 
				
			||||||
 | 
					    iov.iov_base = &data;
 | 
				
			||||||
 | 
					    iov.iov_len = sizeof(data);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    sprintf(sock_path,sock_path_fmt,xmit_to_rank);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    struct sockaddr_un sa_un = { 0 };
 | 
				
			||||||
 | 
					    sa_un.sun_family = AF_UNIX;
 | 
				
			||||||
 | 
					    snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg.msg_name = (void *)&sa_un;
 | 
				
			||||||
 | 
					    msg.msg_namelen = sizeof(sa_un);
 | 
				
			||||||
 | 
					    msg.msg_iov = &iov;
 | 
				
			||||||
 | 
					    msg.msg_iovlen = 1;
 | 
				
			||||||
 | 
					    msg.msg_controllen =  CMSG_SPACE(sizeof(int));
 | 
				
			||||||
 | 
					    msg.msg_control = ctrl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    cmsg = CMSG_FIRSTHDR(&msg);
 | 
				
			||||||
 | 
					    cmsg->cmsg_level = SOL_SOCKET;
 | 
				
			||||||
 | 
					    cmsg->cmsg_type = SCM_RIGHTS;
 | 
				
			||||||
 | 
					    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *((int *) CMSG_DATA(cmsg)) = fildes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sendmsg(sock, &msg, 0);
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid); 
 | 
					 | 
				
			||||||
#define header "SharedMemoryMpi: "
 | 
					 | 
				
			||||||
/*Construct from an MPI communicator*/
 | 
					/*Construct from an MPI communicator*/
 | 
				
			||||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
 | 
					void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -66,8 +174,8 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
 | 
				
			|||||||
  MPI_Comm_size(WorldShmComm     ,&WorldShmSize);
 | 
					  MPI_Comm_size(WorldShmComm     ,&WorldShmSize);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if ( WorldRank == 0) {
 | 
					  if ( WorldRank == 0) {
 | 
				
			||||||
    std::cout << header " World communicator of size " <<WorldSize << std::endl;  
 | 
					    std::cout << Mheader " World communicator of size " <<WorldSize << std::endl;  
 | 
				
			||||||
    std::cout << header " Node  communicator of size " <<WorldShmSize << std::endl;
 | 
					    std::cout << Mheader " Node  communicator of size " <<WorldShmSize << std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  // WorldShmComm, WorldShmSize, WorldShmRank
 | 
					  // WorldShmComm, WorldShmSize, WorldShmRank
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -170,59 +278,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
 | 
				
			|||||||
  if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
 | 
					  if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
 | 
				
			||||||
  else                          OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
 | 
					  else                          OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
static inline int divides(int a,int b)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  return ( b == ( (b/a)*a ) );
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Allow user to configure through environment variable
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  char* str = getenv(("GRID_SHM_DIMS_" + std::to_string(ShmDims.size())).c_str());
 | 
					 | 
				
			||||||
  if ( str ) {
 | 
					 | 
				
			||||||
    std::vector<int> IntShmDims;
 | 
					 | 
				
			||||||
    GridCmdOptionIntVector(std::string(str),IntShmDims);
 | 
					 | 
				
			||||||
    assert(IntShmDims.size() == WorldDims.size());
 | 
					 | 
				
			||||||
    long ShmSize = 1;
 | 
					 | 
				
			||||||
    for (int dim=0;dim<WorldDims.size();dim++) {
 | 
					 | 
				
			||||||
      ShmSize *= (ShmDims[dim] = IntShmDims[dim]);
 | 
					 | 
				
			||||||
      assert(divides(ShmDims[dim],WorldDims[dim]));
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    assert(ShmSize == WorldShmSize);
 | 
					 | 
				
			||||||
    return;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Powers of 2,3,5 only in prime decomposition for now
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  int ndimension = WorldDims.size();
 | 
					 | 
				
			||||||
  ShmDims=Coordinate(ndimension,1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::vector<int> primes({2,3,5});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int dim = 0;
 | 
					 | 
				
			||||||
  int last_dim = ndimension - 1;
 | 
					 | 
				
			||||||
  int AutoShmSize = 1;
 | 
					 | 
				
			||||||
  while(AutoShmSize != WorldShmSize) {
 | 
					 | 
				
			||||||
    int p;
 | 
					 | 
				
			||||||
    for(p=0;p<primes.size();p++) {
 | 
					 | 
				
			||||||
      int prime=primes[p];
 | 
					 | 
				
			||||||
      if ( divides(prime,WorldDims[dim]/ShmDims[dim])
 | 
					 | 
				
			||||||
        && divides(prime,WorldShmSize/AutoShmSize)  ) {
 | 
					 | 
				
			||||||
	AutoShmSize*=prime;
 | 
					 | 
				
			||||||
	ShmDims[dim]*=prime;
 | 
					 | 
				
			||||||
	last_dim = dim;
 | 
					 | 
				
			||||||
	break;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    if (p == primes.size() && last_dim == dim) {
 | 
					 | 
				
			||||||
      std::cerr << "GlobalSharedMemory::GetShmDims failed" << std::endl;
 | 
					 | 
				
			||||||
      exit(EXIT_FAILURE);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    dim=(dim+1) %ndimension;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
 | 
					void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -396,7 +452,7 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &proce
 | 
				
			|||||||
#ifdef GRID_MPI3_SHMGET
 | 
					#ifdef GRID_MPI3_SHMGET
 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  std::cout << header "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
 | 
					  std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
 | 
				
			||||||
  assert(_ShmSetup==1);
 | 
					  assert(_ShmSetup==1);
 | 
				
			||||||
  assert(_ShmAlloc==0);
 | 
					  assert(_ShmAlloc==0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -481,7 +537,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    exit(EXIT_FAILURE);  
 | 
					    exit(EXIT_FAILURE);  
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes 
 | 
					  std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes 
 | 
				
			||||||
	    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
 | 
						    << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SharedMemoryZero(ShmCommBuf,bytes);
 | 
					  SharedMemoryZero(ShmCommBuf,bytes);
 | 
				
			||||||
@@ -524,7 +580,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    exit(EXIT_FAILURE);  
 | 
					    exit(EXIT_FAILURE);  
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  if ( WorldRank == 0 ){
 | 
					  if ( WorldRank == 0 ){
 | 
				
			||||||
    std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes 
 | 
					    std::cout << WorldRank << Mheader " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes 
 | 
				
			||||||
	      << "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
 | 
						      << "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  SharedMemoryZero(ShmCommBuf,bytes);
 | 
					  SharedMemoryZero(ShmCommBuf,bytes);
 | 
				
			||||||
@@ -532,8 +588,13 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Loop over ranks/gpu's on our node
 | 
					  // Loop over ranks/gpu's on our node
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#ifdef SHM_SOCKETS
 | 
				
			||||||
 | 
					  UnixSockets::Open(WorldShmRank);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  for(int r=0;r<WorldShmSize;r++){
 | 
					  for(int r=0;r<WorldShmSize;r++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    MPI_Barrier(WorldShmComm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef GRID_MPI3_SHM_NONE
 | 
					#ifndef GRID_MPI3_SHM_NONE
 | 
				
			||||||
    //////////////////////////////////////////////////
 | 
					    //////////////////////////////////////////////////
 | 
				
			||||||
    // If it is me, pass around the IPC access key
 | 
					    // If it is me, pass around the IPC access key
 | 
				
			||||||
@@ -541,7 +602,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    void * thisBuf = ShmCommBuf;
 | 
					    void * thisBuf = ShmCommBuf;
 | 
				
			||||||
    if(!Stencil_force_mpi) {
 | 
					    if(!Stencil_force_mpi) {
 | 
				
			||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
 | 
					#ifdef GRID_SYCL_LEVEL_ZERO_IPC
 | 
				
			||||||
    typedef struct { int fd; pid_t pid ; } clone_mem_t;
 | 
					    typedef struct { int fd; pid_t pid ; ze_ipc_mem_handle_t ze; } clone_mem_t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
 | 
					    auto zeDevice    = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
 | 
				
			||||||
    auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
 | 
					    auto zeContext   = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
 | 
				
			||||||
@@ -552,13 +613,21 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    if ( r==WorldShmRank ) { 
 | 
					    if ( r==WorldShmRank ) { 
 | 
				
			||||||
      auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
 | 
					      auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
 | 
				
			||||||
      if ( err != ZE_RESULT_SUCCESS ) {
 | 
					      if ( err != ZE_RESULT_SUCCESS ) {
 | 
				
			||||||
	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
 | 
						std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
 | 
				
			||||||
	exit(EXIT_FAILURE);
 | 
						exit(EXIT_FAILURE);
 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
	std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
 | 
						std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
 | 
					      memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
 | 
				
			||||||
      handle.pid = getpid();
 | 
					      handle.pid = getpid();
 | 
				
			||||||
 | 
					      memcpy((void *)&handle.ze,(void *)&ihandle,sizeof(ihandle));
 | 
				
			||||||
 | 
					#ifdef SHM_SOCKETS
 | 
				
			||||||
 | 
					      for(int rr=0;rr<WorldShmSize;rr++){
 | 
				
			||||||
 | 
						if(rr!=r){
 | 
				
			||||||
 | 
						  UnixSockets::SendFileDescriptor(handle.fd,rr);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#ifdef GRID_CUDA
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
@@ -586,6 +655,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    // Share this IPC handle across the Shm Comm
 | 
					    // Share this IPC handle across the Shm Comm
 | 
				
			||||||
    //////////////////////////////////////////////////
 | 
					    //////////////////////////////////////////////////
 | 
				
			||||||
    { 
 | 
					    { 
 | 
				
			||||||
 | 
					      MPI_Barrier(WorldShmComm);
 | 
				
			||||||
      int ierr=MPI_Bcast(&handle,
 | 
					      int ierr=MPI_Bcast(&handle,
 | 
				
			||||||
			 sizeof(handle),
 | 
								 sizeof(handle),
 | 
				
			||||||
			 MPI_BYTE,
 | 
								 MPI_BYTE,
 | 
				
			||||||
@@ -601,6 +671,10 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
 | 
					#ifdef GRID_SYCL_LEVEL_ZERO_IPC
 | 
				
			||||||
    if ( r!=WorldShmRank ) {
 | 
					    if ( r!=WorldShmRank ) {
 | 
				
			||||||
      thisBuf = nullptr;
 | 
					      thisBuf = nullptr;
 | 
				
			||||||
 | 
					      int myfd;
 | 
				
			||||||
 | 
					#ifdef SHM_SOCKETS
 | 
				
			||||||
 | 
					      myfd=UnixSockets::RecvFileDescriptor();
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
      std::cout<<"mapping seeking remote pid/fd "
 | 
					      std::cout<<"mapping seeking remote pid/fd "
 | 
				
			||||||
	       <<handle.pid<<"/"
 | 
						       <<handle.pid<<"/"
 | 
				
			||||||
	       <<handle.fd<<std::endl;
 | 
						       <<handle.fd<<std::endl;
 | 
				
			||||||
@@ -608,16 +682,22 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
      int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
 | 
					      int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
 | 
				
			||||||
      std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
 | 
					      std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
 | 
				
			||||||
      //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
 | 
					      //      int myfd  = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
 | 
				
			||||||
      int myfd  = syscall(438,pidfd,handle.fd,0);
 | 
					      myfd  = syscall(438,pidfd,handle.fd,0);
 | 
				
			||||||
 | 
					      int err_t = errno;
 | 
				
			||||||
      std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
 | 
					      if (myfd < 0) {
 | 
				
			||||||
      
 | 
					        fprintf(stderr,"pidfd_getfd returned %d errno was %d\n", myfd,err_t); fflush(stderr);
 | 
				
			||||||
 | 
						perror("pidfd_getfd failed ");
 | 
				
			||||||
 | 
						assert(0);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					      std::cout<<"Using IpcHandle mapped remote pid "<<handle.pid <<" FD "<<handle.fd <<" to myfd "<<myfd<<"\n";
 | 
				
			||||||
 | 
					      memcpy((void *)&ihandle,(void *)&handle.ze,sizeof(ihandle));
 | 
				
			||||||
      memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
 | 
					      memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
 | 
					      auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
 | 
				
			||||||
      if ( err != ZE_RESULT_SUCCESS ) {
 | 
					      if ( err != ZE_RESULT_SUCCESS ) {
 | 
				
			||||||
	std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
 | 
						std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
 | 
				
			||||||
	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; 
 | 
						std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl; 
 | 
				
			||||||
	exit(EXIT_FAILURE);
 | 
						exit(EXIT_FAILURE);
 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
	std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
 | 
						std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
 | 
				
			||||||
@@ -652,6 +732,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
#else
 | 
					#else
 | 
				
			||||||
    WorldShmCommBufs[r] = ShmCommBuf;
 | 
					    WorldShmCommBufs[r] = ShmCommBuf;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					    MPI_Barrier(WorldShmComm);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  _ShmAllocBytes=bytes;
 | 
					  _ShmAllocBytes=bytes;
 | 
				
			||||||
@@ -663,7 +744,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
#ifdef GRID_MPI3_SHMMMAP
 | 
					#ifdef GRID_MPI3_SHMMMAP
 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
 | 
					  std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
 | 
				
			||||||
  assert(_ShmSetup==1);
 | 
					  assert(_ShmSetup==1);
 | 
				
			||||||
  assert(_ShmAlloc==0);
 | 
					  assert(_ShmAlloc==0);
 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -700,7 +781,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
    assert(((uint64_t)ptr&0x3F)==0);
 | 
					    assert(((uint64_t)ptr&0x3F)==0);
 | 
				
			||||||
    close(fd);
 | 
					    close(fd);
 | 
				
			||||||
    WorldShmCommBufs[r] =ptr;
 | 
					    WorldShmCommBufs[r] =ptr;
 | 
				
			||||||
    //    std::cout << header "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
					    //    std::cout << Mheader "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  _ShmAlloc=1;
 | 
					  _ShmAlloc=1;
 | 
				
			||||||
  _ShmAllocBytes  = bytes;
 | 
					  _ShmAllocBytes  = bytes;
 | 
				
			||||||
@@ -710,7 +791,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
#ifdef GRID_MPI3_SHM_NONE
 | 
					#ifdef GRID_MPI3_SHM_NONE
 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  std::cout << header "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
 | 
					  std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
 | 
				
			||||||
  assert(_ShmSetup==1);
 | 
					  assert(_ShmSetup==1);
 | 
				
			||||||
  assert(_ShmAlloc==0);
 | 
					  assert(_ShmAlloc==0);
 | 
				
			||||||
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -757,7 +838,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			|||||||
////////////////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
					void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
 | 
				
			||||||
{ 
 | 
					{ 
 | 
				
			||||||
  std::cout << header "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
 | 
					  std::cout << Mheader "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
 | 
				
			||||||
  assert(_ShmSetup==1);
 | 
					  assert(_ShmSetup==1);
 | 
				
			||||||
  assert(_ShmAlloc==0); 
 | 
					  assert(_ShmAlloc==0); 
 | 
				
			||||||
  MPI_Barrier(WorldShmComm);
 | 
					  MPI_Barrier(WorldShmComm);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -707,9 +707,9 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
 | 
				
			|||||||
  Coordinate ist = Tg->_istride;
 | 
					  Coordinate ist = Tg->_istride;
 | 
				
			||||||
  Coordinate ost = Tg->_ostride;
 | 
					  Coordinate ost = Tg->_ostride;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  autoView( t_v , To, AcceleratorWrite);
 | 
					  autoView( t_v , To, CpuWrite);
 | 
				
			||||||
  autoView( f_v , From, AcceleratorRead);
 | 
					  autoView( f_v , From, CpuRead);
 | 
				
			||||||
  accelerator_for(idx,Fg->lSites(),1,{
 | 
					  thread_for(idx,Fg->lSites(),{
 | 
				
			||||||
    sobj s;
 | 
					    sobj s;
 | 
				
			||||||
    Coordinate Fcoor(nd);
 | 
					    Coordinate Fcoor(nd);
 | 
				
			||||||
    Coordinate Tcoor(nd);
 | 
					    Coordinate Tcoor(nd);
 | 
				
			||||||
@@ -722,15 +722,20 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
 | 
				
			|||||||
      Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
 | 
					      Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if (in_region) {
 | 
					    if (in_region) {
 | 
				
			||||||
      Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
 | 
					#if 0      
 | 
				
			||||||
      Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
 | 
					      Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]); // inner index from
 | 
				
			||||||
      Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
 | 
					      Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]); // inner index to
 | 
				
			||||||
      Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
 | 
					      Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]); // outer index from
 | 
				
			||||||
      vector_type * fp = (vector_type *)&f_v[odx_f];
 | 
					      Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]); // outer index to
 | 
				
			||||||
      vector_type * tp = (vector_type *)&t_v[odx_t];
 | 
					      scalar_type * fp = (scalar_type *)&f_v[odx_f];
 | 
				
			||||||
 | 
					      scalar_type * tp = (scalar_type *)&t_v[odx_t];
 | 
				
			||||||
      for(int w=0;w<words;w++){
 | 
					      for(int w=0;w<words;w++){
 | 
				
			||||||
	tp[w].putlane(fp[w].getlane(idx_f),idx_t);
 | 
						tp[w].putlane(fp[w].getlane(idx_f),idx_t);
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					    peekLocalSite(s,f_v,Fcoor);
 | 
				
			||||||
 | 
					    pokeLocalSite(s,t_v,Tcoor);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										136
									
								
								Grid/lattice/PaddedCell.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								Grid/lattice/PaddedCell.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,136 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/lattice/PaddedCell.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2019
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle pboyle@bnl.gov
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class PaddedCell {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  GridCartesian * unpadded_grid;
 | 
				
			||||||
 | 
					  int dims;
 | 
				
			||||||
 | 
					  int depth;
 | 
				
			||||||
 | 
					  std::vector<GridCartesian *> grids;
 | 
				
			||||||
 | 
					  ~PaddedCell()
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    DeleteGrids();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  PaddedCell(int _depth,GridCartesian *_grid)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    unpadded_grid = _grid;
 | 
				
			||||||
 | 
					    depth=_depth;
 | 
				
			||||||
 | 
					    dims=_grid->Nd();
 | 
				
			||||||
 | 
					    AllocateGrids();
 | 
				
			||||||
 | 
					    Coordinate local     =unpadded_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    for(int d=0;d<dims;d++){
 | 
				
			||||||
 | 
					      assert(local[d]>=depth);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void DeleteGrids(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    for(int d=0;d<grids.size();d++){
 | 
				
			||||||
 | 
					      delete grids[d];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    grids.resize(0);
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  void AllocateGrids(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    Coordinate local     =unpadded_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    Coordinate simd      =unpadded_grid->_simd_layout;
 | 
				
			||||||
 | 
					    Coordinate processors=unpadded_grid->_processors;
 | 
				
			||||||
 | 
					    Coordinate plocal    =unpadded_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    Coordinate global(dims);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // expand up one dim at a time
 | 
				
			||||||
 | 
					    for(int d=0;d<dims;d++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      plocal[d] += 2*depth; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for(int d=0;d<dims;d++){
 | 
				
			||||||
 | 
						global[d] = plocal[d]*processors[d];
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      grids.push_back(new GridCartesian(global,simd,processors));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template<class vobj>
 | 
				
			||||||
 | 
					  inline Lattice<vobj> Extract(Lattice<vobj> &in)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    Lattice<vobj> out(unpadded_grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Coordinate local     =unpadded_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    Coordinate fll(dims,depth); // depends on the MPI spread
 | 
				
			||||||
 | 
					    Coordinate tll(dims,0); // depends on the MPI spread
 | 
				
			||||||
 | 
					    localCopyRegion(in,out,fll,tll,local);
 | 
				
			||||||
 | 
					    return out;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  template<class vobj>
 | 
				
			||||||
 | 
					  inline Lattice<vobj> Exchange(Lattice<vobj> &in)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GridBase *old_grid = in.Grid();
 | 
				
			||||||
 | 
					    int dims = old_grid->Nd();
 | 
				
			||||||
 | 
					    Lattice<vobj> tmp = in;
 | 
				
			||||||
 | 
					    for(int d=0;d<dims;d++){
 | 
				
			||||||
 | 
					      tmp = Expand(d,tmp); // rvalue && assignment
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return tmp;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  // expand up one dim at a time
 | 
				
			||||||
 | 
					  template<class vobj>
 | 
				
			||||||
 | 
					  inline Lattice<vobj> Expand(int dim,Lattice<vobj> &in)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GridBase *old_grid = in.Grid();
 | 
				
			||||||
 | 
					    GridCartesian *new_grid = grids[dim];//These are new grids
 | 
				
			||||||
 | 
					    Lattice<vobj>  padded(new_grid);
 | 
				
			||||||
 | 
					    Lattice<vobj> shifted(old_grid);    
 | 
				
			||||||
 | 
					    Coordinate local     =old_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    Coordinate plocal    =new_grid->LocalDimensions();
 | 
				
			||||||
 | 
					    if(dim==0) conformable(old_grid,unpadded_grid);
 | 
				
			||||||
 | 
					    else       conformable(old_grid,grids[dim-1]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
 | 
				
			||||||
 | 
					    // Middle bit
 | 
				
			||||||
 | 
					    for(int x=0;x<local[dim];x++){
 | 
				
			||||||
 | 
					      InsertSliceLocal(in,padded,x,depth+x,dim);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // High bit
 | 
				
			||||||
 | 
					    shifted = Cshift(in,dim,depth);
 | 
				
			||||||
 | 
					    for(int x=0;x<depth;x++){
 | 
				
			||||||
 | 
					      InsertSliceLocal(shifted,padded,local[dim]-depth+x,depth+local[dim]+x,dim);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Low bit
 | 
				
			||||||
 | 
					    shifted = Cshift(in,dim,-depth);
 | 
				
			||||||
 | 
					    for(int x=0;x<depth;x++){
 | 
				
			||||||
 | 
					      InsertSliceLocal(shifted,padded,x,x,dim);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return padded;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -104,6 +104,7 @@ template<typename vtype> using iSpinMatrix                = iScalar<iMatrix<iSca
 | 
				
			|||||||
template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
 | 
					template<typename vtype> using iColourMatrix              = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
 | 
				
			||||||
template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
 | 
					template<typename vtype> using iSpinColourMatrix          = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
 | 
				
			||||||
template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
 | 
					template<typename vtype> using iLorentzColourMatrix       = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
 | 
				
			||||||
 | 
					template<typename vtype> using iLorentzComplex            = iVector<iScalar<iScalar<vtype> >, Nd > ;
 | 
				
			||||||
template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
 | 
					template<typename vtype> using iDoubleStoredColourMatrix  = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
 | 
				
			||||||
template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >;
 | 
					template<typename vtype> using iSpinVector                = iScalar<iVector<iScalar<vtype>, Ns> >;
 | 
				
			||||||
template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >;
 | 
					template<typename vtype> using iColourVector              = iScalar<iScalar<iVector<vtype, Nc> > >;
 | 
				
			||||||
@@ -178,6 +179,15 @@ typedef iLorentzColourMatrix<vComplexF>  vLorentzColourMatrixF;
 | 
				
			|||||||
typedef iLorentzColourMatrix<vComplexD>  vLorentzColourMatrixD;
 | 
					typedef iLorentzColourMatrix<vComplexD>  vLorentzColourMatrixD;
 | 
				
			||||||
typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
 | 
					typedef iLorentzColourMatrix<vComplexD2> vLorentzColourMatrixD2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// LorentzComplex
 | 
				
			||||||
 | 
					typedef iLorentzComplex<Complex  > LorentzComplex;
 | 
				
			||||||
 | 
					typedef iLorentzComplex<ComplexF > LorentzComplexF;
 | 
				
			||||||
 | 
					typedef iLorentzComplex<ComplexD > LorentzComplexD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef iLorentzComplex<vComplex > vLorentzComplex;
 | 
				
			||||||
 | 
					typedef iLorentzComplex<vComplexF> vLorentzComplexF;
 | 
				
			||||||
 | 
					typedef iLorentzComplex<vComplexD> vLorentzComplexD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// DoubleStored gauge field
 | 
					// DoubleStored gauge field
 | 
				
			||||||
typedef iDoubleStoredColourMatrix<Complex  > DoubleStoredColourMatrix;
 | 
					typedef iDoubleStoredColourMatrix<Complex  > DoubleStoredColourMatrix;
 | 
				
			||||||
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
 | 
					typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
 | 
				
			||||||
@@ -307,6 +317,10 @@ typedef Lattice<vLorentzColourMatrixF>  LatticeLorentzColourMatrixF;
 | 
				
			|||||||
typedef Lattice<vLorentzColourMatrixD>  LatticeLorentzColourMatrixD;
 | 
					typedef Lattice<vLorentzColourMatrixD>  LatticeLorentzColourMatrixD;
 | 
				
			||||||
typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
 | 
					typedef Lattice<vLorentzColourMatrixD2> LatticeLorentzColourMatrixD2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef Lattice<vLorentzComplex>  LatticeLorentzComplex;
 | 
				
			||||||
 | 
					typedef Lattice<vLorentzComplexF> LatticeLorentzComplexF;
 | 
				
			||||||
 | 
					typedef Lattice<vLorentzComplexD> LatticeLorentzComplexD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// DoubleStored gauge field
 | 
					// DoubleStored gauge field
 | 
				
			||||||
typedef Lattice<vDoubleStoredColourMatrix>   LatticeDoubleStoredColourMatrix;
 | 
					typedef Lattice<vDoubleStoredColourMatrix>   LatticeDoubleStoredColourMatrix;
 | 
				
			||||||
typedef Lattice<vDoubleStoredColourMatrixF>  LatticeDoubleStoredColourMatrixF;
 | 
					typedef Lattice<vDoubleStoredColourMatrixF>  LatticeDoubleStoredColourMatrixF;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,10 +34,24 @@ directory
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///////////////////////////////////
 | 
				
			||||||
 | 
					// Smart configuration base class
 | 
				
			||||||
 | 
					///////////////////////////////////
 | 
				
			||||||
 | 
					template< class Field >
 | 
				
			||||||
 | 
					class ConfigurationBase
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  ConfigurationBase() {}
 | 
				
			||||||
 | 
					  virtual ~ConfigurationBase() {}
 | 
				
			||||||
 | 
					  virtual void set_Field(Field& U) =0;
 | 
				
			||||||
 | 
					  virtual void smeared_force(Field&) = 0;
 | 
				
			||||||
 | 
					  virtual Field& get_SmearedU() =0;
 | 
				
			||||||
 | 
					  virtual Field &get_U(bool smeared = false) = 0;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class GaugeField >
 | 
					template <class GaugeField >
 | 
				
			||||||
class Action 
 | 
					class Action 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  bool is_smeared = false;
 | 
					  bool is_smeared = false;
 | 
				
			||||||
  RealD deriv_norm_sum;
 | 
					  RealD deriv_norm_sum;
 | 
				
			||||||
@@ -77,11 +91,39 @@ public:
 | 
				
			|||||||
  void refresh_timer_stop(void)  { refresh_us+=usecond(); }
 | 
					  void refresh_timer_stop(void)  { refresh_us+=usecond(); }
 | 
				
			||||||
  void S_timer_start(void)       { S_us-=usecond(); }
 | 
					  void S_timer_start(void)       { S_us-=usecond(); }
 | 
				
			||||||
  void S_timer_stop(void)        { S_us+=usecond(); }
 | 
					  void S_timer_stop(void)        { S_us+=usecond(); }
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
  // Heatbath?
 | 
					  // Heatbath?
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
  virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
 | 
					  virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
 | 
				
			||||||
  virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action
 | 
					  virtual RealD S(const GaugeField& U) = 0;                             // evaluate the action
 | 
				
			||||||
  virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ;  // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
 | 
					  virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ;  // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
 | 
				
			||||||
  virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0;        // evaluate the action derivative
 | 
					  virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0;        // evaluate the action derivative
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // virtual smeared interface through configuration container
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    refresh(U.get_U(is_smeared),sRNG,pRNG);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual RealD S(ConfigurationBase<GaugeField>& U)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    return S(U.get_U(is_smeared));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual RealD Sinitial(ConfigurationBase<GaugeField>& U) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    return Sinitial(U.get_U(is_smeared));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    deriv(U.get_U(is_smeared),dSdU); 
 | 
				
			||||||
 | 
					    if ( is_smeared ) {
 | 
				
			||||||
 | 
					      U.smeared_force(dSdU);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  ///////////////////////////////
 | 
				
			||||||
 | 
					  // Logging
 | 
				
			||||||
 | 
					  ///////////////////////////////
 | 
				
			||||||
  virtual std::string action_name()    = 0;                             // return the action name
 | 
					  virtual std::string action_name()    = 0;                             // return the action name
 | 
				
			||||||
  virtual std::string LogParameters()  = 0;                             // prints action parameters
 | 
					  virtual std::string LogParameters()  = 0;                             // prints action parameters
 | 
				
			||||||
  virtual ~Action(){}
 | 
					  virtual ~Action(){}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,6 +30,8 @@ directory
 | 
				
			|||||||
#ifndef QCD_ACTION_CORE
 | 
					#ifndef QCD_ACTION_CORE
 | 
				
			||||||
#define QCD_ACTION_CORE
 | 
					#define QCD_ACTION_CORE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/gauge/GaugeImplementations.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <Grid/qcd/action/ActionBase.h>
 | 
					#include <Grid/qcd/action/ActionBase.h>
 | 
				
			||||||
NAMESPACE_CHECK(ActionBase);
 | 
					NAMESPACE_CHECK(ActionBase);
 | 
				
			||||||
#include <Grid/qcd/action/ActionSet.h>
 | 
					#include <Grid/qcd/action/ActionSet.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -507,6 +507,7 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    this->face_table_computed=1;
 | 
					    this->face_table_computed=1;
 | 
				
			||||||
    assert(this->u_comm_offset==this->_unified_buffer_size);
 | 
					    assert(this->u_comm_offset==this->_unified_buffer_size);
 | 
				
			||||||
 | 
					    accelerator_barrier();
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
 | 
				
			|||||||
  /////////////////////////////
 | 
					  /////////////////////////////
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    GRID_TRACE("Gather");
 | 
					    GRID_TRACE("Gather");
 | 
				
			||||||
    st.HaloExchangeOptGather(in,compressor);
 | 
					    st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
 | 
				
			||||||
    accelerator_barrier();
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  std::vector<std::vector<CommsRequest_t> > requests;
 | 
					  std::vector<std::vector<CommsRequest_t> > requests;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -423,14 +423,14 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
 | 
				
			|||||||
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
 | 
					#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define KERNEL_CALL_EXT(A)						\
 | 
					#define KERNEL_CALL_EXT(A)						\
 | 
				
			||||||
  const uint64_t    NN = Nsite*Ls;					\
 | 
					 | 
				
			||||||
  const uint64_t    sz = st.surface_list.size();			\
 | 
					  const uint64_t    sz = st.surface_list.size();			\
 | 
				
			||||||
  auto ptr = &st.surface_list[0];					\
 | 
					  auto ptr = &st.surface_list[0];					\
 | 
				
			||||||
  accelerator_forNB( ss, sz, Simd::Nsimd(), {				\
 | 
					  accelerator_forNB( ss, sz, Simd::Nsimd(), {				\
 | 
				
			||||||
      int sF = ptr[ss];							\
 | 
					      int sF = ptr[ss];							\
 | 
				
			||||||
      int sU = ss/Ls;							\
 | 
					      int sU = sF/Ls;							\
 | 
				
			||||||
      WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\
 | 
					      WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\
 | 
				
			||||||
    });									
 | 
					    });									\
 | 
				
			||||||
 | 
					  accelerator_barrier();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ASM_CALL(A)							\
 | 
					#define ASM_CALL(A)							\
 | 
				
			||||||
  thread_for( sss, Nsite, {						\
 | 
					  thread_for( sss, Nsite, {						\
 | 
				
			||||||
@@ -474,9 +474,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField
 | 
				
			|||||||
     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteInt);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteInt);    return;}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
   } else if( exterior ) {
 | 
					   } else if( exterior ) {
 | 
				
			||||||
 | 
					     // dependent on result of merge
 | 
				
			||||||
     acceleratorFenceComputeStream();
 | 
					     acceleratorFenceComputeStream();
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteExt); return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt);    return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					#ifndef GRID_CUDA
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteExt);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteExt);    return;}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -506,9 +507,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField
 | 
				
			|||||||
     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagInt);     return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagInt);     return;}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
   } else if( exterior ) {
 | 
					   } else if( exterior ) {
 | 
				
			||||||
 | 
					     // Dependent on result of merge
 | 
				
			||||||
     acceleratorFenceComputeStream();
 | 
					     acceleratorFenceComputeStream();
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt);    return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt);    return;}
 | 
				
			||||||
#ifndef GRID_CUDA
 | 
					#ifndef GRID_CUDA
 | 
				
			||||||
     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagExt);     return;}
 | 
					     if (Opt == WilsonKernelsStatic::OptInlineAsm  ) {  ASM_CALL(AsmDhopSiteDagExt);     return;}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -86,8 +86,13 @@ public:
 | 
				
			|||||||
    assert(ForceE.Checkerboard()==Even);
 | 
					    assert(ForceE.Checkerboard()==Even);
 | 
				
			||||||
    assert(ForceO.Checkerboard()==Odd);
 | 
					    assert(ForceO.Checkerboard()==Odd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_CUDA) || defined(GRID_HIP)  || defined(GRID_SYCL)
 | 
				
			||||||
 | 
					    acceleratorSetCheckerboard(Force,ForceE);
 | 
				
			||||||
 | 
					    acceleratorSetCheckerboard(Force,ForceO);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
    setCheckerboard(Force,ForceE); 
 | 
					    setCheckerboard(Force,ForceE); 
 | 
				
			||||||
    setCheckerboard(Force,ForceO);
 | 
					    setCheckerboard(Force,ForceO);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    Force=-Force;
 | 
					    Force=-Force;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    delete forcecb;
 | 
					    delete forcecb;
 | 
				
			||||||
@@ -130,8 +135,13 @@ public:
 | 
				
			|||||||
    assert(ForceE.Checkerboard()==Even);
 | 
					    assert(ForceE.Checkerboard()==Even);
 | 
				
			||||||
    assert(ForceO.Checkerboard()==Odd);
 | 
					    assert(ForceO.Checkerboard()==Odd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_CUDA) || defined(GRID_HIP)  || defined(GRID_SYCL)
 | 
				
			||||||
 | 
					    acceleratorSetCheckerboard(Force,ForceE);
 | 
				
			||||||
 | 
					    acceleratorSetCheckerboard(Force,ForceO);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
    setCheckerboard(Force,ForceE); 
 | 
					    setCheckerboard(Force,ForceE); 
 | 
				
			||||||
    setCheckerboard(Force,ForceO);
 | 
					    setCheckerboard(Force,ForceO);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    Force=-Force;
 | 
					    Force=-Force;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    delete forcecb;
 | 
					    delete forcecb;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -38,91 +38,73 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
    // cf. GeneralEvenOddRational.h for details
 | 
					    // cf. GeneralEvenOddRational.h for details
 | 
				
			||||||
    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    template<class ImplD, class ImplF, class ImplD2>
 | 
					    template<class ImplD, class ImplF>
 | 
				
			||||||
    class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
 | 
					    class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
 | 
				
			||||||
    private:
 | 
					    private:
 | 
				
			||||||
      typedef typename ImplD2::FermionField FermionFieldD2;
 | 
					 | 
				
			||||||
      typedef typename ImplD::FermionField FermionFieldD;
 | 
					      typedef typename ImplD::FermionField FermionFieldD;
 | 
				
			||||||
      typedef typename ImplF::FermionField FermionFieldF;
 | 
					      typedef typename ImplF::FermionField FermionFieldF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      FermionOperator<ImplD> & NumOpD;
 | 
					      FermionOperator<ImplD> & NumOpD;
 | 
				
			||||||
      FermionOperator<ImplD> & DenOpD;
 | 
					      FermionOperator<ImplD> & DenOpD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      FermionOperator<ImplD2> & NumOpD2;
 | 
					 | 
				
			||||||
      FermionOperator<ImplD2> & DenOpD2;
 | 
					 | 
				
			||||||
     
 | 
					 | 
				
			||||||
      FermionOperator<ImplF> & NumOpF;
 | 
					      FermionOperator<ImplF> & NumOpF;
 | 
				
			||||||
      FermionOperator<ImplF> & DenOpF;
 | 
					      FermionOperator<ImplF> & DenOpF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      Integer ReliableUpdateFreq;
 | 
					      Integer ReliableUpdateFreq;
 | 
				
			||||||
    protected:
 | 
					    protected:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      //Action evaluation
 | 
				
			||||||
      //Allow derived classes to override the multishift CG
 | 
					      //Allow derived classes to override the multishift CG
 | 
				
			||||||
      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
 | 
					      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
 | 
				
			||||||
#if 0
 | 
					#if 1
 | 
				
			||||||
	SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
 | 
						SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
 | 
				
			||||||
	ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
 | 
						ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
 | 
				
			||||||
	msCG(schurOp,in, out);
 | 
						msCG(schurOp,in, out);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
 | 
						SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
 | 
				
			||||||
	SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
 | 
						SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
 | 
				
			||||||
	FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
 | 
						FermionFieldD inD(NumOpD.FermionRedBlackGrid());
 | 
				
			||||||
	FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
 | 
						FermionFieldD outD(NumOpD.FermionRedBlackGrid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Action better with higher precision?
 | 
						// Action better with higher precision?
 | 
				
			||||||
	ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
 | 
						ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
 | 
				
			||||||
	precisionChange(inD2,in);
 | 
						msCG(schurOpD, in, out);
 | 
				
			||||||
	std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
 | 
					 | 
				
			||||||
	msCG(schurOpD2, inD2, outD2);
 | 
					 | 
				
			||||||
	precisionChange(out,outD2);
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					      //Force evaluation
 | 
				
			||||||
      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
 | 
					      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
 | 
				
			||||||
	SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
 | 
						SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
 | 
				
			||||||
	SchurDifferentiableOperator<ImplF>  schurOpF(numerator ? NumOpF  : DenOpF);
 | 
						SchurDifferentiableOperator<ImplF>  schurOpF(numerator ? NumOpF  : DenOpF);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
 | 
						FermionFieldD inD(NumOpD.FermionRedBlackGrid());
 | 
				
			||||||
	FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
 | 
						FermionFieldD outD(NumOpD.FermionRedBlackGrid());
 | 
				
			||||||
	std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
 | 
						std::vector<FermionFieldD> out_elemsD(out_elems.size(),NumOpD.FermionRedBlackGrid());
 | 
				
			||||||
	ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
 | 
						ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
 | 
				
			||||||
	precisionChange(inD2,in);
 | 
						msCG(schurOpD, in, out_elems, out);
 | 
				
			||||||
	std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
 | 
					 | 
				
			||||||
	msCG(schurOpD2, inD2, out_elemsD2, outD2);
 | 
					 | 
				
			||||||
	precisionChange(out,outD2);
 | 
					 | 
				
			||||||
	for(int i=0;i<out_elems.size();i++){
 | 
					 | 
				
			||||||
	  precisionChange(out_elems[i],out_elemsD2[i]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      //Allow derived classes to override the gauge import
 | 
					      //Allow derived classes to override the gauge import
 | 
				
			||||||
      virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
 | 
					      virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
 | 
						typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
 | 
				
			||||||
	typename ImplD2::GaugeField Ud2(NumOpD2.GaugeGrid());
 | 
					 | 
				
			||||||
	precisionChange(Uf, Ud);
 | 
						precisionChange(Uf, Ud);
 | 
				
			||||||
	precisionChange(Ud2, Ud);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
 | 
						std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl;
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	NumOpD.ImportGauge(Ud);
 | 
						NumOpD.ImportGauge(Ud);
 | 
				
			||||||
	DenOpD.ImportGauge(Ud);
 | 
						DenOpD.ImportGauge(Ud);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	NumOpF.ImportGauge(Uf);
 | 
						NumOpF.ImportGauge(Uf);
 | 
				
			||||||
	DenOpF.ImportGauge(Uf);
 | 
						DenOpF.ImportGauge(Uf);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	NumOpD2.ImportGauge(Ud2);
 | 
					 | 
				
			||||||
	DenOpD2.ImportGauge(Ud2);
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
      GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD>  &_NumOpD, FermionOperator<ImplD>  &_DenOpD, 
 | 
					      GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD>  &_NumOpD, FermionOperator<ImplD>  &_DenOpD, 
 | 
				
			||||||
							      FermionOperator<ImplF>  &_NumOpF, FermionOperator<ImplF>  &_DenOpF, 
 | 
												      FermionOperator<ImplF>  &_NumOpF, FermionOperator<ImplF>  &_DenOpF, 
 | 
				
			||||||
							      FermionOperator<ImplD2>  &_NumOpD2, FermionOperator<ImplD2>  &_DenOpD2, 
 | 
					 | 
				
			||||||
							      const RationalActionParams & p, Integer _ReliableUpdateFreq
 | 
												      const RationalActionParams & p, Integer _ReliableUpdateFreq
 | 
				
			||||||
							      ) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
 | 
												      ) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
 | 
				
			||||||
								  ReliableUpdateFreq(_ReliableUpdateFreq),
 | 
													  ReliableUpdateFreq(_ReliableUpdateFreq),
 | 
				
			||||||
								  NumOpD(_NumOpD), DenOpD(_DenOpD),
 | 
													  NumOpD(_NumOpD), DenOpD(_DenOpD),
 | 
				
			||||||
								  NumOpF(_NumOpF), DenOpF(_DenOpF),
 | 
													  NumOpF(_NumOpF), DenOpF(_DenOpF)
 | 
				
			||||||
								  NumOpD2(_NumOpD2), DenOpD2(_DenOpD2)
 | 
					 | 
				
			||||||
      {}
 | 
					      {}
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}
 | 
					      virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -67,9 +67,9 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}      
 | 
					      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}      
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class Impl,class ImplF,class ImplD2>
 | 
					    template<class Impl,class ImplF>
 | 
				
			||||||
    class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction
 | 
					    class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction
 | 
				
			||||||
      : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2> {
 | 
					      : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
      typedef OneFlavourRationalParams Params;
 | 
					      typedef OneFlavourRationalParams Params;
 | 
				
			||||||
    private:
 | 
					    private:
 | 
				
			||||||
@@ -91,11 +91,9 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
								 FermionOperator<Impl>  &_DenOp, 
 | 
													 FermionOperator<Impl>  &_DenOp, 
 | 
				
			||||||
								 FermionOperator<ImplF>  &_NumOpF, 
 | 
													 FermionOperator<ImplF>  &_NumOpF, 
 | 
				
			||||||
								 FermionOperator<ImplF>  &_DenOpF, 
 | 
													 FermionOperator<ImplF>  &_DenOpF, 
 | 
				
			||||||
								 FermionOperator<ImplD2>  &_NumOpD2, 
 | 
					 | 
				
			||||||
								 FermionOperator<ImplD2>  &_DenOpD2, 
 | 
					 | 
				
			||||||
								 const Params & p, Integer ReliableUpdateFreq
 | 
													 const Params & p, Integer ReliableUpdateFreq
 | 
				
			||||||
							) : 
 | 
												) : 
 | 
				
			||||||
	GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF,ImplD2>(_NumOp, _DenOp,_NumOpF, _DenOpF,_NumOpD2, _DenOpD2, transcribe(p),ReliableUpdateFreq){}
 | 
						GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}      
 | 
					      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}      
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -207,20 +207,27 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
        //X = (Mdag M)^-1 V^dag phi
 | 
					        //X = (Mdag M)^-1 V^dag phi
 | 
				
			||||||
        //Y = (Mdag)^-1 V^dag  phi
 | 
					        //Y = (Mdag)^-1 V^dag  phi
 | 
				
			||||||
        Vpc.MpcDag(PhiOdd,Y);          // Y= Vdag phi
 | 
					        Vpc.MpcDag(PhiOdd,Y);          // Y= Vdag phi
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
 | 
				
			||||||
        X=Zero();
 | 
					        X=Zero();
 | 
				
			||||||
        DerivativeSolver(Mpc,Y,X);     // X= (MdagM)^-1 Vdag phi
 | 
					        DerivativeSolver(Mpc,Y,X);     // X= (MdagM)^-1 Vdag phi
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" X "<<norm2(X)<<std::endl;
 | 
				
			||||||
        Mpc.Mpc(X,Y);                  // Y=  Mdag^-1 Vdag phi
 | 
					        Mpc.Mpc(X,Y);                  // Y=  Mdag^-1 Vdag phi
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" Y "<<norm2(Y)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // phi^dag V (Mdag M)^-1 dV^dag  phi
 | 
					        // phi^dag V (Mdag M)^-1 dV^dag  phi
 | 
				
			||||||
        Vpc.MpcDagDeriv(force , X, PhiOdd );   dSdU = force;
 | 
					        Vpc.MpcDagDeriv(force , X, PhiOdd );   dSdU = force;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
        // phi^dag dV (Mdag M)^-1 V^dag  phi
 | 
					        // phi^dag dV (Mdag M)^-1 V^dag  phi
 | 
				
			||||||
        Vpc.MpcDeriv(force , PhiOdd, X );      dSdU = dSdU+force;
 | 
					        Vpc.MpcDeriv(force , PhiOdd, X );      dSdU = dSdU+force;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        //    -    phi^dag V (Mdag M)^-1 Mdag dM   (Mdag M)^-1 V^dag  phi
 | 
					        //    -    phi^dag V (Mdag M)^-1 Mdag dM   (Mdag M)^-1 V^dag  phi
 | 
				
			||||||
        //    -    phi^dag V (Mdag M)^-1 dMdag M   (Mdag M)^-1 V^dag  phi
 | 
					        //    -    phi^dag V (Mdag M)^-1 dMdag M   (Mdag M)^-1 V^dag  phi
 | 
				
			||||||
        Mpc.MpcDeriv(force,Y,X);              dSdU = dSdU-force;
 | 
					        Mpc.MpcDeriv(force,Y,X);              dSdU = dSdU-force;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
 | 
				
			||||||
        Mpc.MpcDagDeriv(force,X,Y);           dSdU = dSdU-force;
 | 
					        Mpc.MpcDagDeriv(force,X,Y);           dSdU = dSdU-force;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" deriv "<<norm2(force)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // FIXME No force contribution from EvenEven assumed here
 | 
					        // FIXME No force contribution from EvenEven assumed here
 | 
				
			||||||
        // Needs a fix for clover.
 | 
					        // Needs a fix for clover.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -284,11 +284,12 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      TheIntegrator.print_timer();
 | 
					      TheIntegrator.print_timer();
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 | 
					      TheIntegrator.Smearer.set_Field(Ucur);
 | 
				
			||||||
      for (int obs = 0; obs < Observables.size(); obs++) {
 | 
					      for (int obs = 0; obs < Observables.size(); obs++) {
 | 
				
			||||||
      	std::cout << GridLogDebug << "Observables # " << obs << std::endl;
 | 
					      	std::cout << GridLogDebug << "Observables # " << obs << std::endl;
 | 
				
			||||||
      	std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl;
 | 
					      	std::cout << GridLogDebug << "Observables total " << Observables.size() << std::endl;
 | 
				
			||||||
      	std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
 | 
					      	std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
 | 
				
			||||||
        Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
 | 
					        Observables[obs]->TrajectoryComplete(traj + 1, TheIntegrator.Smearer, sRNG, pRNG);
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
 | 
					      std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,13 +35,16 @@ class CheckpointerParameters : Serializable {
 | 
				
			|||||||
public:
 | 
					public:
 | 
				
			||||||
  GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters, 
 | 
					  GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters, 
 | 
				
			||||||
				  std::string, config_prefix, 
 | 
									  std::string, config_prefix, 
 | 
				
			||||||
 | 
									  std::string, smeared_prefix, 
 | 
				
			||||||
				  std::string, rng_prefix, 
 | 
									  std::string, rng_prefix, 
 | 
				
			||||||
				  int, saveInterval, 
 | 
									  int, saveInterval, 
 | 
				
			||||||
 | 
									  bool, saveSmeared, 
 | 
				
			||||||
				  std::string, format, );
 | 
									  std::string, format, );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng",
 | 
					  CheckpointerParameters(std::string cf = "cfg", std::string sf="cfg_smr" , std::string rn = "rng",
 | 
				
			||||||
			 int savemodulo = 1, const std::string &f = "IEEE64BIG")
 | 
								 int savemodulo = 1, const std::string &f = "IEEE64BIG")
 | 
				
			||||||
    : config_prefix(cf),
 | 
					    : config_prefix(cf),
 | 
				
			||||||
 | 
					      smeared_prefix(sf),
 | 
				
			||||||
      rng_prefix(rn),
 | 
					      rng_prefix(rn),
 | 
				
			||||||
      saveInterval(savemodulo),
 | 
					      saveInterval(savemodulo),
 | 
				
			||||||
      format(f){};
 | 
					      format(f){};
 | 
				
			||||||
@@ -61,13 +64,21 @@ template <class Impl>
 | 
				
			|||||||
class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
 | 
					class BaseHmcCheckpointer : public HmcObservable<typename Impl::Field> {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  void build_filenames(int traj, CheckpointerParameters &Params,
 | 
					  void build_filenames(int traj, CheckpointerParameters &Params,
 | 
				
			||||||
                       std::string &conf_file, std::string &rng_file) {
 | 
					                       std::string &conf_file,
 | 
				
			||||||
 | 
					                       std::string &smear_file,
 | 
				
			||||||
 | 
							       std::string &rng_file) {
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      std::ostringstream os;
 | 
					      std::ostringstream os;
 | 
				
			||||||
      os << Params.rng_prefix << "." << traj;
 | 
					      os << Params.rng_prefix << "." << traj;
 | 
				
			||||||
      rng_file = os.str();
 | 
					      rng_file = os.str();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      std::ostringstream os;
 | 
				
			||||||
 | 
					      os << Params.smeared_prefix << "." << traj;
 | 
				
			||||||
 | 
					      smear_file = os.str();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      std::ostringstream os;
 | 
					      std::ostringstream os;
 | 
				
			||||||
      os << Params.config_prefix << "." << traj;
 | 
					      os << Params.config_prefix << "." << traj;
 | 
				
			||||||
@@ -84,6 +95,11 @@ public:
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  virtual void initialize(const CheckpointerParameters &Params) = 0;
 | 
					  virtual void initialize(const CheckpointerParameters &Params) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual void TrajectoryComplete(int traj,
 | 
				
			||||||
 | 
					                                  typename Impl::Field &U,
 | 
				
			||||||
 | 
					                                  GridSerialRNG &sRNG,
 | 
				
			||||||
 | 
					                                  GridParallelRNG &pRNG) { assert(0); } ; // HMC should pass the smart config with smeared and unsmeared
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  virtual void CheckpointRestore(int traj, typename Impl::Field &U,
 | 
					  virtual void CheckpointRestore(int traj, typename Impl::Field &U,
 | 
				
			||||||
                                 GridSerialRNG &sRNG,
 | 
					                                 GridSerialRNG &sRNG,
 | 
				
			||||||
                                 GridParallelRNG &pRNG) = 0;
 | 
					                                 GridParallelRNG &pRNG) = 0;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -61,11 +61,14 @@ public:
 | 
				
			|||||||
    fout.close();
 | 
					    fout.close();
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
 | 
					  void TrajectoryComplete(int traj,
 | 
				
			||||||
 | 
								  ConfigurationBase<Field> &SmartConfig,
 | 
				
			||||||
 | 
								  GridSerialRNG &sRNG, GridParallelRNG &pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if ((traj % Params.saveInterval) == 0) {
 | 
					    if ((traj % Params.saveInterval) == 0) {
 | 
				
			||||||
      std::string config, rng;
 | 
					      std::string config, rng, smr;
 | 
				
			||||||
      this->build_filenames(traj, Params, config, rng);
 | 
					      this->build_filenames(traj, Params, config, smr, rng);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      uint32_t nersc_csum;
 | 
					      uint32_t nersc_csum;
 | 
				
			||||||
      uint32_t scidac_csuma;
 | 
					      uint32_t scidac_csuma;
 | 
				
			||||||
@@ -74,9 +77,15 @@ public:
 | 
				
			|||||||
      BinarySimpleUnmunger<sobj_double, sobj> munge;
 | 
					      BinarySimpleUnmunger<sobj_double, sobj> munge;
 | 
				
			||||||
      truncate(rng);
 | 
					      truncate(rng);
 | 
				
			||||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
					      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
				
			||||||
      truncate(config);
 | 
					      std::cout << GridLogMessage << "Written Binary RNG " << rng
 | 
				
			||||||
 | 
					                << " checksum " << std::hex 
 | 
				
			||||||
 | 
							<< nersc_csum   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csuma   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csumb 
 | 
				
			||||||
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format,
 | 
					      truncate(config);
 | 
				
			||||||
 | 
					      BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(false), config, munge, 0, Params.format,
 | 
				
			||||||
						      nersc_csum,scidac_csuma,scidac_csumb);
 | 
											      nersc_csum,scidac_csuma,scidac_csumb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      std::cout << GridLogMessage << "Written Binary Configuration " << config
 | 
					      std::cout << GridLogMessage << "Written Binary Configuration " << config
 | 
				
			||||||
@@ -85,6 +94,18 @@ public:
 | 
				
			|||||||
		<< scidac_csuma   <<"/"
 | 
							<< scidac_csuma   <<"/"
 | 
				
			||||||
		<< scidac_csumb 
 | 
							<< scidac_csumb 
 | 
				
			||||||
		<< std::dec << std::endl;
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( Params.saveSmeared ) {
 | 
				
			||||||
 | 
						truncate(smr);
 | 
				
			||||||
 | 
						BinaryIO::writeLatticeObject<vobj, sobj_double>(SmartConfig.get_U(true), smr, munge, 0, Params.format,
 | 
				
			||||||
 | 
												nersc_csum,scidac_csuma,scidac_csumb);
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << "Written Binary Smeared Configuration " << smr
 | 
				
			||||||
 | 
					                << " checksum " << std::hex 
 | 
				
			||||||
 | 
							<< nersc_csum   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csuma   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csumb 
 | 
				
			||||||
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -69,17 +69,27 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
					  void TrajectoryComplete(int traj,
 | 
				
			||||||
 | 
								  ConfigurationBase<GaugeField> &SmartConfig,
 | 
				
			||||||
 | 
								  GridSerialRNG &sRNG,
 | 
				
			||||||
                          GridParallelRNG &pRNG) {
 | 
					                          GridParallelRNG &pRNG) {
 | 
				
			||||||
    if ((traj % Params.saveInterval) == 0) {
 | 
					    if ((traj % Params.saveInterval) == 0) {
 | 
				
			||||||
      std::string config, rng;
 | 
					      std::string config, rng, smr;
 | 
				
			||||||
      this->build_filenames(traj, Params, config, rng);
 | 
					      this->build_filenames(traj, Params, config, rng);
 | 
				
			||||||
      GridBase *grid = U.Grid();
 | 
					      GridBase *grid = SmartConfig.get_U(false).Grid();
 | 
				
			||||||
      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
					      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
				
			||||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
					      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Written BINARY RNG " << rng
 | 
				
			||||||
 | 
					                << " checksum " << std::hex 
 | 
				
			||||||
 | 
							<< nersc_csum<<"/"
 | 
				
			||||||
 | 
							<< scidac_csuma<<"/"
 | 
				
			||||||
 | 
							<< scidac_csumb
 | 
				
			||||||
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
      IldgWriter _IldgWriter(grid->IsBoss());
 | 
					      IldgWriter _IldgWriter(grid->IsBoss());
 | 
				
			||||||
      _IldgWriter.open(config);
 | 
					      _IldgWriter.open(config);
 | 
				
			||||||
      _IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
 | 
					      _IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(false), traj, config, config);
 | 
				
			||||||
      _IldgWriter.close();
 | 
					      _IldgWriter.close();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      std::cout << GridLogMessage << "Written ILDG Configuration on " << config
 | 
					      std::cout << GridLogMessage << "Written ILDG Configuration on " << config
 | 
				
			||||||
@@ -88,6 +98,21 @@ public:
 | 
				
			|||||||
		<< scidac_csuma<<"/"
 | 
							<< scidac_csuma<<"/"
 | 
				
			||||||
		<< scidac_csumb
 | 
							<< scidac_csumb
 | 
				
			||||||
		<< std::dec << std::endl;
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( Params.saveSmeared ) { 
 | 
				
			||||||
 | 
						IldgWriter _IldgWriter(grid->IsBoss());
 | 
				
			||||||
 | 
						_IldgWriter.open(smr);
 | 
				
			||||||
 | 
						_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, config, config);
 | 
				
			||||||
 | 
						_IldgWriter.close();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << "Written ILDG Configuration on " << smr
 | 
				
			||||||
 | 
					                << " checksum " << std::hex 
 | 
				
			||||||
 | 
							<< nersc_csum<<"/"
 | 
				
			||||||
 | 
							<< scidac_csuma<<"/"
 | 
				
			||||||
 | 
							<< scidac_csumb
 | 
				
			||||||
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,23 +52,29 @@ public:
 | 
				
			|||||||
    Params.format = "IEEE64BIG";  // fixed, overwrite any other choice
 | 
					    Params.format = "IEEE64BIG";  // fixed, overwrite any other choice
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
					  virtual void TrajectoryComplete(int traj,
 | 
				
			||||||
                          GridParallelRNG &pRNG) {
 | 
					                                  ConfigurationBase<GaugeField> &SmartConfig,
 | 
				
			||||||
 | 
					                                  GridSerialRNG &sRNG,
 | 
				
			||||||
 | 
					                                  GridParallelRNG &pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    if ((traj % Params.saveInterval) == 0) {
 | 
					    if ((traj % Params.saveInterval) == 0) {
 | 
				
			||||||
      std::string config, rng;
 | 
					      std::string config, rng, smr;
 | 
				
			||||||
      this->build_filenames(traj, Params, config, rng);
 | 
					      this->build_filenames(traj, Params, config, smr, rng);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      int precision32 = 1;
 | 
					      int precision32 = 1;
 | 
				
			||||||
      int tworow = 0;
 | 
					      int tworow = 0;
 | 
				
			||||||
      NerscIO::writeRNGState(sRNG, pRNG, rng);
 | 
					      NerscIO::writeRNGState(sRNG, pRNG, rng);
 | 
				
			||||||
      NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
 | 
					      NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(false), config, tworow, precision32);
 | 
				
			||||||
 | 
					      if ( Params.saveSmeared ) {
 | 
				
			||||||
 | 
						NerscIO::writeConfiguration<GaugeStats>(SmartConfig.get_U(true), smr, tworow, precision32);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
					  void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
 | 
				
			||||||
                         GridParallelRNG &pRNG) {
 | 
					                         GridParallelRNG &pRNG) {
 | 
				
			||||||
    std::string config, rng;
 | 
					    std::string config, rng, smr;
 | 
				
			||||||
    this->build_filenames(traj, Params, config, rng);
 | 
					    this->build_filenames(traj, Params, config, smr, rng );
 | 
				
			||||||
    this->check_filename(rng);
 | 
					    this->check_filename(rng);
 | 
				
			||||||
    this->check_filename(config);
 | 
					    this->check_filename(config);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -70,19 +70,37 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG,
 | 
					  void TrajectoryComplete(int traj, 
 | 
				
			||||||
 | 
								  ConfigurationBase<Field> &SmartConfig,
 | 
				
			||||||
 | 
								  GridSerialRNG &sRNG,
 | 
				
			||||||
                          GridParallelRNG &pRNG) {
 | 
					                          GridParallelRNG &pRNG) {
 | 
				
			||||||
    if ((traj % Params.saveInterval) == 0) {
 | 
					    if ((traj % Params.saveInterval) == 0) {
 | 
				
			||||||
      std::string config, rng;
 | 
					      std::string config, rng,smr;
 | 
				
			||||||
      this->build_filenames(traj, Params, config, rng);
 | 
					      this->build_filenames(traj, Params, config, smr, rng);
 | 
				
			||||||
      GridBase *grid = U.Grid();
 | 
					      GridBase *grid = SmartConfig.get_U(false).Grid();
 | 
				
			||||||
      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
					      uint32_t nersc_csum,scidac_csuma,scidac_csumb;
 | 
				
			||||||
      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
					      BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "Written Binary RNG " << rng
 | 
				
			||||||
 | 
					                << " checksum " << std::hex 
 | 
				
			||||||
 | 
							<< nersc_csum   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csuma   <<"/"
 | 
				
			||||||
 | 
							<< scidac_csumb 
 | 
				
			||||||
 | 
							<< std::dec << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
	ScidacWriter _ScidacWriter(grid->IsBoss());
 | 
						ScidacWriter _ScidacWriter(grid->IsBoss());
 | 
				
			||||||
	_ScidacWriter.open(config);
 | 
						_ScidacWriter.open(config);
 | 
				
			||||||
      _ScidacWriter.writeScidacFieldRecord(U, MData);
 | 
						_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(false), MData);
 | 
				
			||||||
	_ScidacWriter.close();
 | 
						_ScidacWriter.close();
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 | 
					      if ( Params.saveSmeared ) {
 | 
				
			||||||
 | 
						ScidacWriter _ScidacWriter(grid->IsBoss());
 | 
				
			||||||
 | 
						_ScidacWriter.open(smr);
 | 
				
			||||||
 | 
						_ScidacWriter.writeScidacFieldRecord(SmartConfig.get_U(true), MData);
 | 
				
			||||||
 | 
						_ScidacWriter.close();
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
 | 
					      std::cout << GridLogMessage << "Written Scidac Configuration on " << config << std::endl;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -66,6 +66,7 @@ public:
 | 
				
			|||||||
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
 | 
					template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
 | 
				
			||||||
class Integrator {
 | 
					class Integrator {
 | 
				
			||||||
protected:
 | 
					protected:
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
  typedef FieldImplementation_ FieldImplementation;
 | 
					  typedef FieldImplementation_ FieldImplementation;
 | 
				
			||||||
  typedef typename FieldImplementation::Field MomentaField;  //for readability
 | 
					  typedef typename FieldImplementation::Field MomentaField;  //for readability
 | 
				
			||||||
  typedef typename FieldImplementation::Field Field;
 | 
					  typedef typename FieldImplementation::Field Field;
 | 
				
			||||||
@@ -96,7 +97,6 @@ protected:
 | 
				
			|||||||
  {
 | 
					  {
 | 
				
			||||||
    t_P[level] += ep;
 | 
					    t_P[level] += ep;
 | 
				
			||||||
    update_P(P, U, level, ep);
 | 
					    update_P(P, U, level, ep);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
 | 
					    std::cout << GridLogIntegrator << "[" << level << "] P " << " dt " << ep << " : t_P " << t_P[level] << std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -130,28 +130,20 @@ protected:
 | 
				
			|||||||
      Field force(U.Grid());
 | 
					      Field force(U.Grid());
 | 
				
			||||||
      conformable(U.Grid(), Mom.Grid());
 | 
					      conformable(U.Grid(), Mom.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
 | 
					 | 
				
			||||||
      double start_force = usecond();
 | 
					      double start_force = usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] before"<<std::endl;
 | 
					 | 
				
			||||||
      
 | 
					 | 
				
			||||||
      as[level].actions.at(a)->deriv_timer_start();
 | 
					      as[level].actions.at(a)->deriv_timer_start();
 | 
				
			||||||
      as[level].actions.at(a)->deriv(Us, force);  // deriv should NOT include Ta
 | 
					      as[level].actions.at(a)->deriv(Smearer, force);  // deriv should NOT include Ta
 | 
				
			||||||
      as[level].actions.at(a)->deriv_timer_stop();
 | 
					      as[level].actions.at(a)->deriv_timer_stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] after"<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
 | 
					 | 
				
			||||||
      auto name = as[level].actions.at(a)->action_name();
 | 
					      auto name = as[level].actions.at(a)->action_name();
 | 
				
			||||||
      if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      force = FieldImplementation::projectForce(force); // Ta for gauge fields
 | 
					      force = FieldImplementation::projectForce(force); // Ta for gauge fields
 | 
				
			||||||
      double end_force = usecond();
 | 
					      double end_force = usecond();
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      //      DumpSliceNorm("force ",force,Nd-1);
 | 
					 | 
				
			||||||
      MomFilter->applyFilter(force);
 | 
					      MomFilter->applyFilter(force);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<<  std::endl;
 | 
					      std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<<" dt "<<ep<<  std::endl;
 | 
				
			||||||
      DumpSliceNorm("force filtered ",force,Nd-1);
 | 
					 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      Real force_abs   = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm.  nb. norm2(latt) = \sum_x norm2(latt[x]) 
 | 
					      Real force_abs   = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm.  nb. norm2(latt) = \sum_x norm2(latt[x]) 
 | 
				
			||||||
      Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;    
 | 
					      Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;    
 | 
				
			||||||
@@ -377,14 +369,9 @@ public:
 | 
				
			|||||||
	auto name = as[level].actions.at(actionID)->action_name();
 | 
						auto name = as[level].actions.at(actionID)->action_name();
 | 
				
			||||||
        std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
 | 
					        std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] before"<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	as[level].actions.at(actionID)->refresh_timer_start();
 | 
						as[level].actions.at(actionID)->refresh_timer_start();
 | 
				
			||||||
        as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
 | 
					        as[level].actions.at(actionID)->refresh(Smearer, sRNG, pRNG);
 | 
				
			||||||
	as[level].actions.at(actionID)->refresh_timer_stop();
 | 
						as[level].actions.at(actionID)->refresh_timer_stop();
 | 
				
			||||||
	std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] after"<<std::endl;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -425,10 +412,9 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // get gauge field from the SmearingPolicy and
 | 
					        // get gauge field from the SmearingPolicy and
 | 
				
			||||||
        // based on the boolean is_smeared in actionID
 | 
					        // based on the boolean is_smeared in actionID
 | 
				
			||||||
        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
 | 
					 | 
				
			||||||
        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
 | 
					        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
 | 
				
			||||||
	        as[level].actions.at(actionID)->S_timer_start();
 | 
						        as[level].actions.at(actionID)->S_timer_start();
 | 
				
			||||||
        Hterm = as[level].actions.at(actionID)->S(Us);
 | 
					        Hterm = as[level].actions.at(actionID)->S(Smearer);
 | 
				
			||||||
   	        as[level].actions.at(actionID)->S_timer_stop();
 | 
					   	        as[level].actions.at(actionID)->S_timer_stop();
 | 
				
			||||||
        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
 | 
					        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
 | 
				
			||||||
        H += Hterm;
 | 
					        H += Hterm;
 | 
				
			||||||
@@ -469,11 +455,10 @@ public:
 | 
				
			|||||||
      for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
 | 
					      for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
 | 
				
			||||||
        // get gauge field from the SmearingPolicy and
 | 
					        // get gauge field from the SmearingPolicy and
 | 
				
			||||||
        // based on the boolean is_smeared in actionID
 | 
					        // based on the boolean is_smeared in actionID
 | 
				
			||||||
        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
 | 
					 | 
				
			||||||
        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
 | 
					        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
 | 
				
			||||||
	        as[level].actions.at(actionID)->S_timer_start();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Hterm = as[level].actions.at(actionID)->Sinitial(Us);
 | 
						as[level].actions.at(actionID)->S_timer_start();
 | 
				
			||||||
 | 
					        Hterm = as[level].actions.at(actionID)->S(Smearer);
 | 
				
			||||||
	as[level].actions.at(actionID)->S_timer_stop();
 | 
						as[level].actions.at(actionID)->S_timer_stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
 | 
					        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,13 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
template <class Field>
 | 
					template <class Field>
 | 
				
			||||||
class HmcObservable {
 | 
					class HmcObservable {
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					  virtual void TrajectoryComplete(int traj,
 | 
				
			||||||
 | 
					                                  ConfigurationBase<Field> &SmartConfig,
 | 
				
			||||||
 | 
					                                  GridSerialRNG &sRNG,
 | 
				
			||||||
 | 
					                                  GridParallelRNG &pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  virtual void TrajectoryComplete(int traj,
 | 
					  virtual void TrajectoryComplete(int traj,
 | 
				
			||||||
                                  Field &U,
 | 
					                                  Field &U,
 | 
				
			||||||
                                  GridSerialRNG &sRNG,
 | 
					                                  GridSerialRNG &sRNG,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -42,6 +42,18 @@ public:
 | 
				
			|||||||
  // necessary for HmcObservable compatibility
 | 
					  // necessary for HmcObservable compatibility
 | 
				
			||||||
  typedef typename Impl::Field Field;
 | 
					  typedef typename Impl::Field Field;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual void TrajectoryComplete(int traj,
 | 
				
			||||||
 | 
					                                  ConfigurationBase<Field> &SmartConfig,
 | 
				
			||||||
 | 
					                                  GridSerialRNG &sRNG,
 | 
				
			||||||
 | 
					                                  GridParallelRNG &pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Unsmeared plaquette"<<std::endl;
 | 
				
			||||||
 | 
					    TrajectoryComplete(traj,SmartConfig.get_U(false),sRNG,pRNG); // Unsmeared observable
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Smeared plaquette"<<std::endl;
 | 
				
			||||||
 | 
					    TrajectoryComplete(traj,SmartConfig.get_U(true),sRNG,pRNG); // Unsmeared observable
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "+++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
  void TrajectoryComplete(int traj,
 | 
					  void TrajectoryComplete(int traj,
 | 
				
			||||||
                          Field &U,
 | 
					                          Field &U,
 | 
				
			||||||
                          GridSerialRNG &sRNG,
 | 
					                          GridSerialRNG &sRNG,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,26 +7,27 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//trivial class for no smearing
 | 
					//trivial class for no smearing
 | 
				
			||||||
template< class Impl >
 | 
					template< class Impl >
 | 
				
			||||||
class NoSmearing
 | 
					class NoSmearing : public ConfigurationBase<typename Impl::Field>
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  INHERIT_FIELD_TYPES(Impl);
 | 
					  INHERIT_FIELD_TYPES(Impl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Field* ThinField;
 | 
					  Field* ThinLinks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  NoSmearing(): ThinField(NULL) {}
 | 
					  NoSmearing(): ThinLinks(NULL) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void set_Field(Field& U) { ThinField = &U; }
 | 
					  virtual void set_Field(Field& U) { ThinLinks = &U; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void smeared_force(Field&) const {}
 | 
					  virtual void smeared_force(Field&) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Field& get_SmearedU() { return *ThinField; }
 | 
					  virtual Field& get_SmearedU() { return *ThinLinks; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Field &get_U(bool smeared = false)
 | 
					  virtual Field &get_U(bool smeared = false)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    return *ThinField;
 | 
					    return *ThinLinks;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -42,19 +43,24 @@ public:
 | 
				
			|||||||
  It stores a list of smeared configurations.
 | 
					  It stores a list of smeared configurations.
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
template <class Gimpl>
 | 
					template <class Gimpl>
 | 
				
			||||||
class SmearedConfiguration
 | 
					class SmearedConfiguration : public ConfigurationBase<typename Gimpl::Field>
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  INHERIT_GIMPL_TYPES(Gimpl);
 | 
					  INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					protected:
 | 
				
			||||||
  const unsigned int smearingLevels;
 | 
					  const unsigned int smearingLevels;
 | 
				
			||||||
  Smear_Stout<Gimpl> *StoutSmearing;
 | 
					  Smear_Stout<Gimpl> *StoutSmearing;
 | 
				
			||||||
  std::vector<GaugeField> SmearedSet;
 | 
					  std::vector<GaugeField> SmearedSet;
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  GaugeField*  ThinLinks; /* Pointer to the thin links configuration */ // move to base???
 | 
				
			||||||
 | 
					protected:
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // Member functions
 | 
					  // Member functions
 | 
				
			||||||
  //====================================================================
 | 
					  //====================================================================
 | 
				
			||||||
  void fill_smearedSet(GaugeField &U)
 | 
					
 | 
				
			||||||
 | 
					  // Overridden in masked version
 | 
				
			||||||
 | 
					  virtual void fill_smearedSet(GaugeField &U)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    ThinLinks = &U;  // attach the smearing routine to the field U
 | 
					    ThinLinks = &U;  // attach the smearing routine to the field U
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -82,8 +88,9 @@ private:
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //====================================================================
 | 
					
 | 
				
			||||||
  GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
 | 
					  //overridden in masked verson
 | 
				
			||||||
 | 
					  virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
 | 
				
			||||||
					  const GaugeField& GaugeK) const 
 | 
										  const GaugeField& GaugeK) const 
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    GridBase* grid = GaugeK.Grid();
 | 
					    GridBase* grid = GaugeK.Grid();
 | 
				
			||||||
@@ -213,8 +220,6 @@ private:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  //====================================================================
 | 
					  //====================================================================
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  GaugeField*
 | 
					 | 
				
			||||||
      ThinLinks; /* Pointer to the thin links configuration */
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /* Standard constructor */
 | 
					  /* Standard constructor */
 | 
				
			||||||
  SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
 | 
					  SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
 | 
				
			||||||
@@ -230,7 +235,7 @@ public:
 | 
				
			|||||||
    : smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
 | 
					    : smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // attach the smeared routines to the thin links U and fill the smeared set
 | 
					  // attach the smeared routines to the thin links U and fill the smeared set
 | 
				
			||||||
  void set_Field(GaugeField &U)
 | 
					  virtual void set_Field(GaugeField &U)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    double start = usecond();
 | 
					    double start = usecond();
 | 
				
			||||||
    fill_smearedSet(U);
 | 
					    fill_smearedSet(U);
 | 
				
			||||||
@@ -240,7 +245,7 @@ public:
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //====================================================================
 | 
					  //====================================================================
 | 
				
			||||||
  void smeared_force(GaugeField &SigmaTilde) const
 | 
					  virtual void smeared_force(GaugeField &SigmaTilde) 
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    if (smearingLevels > 0)
 | 
					    if (smearingLevels > 0)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
@@ -267,14 +272,16 @@ public:
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
      double end = usecond();
 | 
					      double end = usecond();
 | 
				
			||||||
      double time = (end - start)/ 1e3;
 | 
					      double time = (end - start)/ 1e3;
 | 
				
			||||||
      std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl;  
 | 
					      std::cout << GridLogMessage << " GaugeConfiguration: Smeared Force chain rule took " << time << " ms" << std::endl;
 | 
				
			||||||
    }  // if smearingLevels = 0 do nothing
 | 
					    }  // if smearingLevels = 0 do nothing
 | 
				
			||||||
 | 
					    SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //====================================================================
 | 
					  //====================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
 | 
					  virtual GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  GaugeField &get_U(bool smeared = false)
 | 
					  virtual GaugeField &get_U(bool smeared = false)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    // get the config, thin links by default
 | 
					    // get the config, thin links by default
 | 
				
			||||||
    if (smeared)
 | 
					    if (smeared)
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										813
									
								
								Grid/qcd/smearing/GaugeConfigurationMasked.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										813
									
								
								Grid/qcd/smearing/GaugeConfigurationMasked.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,813 @@
 | 
				
			|||||||
 | 
					/*!
 | 
				
			||||||
 | 
					  @file GaugeConfiguration.h
 | 
				
			||||||
 | 
					  @brief Declares the GaugeConfiguration class
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*!
 | 
				
			||||||
 | 
					  @brief Smeared configuration masked container
 | 
				
			||||||
 | 
					  Modified for a multi-subset smearing (aka Luscher Flowed HMC)
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					template <class Gimpl>
 | 
				
			||||||
 | 
					class SmearedConfigurationMasked : public SmearedConfiguration<Gimpl>
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					  // These live in base class
 | 
				
			||||||
 | 
					  //  const unsigned int smearingLevels;
 | 
				
			||||||
 | 
					  //  Smear_Stout<Gimpl> *StoutSmearing;
 | 
				
			||||||
 | 
					  //  std::vector<GaugeField> SmearedSet;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::vector<LatticeLorentzComplex> masks;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef typename SU3Adjoint::AMatrix AdjMatrix;
 | 
				
			||||||
 | 
					  typedef typename SU3Adjoint::LatticeAdjMatrix  AdjMatrixField;
 | 
				
			||||||
 | 
					  typedef typename SU3Adjoint::LatticeAdjVector  AdjVectorField;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Adjoint vector to GaugeField force
 | 
				
			||||||
 | 
					  void InsertForce(GaugeField &Fdet,AdjVectorField &Fdet_nu,int nu)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    Complex ci(0,1);
 | 
				
			||||||
 | 
					    GaugeLinkField Fdet_pol(Fdet.Grid());
 | 
				
			||||||
 | 
					    Fdet_pol=Zero();
 | 
				
			||||||
 | 
					    for(int e=0;e<8;e++){
 | 
				
			||||||
 | 
					      ColourMatrix te;
 | 
				
			||||||
 | 
					      SU3::generator(e, te);
 | 
				
			||||||
 | 
					      auto tmp=peekColour(Fdet_nu,e);
 | 
				
			||||||
 | 
					      Fdet_pol=Fdet_pol + ci*tmp*te; // but norm of te is different.. why?
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    pokeLorentz(Fdet, Fdet_pol, nu);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void Compute_MpInvJx_dNxxdSy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR, AdjMatrixField MpInvJx,AdjVectorField &Fdet2 )
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GaugeLinkField UtaU(PlaqL.Grid());
 | 
				
			||||||
 | 
					    GaugeLinkField D(PlaqL.Grid());
 | 
				
			||||||
 | 
					    AdjMatrixField Dbc(PlaqL.Grid());
 | 
				
			||||||
 | 
					    LatticeComplex tmp(PlaqL.Grid());
 | 
				
			||||||
 | 
					    const int Ngen = SU3Adjoint::Dimension;
 | 
				
			||||||
 | 
					    Complex ci(0,1);
 | 
				
			||||||
 | 
					    ColourMatrix   ta,tb,tc;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int a=0;a<Ngen;a++) {
 | 
				
			||||||
 | 
					      SU3::generator(a, ta);
 | 
				
			||||||
 | 
					      // Qlat Tb = 2i Tb^Grid
 | 
				
			||||||
 | 
					      UtaU= 2.0*ci*adj(PlaqL)*ta*PlaqR;
 | 
				
			||||||
 | 
					      for(int c=0;c<Ngen;c++) {
 | 
				
			||||||
 | 
						SU3::generator(c, tc);
 | 
				
			||||||
 | 
						D = Ta( (2.0)*ci*tc *UtaU);
 | 
				
			||||||
 | 
						for(int b=0;b<Ngen;b++){
 | 
				
			||||||
 | 
						  SU3::generator(b, tb);
 | 
				
			||||||
 | 
						  tmp =-trace(ci*tb*D); 
 | 
				
			||||||
 | 
						  PokeIndex<ColourIndex>(Dbc,tmp,b,c);  // Adjoint rep
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      tmp = trace(MpInvJx * Dbc);
 | 
				
			||||||
 | 
					      PokeIndex<ColourIndex>(Fdet2,tmp,a);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  void ComputeNxy(const GaugeLinkField &PlaqL,const GaugeLinkField &PlaqR,AdjMatrixField &NxAd)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GaugeLinkField Nx(PlaqL.Grid());
 | 
				
			||||||
 | 
					    const int Ngen = SU3Adjoint::Dimension;
 | 
				
			||||||
 | 
					    Complex ci(0,1);
 | 
				
			||||||
 | 
					    ColourMatrix   tb;
 | 
				
			||||||
 | 
					    ColourMatrix   tc;
 | 
				
			||||||
 | 
					    for(int b=0;b<Ngen;b++) {
 | 
				
			||||||
 | 
					      SU3::generator(b, tb);
 | 
				
			||||||
 | 
					      Nx = (2.0)*Ta( adj(PlaqL)*ci*tb * PlaqR );
 | 
				
			||||||
 | 
					      for(int c=0;c<Ngen;c++) {
 | 
				
			||||||
 | 
						SU3::generator(c, tc);
 | 
				
			||||||
 | 
						auto tmp =closure( -trace(ci*tc*Nx)); 
 | 
				
			||||||
 | 
						PokeIndex<ColourIndex>(NxAd,tmp,c,b); 
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void ApplyMask(GaugeField &U,int smr)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    LatticeComplex tmp(U.Grid());
 | 
				
			||||||
 | 
					    GaugeLinkField Umu(U.Grid());
 | 
				
			||||||
 | 
					    for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					      Umu=PeekIndex<LorentzIndex>(U,mu);
 | 
				
			||||||
 | 
					      tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
 | 
				
			||||||
 | 
					      Umu=Umu*tmp;
 | 
				
			||||||
 | 
					      PokeIndex<LorentzIndex>(U, Umu, mu);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void logDetJacobianForceLevel(const GaugeField &U, GaugeField &force ,int smr)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GridBase* grid = U.Grid();
 | 
				
			||||||
 | 
					    ColourMatrix   tb;
 | 
				
			||||||
 | 
					    ColourMatrix   tc;
 | 
				
			||||||
 | 
					    ColourMatrix   ta;
 | 
				
			||||||
 | 
					    GaugeField C(grid);
 | 
				
			||||||
 | 
					    GaugeField Umsk(grid);
 | 
				
			||||||
 | 
					    std::vector<GaugeLinkField> Umu(Nd,grid);
 | 
				
			||||||
 | 
					    GaugeLinkField Cmu(grid); // U and staple; C contains factor of epsilon
 | 
				
			||||||
 | 
					    GaugeLinkField Zx(grid);  // U times Staple, contains factor of epsilon
 | 
				
			||||||
 | 
					    GaugeLinkField Nxx(grid);  // Nxx fundamental space
 | 
				
			||||||
 | 
					    GaugeLinkField Utmp(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField PlaqL(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField PlaqR(grid);
 | 
				
			||||||
 | 
					    const int Ngen = SU3Adjoint::Dimension;
 | 
				
			||||||
 | 
					    AdjMatrix TRb;
 | 
				
			||||||
 | 
					    ColourMatrix Ident;
 | 
				
			||||||
 | 
					    LatticeComplex  cplx(grid);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    AdjVectorField  dJdXe_nMpInv(grid); 
 | 
				
			||||||
 | 
					    AdjVectorField  dJdXe_nMpInv_y(grid); 
 | 
				
			||||||
 | 
					    AdjMatrixField  MpAd(grid);    // Mprime luchang's notes
 | 
				
			||||||
 | 
					    AdjMatrixField  MpAdInv(grid); // Mprime inverse
 | 
				
			||||||
 | 
					    AdjMatrixField  NxxAd(grid);    // Nxx in adjoint space
 | 
				
			||||||
 | 
					    AdjMatrixField  JxAd(grid);     
 | 
				
			||||||
 | 
					    AdjMatrixField  ZxAd(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  mZxAd(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  X(grid);
 | 
				
			||||||
 | 
					    Complex ci(0,1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD t0 = usecond();
 | 
				
			||||||
 | 
					    Ident = ComplexD(1.0);
 | 
				
			||||||
 | 
					    for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					      Umu[d] = peekLorentz(U, d);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    int mu= (smr/2) %Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Mask the gauge field
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Umsk = U;
 | 
				
			||||||
 | 
					    ApplyMask(Umsk,smr);
 | 
				
			||||||
 | 
					    Utmp = peekLorentz(Umsk,mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Retrieve the eps/rho parameter(s) -- could allow all different but not so far
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    double rho=this->StoutSmearing->SmearRho[1];
 | 
				
			||||||
 | 
					    int idx=0;
 | 
				
			||||||
 | 
					    for(int mu=0;mu<4;mu++){
 | 
				
			||||||
 | 
					    for(int nu=0;nu<4;nu++){
 | 
				
			||||||
 | 
					      if ( mu!=nu) assert(this->StoutSmearing->SmearRho[idx]==rho);
 | 
				
			||||||
 | 
					      else         assert(this->StoutSmearing->SmearRho[idx]==0.0);
 | 
				
			||||||
 | 
					      idx++;
 | 
				
			||||||
 | 
					    }}
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Assemble the N matrix
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Computes ALL the staples -- could compute one only and do it here
 | 
				
			||||||
 | 
					    RealD time;
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    this->StoutSmearing->BaseSmear(C, U);
 | 
				
			||||||
 | 
					    Cmu = peekLorentz(C, mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Assemble Luscher exp diff map J matrix 
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Ta so Z lives in Lie algabra
 | 
				
			||||||
 | 
					    Zx  = Ta(Cmu * adj(Umu[mu]));
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Z took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    // Move Z to the Adjoint Rep == make_adjoint_representation
 | 
				
			||||||
 | 
					    ZxAd = Zero();
 | 
				
			||||||
 | 
					    for(int b=0;b<8;b++) {
 | 
				
			||||||
 | 
					      // Adj group sets traceless antihermitian T's -- Guido, really????
 | 
				
			||||||
 | 
					      SU3::generator(b, tb);         // Fund group sets traceless hermitian T's
 | 
				
			||||||
 | 
					      SU3Adjoint::generator(b,TRb);
 | 
				
			||||||
 | 
					      TRb=-TRb;
 | 
				
			||||||
 | 
					      cplx = 2.0*trace(ci*tb*Zx); // my convention 1/2 delta ba
 | 
				
			||||||
 | 
					      ZxAd = ZxAd + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "ZxAd took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    // J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    X=1.0; 
 | 
				
			||||||
 | 
					    JxAd = X;
 | 
				
			||||||
 | 
					    mZxAd = (-1.0)*ZxAd; 
 | 
				
			||||||
 | 
					    RealD kpfac = 1;
 | 
				
			||||||
 | 
					    for(int k=1;k<12;k++){
 | 
				
			||||||
 | 
					      X=X*mZxAd;
 | 
				
			||||||
 | 
					      kpfac = kpfac /(k+1);
 | 
				
			||||||
 | 
					      JxAd = JxAd + X * kpfac;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "Jx took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    // dJ(x)/dxe
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    std::vector<AdjMatrixField>  dJdX;    dJdX.resize(8,grid);
 | 
				
			||||||
 | 
					    AdjMatrixField tbXn(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField sumXtbX(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField t2(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField dt2(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField t3(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField dt3(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField aunit(grid);
 | 
				
			||||||
 | 
					    for(int b=0;b<8;b++){
 | 
				
			||||||
 | 
					      aunit = ComplexD(1.0);
 | 
				
			||||||
 | 
					      SU3Adjoint::generator(b, TRb); //dt2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      X  = (-1.0)*ZxAd; 
 | 
				
			||||||
 | 
					      t2 = X;
 | 
				
			||||||
 | 
					      dt2 = TRb;
 | 
				
			||||||
 | 
					      for (int j = 20; j > 1; --j) {
 | 
				
			||||||
 | 
						t3 = t2*(1.0 / (j + 1))  + aunit;
 | 
				
			||||||
 | 
						dt3 = dt2*(1.0 / (j + 1));
 | 
				
			||||||
 | 
						t2 = X * t3;
 | 
				
			||||||
 | 
						dt2 = TRb * t3 + X * dt3;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      dJdX[b] = -dt2; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "dJx took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					    /////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Mask Umu for this link
 | 
				
			||||||
 | 
					    /////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    PlaqL = Ident;
 | 
				
			||||||
 | 
					    PlaqR = Utmp*adj(Cmu);
 | 
				
			||||||
 | 
					    ComputeNxy(PlaqL,PlaqR,NxxAd);
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "ComputeNxy took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // Mab
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    MpAd = Complex(1.0,0.0);
 | 
				
			||||||
 | 
					    MpAd = MpAd - JxAd * NxxAd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /////////////////////////
 | 
				
			||||||
 | 
					    // invert the 8x8
 | 
				
			||||||
 | 
					    /////////////////////////
 | 
				
			||||||
 | 
					    time=-usecond();
 | 
				
			||||||
 | 
					    MpAdInv = Inverse(MpAd);
 | 
				
			||||||
 | 
					    time+=usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "MpAdInv took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    RealD t3a = usecond();
 | 
				
			||||||
 | 
					    /////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Nxx Mp^-1
 | 
				
			||||||
 | 
					    /////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    AdjVectorField  FdetV(grid);
 | 
				
			||||||
 | 
					    AdjVectorField  Fdet1_nu(grid);
 | 
				
			||||||
 | 
					    AdjVectorField  Fdet2_nu(grid);
 | 
				
			||||||
 | 
					    AdjVectorField  Fdet2_mu(grid);
 | 
				
			||||||
 | 
					    AdjVectorField  Fdet1_mu(grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    AdjMatrixField nMpInv(grid);
 | 
				
			||||||
 | 
					    nMpInv= NxxAd *MpAdInv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    AdjMatrixField MpInvJx(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField MpInvJx_nu(grid);
 | 
				
			||||||
 | 
					    MpInvJx = (-1.0)*MpAdInv * JxAd;// rho is on the plaq factor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
 | 
				
			||||||
 | 
					    Fdet2_mu=FdetV;
 | 
				
			||||||
 | 
					    Fdet1_mu=Zero();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    for(int e =0 ; e<8 ; e++){
 | 
				
			||||||
 | 
					      LatticeComplexD tr(grid);
 | 
				
			||||||
 | 
					      ColourMatrix te;
 | 
				
			||||||
 | 
					      SU3::generator(e, te);
 | 
				
			||||||
 | 
					      tr = trace(dJdX[e] * nMpInv);
 | 
				
			||||||
 | 
					      pokeColour(dJdXe_nMpInv,tr,e);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    ///////////////////////////////
 | 
				
			||||||
 | 
					    // Mask it off
 | 
				
			||||||
 | 
					    ///////////////////////////////
 | 
				
			||||||
 | 
					    auto tmp=PeekIndex<LorentzIndex>(masks[smr],mu);
 | 
				
			||||||
 | 
					    dJdXe_nMpInv = dJdXe_nMpInv*tmp;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    //    dJdXe_nMpInv needs to multiply:
 | 
				
			||||||
 | 
					    //       Nxx_mu (site local)                           (1)
 | 
				
			||||||
 | 
					    //       Nxy_mu one site forward  in each nu direction (3)
 | 
				
			||||||
 | 
					    //       Nxy_mu one site backward in each nu direction (3)
 | 
				
			||||||
 | 
					    //       Nxy_nu 0,0  ; +mu,0; 0,-nu; +mu-nu   [ 3x4 = 12]
 | 
				
			||||||
 | 
					    // 19 terms.
 | 
				
			||||||
 | 
					    AdjMatrixField Nxy(grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GaugeField Fdet1(grid);
 | 
				
			||||||
 | 
					    GaugeField Fdet2(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField Fdet_pol(grid); // one polarisation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD t4 = usecond();
 | 
				
			||||||
 | 
					    for(int nu=0;nu<Nd;nu++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if (nu!=mu) {
 | 
				
			||||||
 | 
						///////////////// +ve nu /////////////////
 | 
				
			||||||
 | 
						//     __
 | 
				
			||||||
 | 
						//    |  |
 | 
				
			||||||
 | 
						//    x==    // nu polarisation -- clockwise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time=-usecond();
 | 
				
			||||||
 | 
						PlaqL=Ident;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqR=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
 | 
				
			||||||
 | 
					 	       Gimpl::CovShiftForward(Umu[mu], mu,
 | 
				
			||||||
 | 
						         Gimpl::CovShiftBackward(Umu[nu], nu,
 | 
				
			||||||
 | 
							   Gimpl::CovShiftIdentityBackward(Utmp, mu))));
 | 
				
			||||||
 | 
						time+=usecond();
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << "PlaqLR took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time=-usecond();
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y =   dJdXe_nMpInv;
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL,PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_nu = transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
						time+=usecond();
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << "ComputeNxy (occurs 6x) took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						time=-usecond();
 | 
				
			||||||
 | 
						PlaqR=(-1.0)*PlaqR;
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx,FdetV);
 | 
				
			||||||
 | 
						Fdet2_nu = FdetV;
 | 
				
			||||||
 | 
						time+=usecond();
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << "Compute_MpInvJx_dNxxSy (occurs 6x) took "<<time<< " us"<<std::endl;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						//    x==
 | 
				
			||||||
 | 
						//    |  |
 | 
				
			||||||
 | 
						//    .__|    // nu polarisation -- anticlockwise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqR=(rho)*Gimpl::CovShiftForward(Umu[nu], nu,
 | 
				
			||||||
 | 
							      Gimpl::CovShiftBackward(Umu[mu], mu,
 | 
				
			||||||
 | 
					    	 	        Gimpl::CovShiftIdentityBackward(Umu[nu], nu)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqL=Gimpl::CovShiftIdentityBackward(Utmp, mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL, PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_nu = Fdet1_nu+transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx,mu,-1);
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
 | 
				
			||||||
 | 
						Fdet2_nu = Fdet2_nu+FdetV;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						///////////////// -ve nu /////////////////
 | 
				
			||||||
 | 
						//  __
 | 
				
			||||||
 | 
						// |  |
 | 
				
			||||||
 | 
						// x==          // nu polarisation -- clockwise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqL=(rho)* Gimpl::CovShiftForward(Umu[mu], mu,
 | 
				
			||||||
 | 
							       Gimpl::CovShiftForward(Umu[nu], nu,
 | 
				
			||||||
 | 
								 Gimpl::CovShiftIdentityBackward(Utmp, mu)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        PlaqR = Gimpl::CovShiftIdentityForward(Umu[nu], nu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL,PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx,nu,1);
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
 | 
				
			||||||
 | 
						Fdet2_nu = Fdet2_nu+FdetV;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						// x==
 | 
				
			||||||
 | 
						// |  |
 | 
				
			||||||
 | 
						// |__|         // nu polarisation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[nu], nu,
 | 
				
			||||||
 | 
					 	        Gimpl::CovShiftIdentityBackward(Utmp, mu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqR=Gimpl::CovShiftBackward(Umu[mu], mu,
 | 
				
			||||||
 | 
						        Gimpl::CovShiftIdentityForward(Umu[nu], nu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,mu,-1);
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv_y,nu,1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL,PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_nu = Fdet1_nu + transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx,mu,-1);
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx_nu,nu,1);
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
 | 
				
			||||||
 | 
						Fdet2_nu = Fdet2_nu+FdetV;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// Set up the determinant force contribution in 3x3 algebra basis
 | 
				
			||||||
 | 
						/////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						InsertForce(Fdet1,Fdet1_nu,nu);
 | 
				
			||||||
 | 
						InsertForce(Fdet2,Fdet2_nu,nu);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						//////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// Parallel direction terms
 | 
				
			||||||
 | 
						//////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        //     __
 | 
				
			||||||
 | 
						//    |  "
 | 
				
			||||||
 | 
						//    |__"x    // mu polarisation
 | 
				
			||||||
 | 
						PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
 | 
				
			||||||
 | 
							      Gimpl::CovShiftBackward(Umu[nu], nu,
 | 
				
			||||||
 | 
					   		        Gimpl::CovShiftIdentityBackward(Utmp, mu)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqR=Gimpl::CovShiftIdentityBackward(Umu[nu], nu);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,-1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL,PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx,nu,-1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
 | 
				
			||||||
 | 
						Fdet2_mu = Fdet2_mu+FdetV;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//  __
 | 
				
			||||||
 | 
						// "  |
 | 
				
			||||||
 | 
						// x__|          // mu polarisation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PlaqL=(-rho)*Gimpl::CovShiftForward(Umu[mu], mu,
 | 
				
			||||||
 | 
							       Gimpl::CovShiftForward(Umu[nu], nu,
 | 
				
			||||||
 | 
							 	 Gimpl::CovShiftIdentityBackward(Utmp, mu)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        PlaqR=Gimpl::CovShiftIdentityForward(Umu[nu], nu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						dJdXe_nMpInv_y = Cshift(dJdXe_nMpInv,nu,1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ComputeNxy(PlaqL,PlaqR,Nxy);
 | 
				
			||||||
 | 
						Fdet1_mu = Fdet1_mu + transpose(Nxy)*dJdXe_nMpInv_y;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						MpInvJx_nu = Cshift(MpInvJx,nu,1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Compute_MpInvJx_dNxxdSy(PlaqL,PlaqR,MpInvJx_nu,FdetV);
 | 
				
			||||||
 | 
						Fdet2_mu = Fdet2_mu+FdetV;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    RealD t5 = usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Fdet1_mu = Fdet1_mu + transpose(NxxAd)*dJdXe_nMpInv;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    InsertForce(Fdet1,Fdet1_mu,mu);
 | 
				
			||||||
 | 
					    InsertForce(Fdet2,Fdet2_mu,mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    force= (-0.5)*( Fdet1 + Fdet2);
 | 
				
			||||||
 | 
					    RealD t1 = usecond();
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce t3-t0 "<<t3a-t0<<" us "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce t4-t3 dJdXe_nMpInv "<<t4-t3a<<" us "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce t5-t4 mu nu loop "<<t5-t4<<" us "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce t1-t5 "<<t1-t5<<" us "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " logDetJacobianForce level took "<<t1-t0<<" us "<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  RealD logDetJacobianLevel(const GaugeField &U,int smr)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GridBase* grid = U.Grid();
 | 
				
			||||||
 | 
					    GaugeField C(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField Nb(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField Z(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField Umu(grid), Cmu(grid);
 | 
				
			||||||
 | 
					    ColourMatrix   Tb;
 | 
				
			||||||
 | 
					    ColourMatrix   Tc;
 | 
				
			||||||
 | 
					    typedef typename SU3Adjoint::AMatrix AdjMatrix;
 | 
				
			||||||
 | 
					    typedef typename SU3Adjoint::LatticeAdjMatrix  AdjMatrixField;
 | 
				
			||||||
 | 
					    typedef typename SU3Adjoint::LatticeAdjVector  AdjVectorField;
 | 
				
			||||||
 | 
					    const int Ngen = SU3Adjoint::Dimension;
 | 
				
			||||||
 | 
					    AdjMatrix TRb;
 | 
				
			||||||
 | 
					    LatticeComplex       cplx(grid); 
 | 
				
			||||||
 | 
					    AdjVectorField  AlgV(grid); 
 | 
				
			||||||
 | 
					    AdjMatrixField  Mab(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  Ncb(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  Jac(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  Zac(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  mZac(grid);
 | 
				
			||||||
 | 
					    AdjMatrixField  X(grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int mu= (smr/2) %Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto mask=PeekIndex<LorentzIndex>(masks[smr],mu); // the cb mask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Assemble the N matrix
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Computes ALL the staples -- could compute one only here
 | 
				
			||||||
 | 
					    this->StoutSmearing->BaseSmear(C, U);
 | 
				
			||||||
 | 
					    Cmu = peekLorentz(C, mu);
 | 
				
			||||||
 | 
					    Umu = peekLorentz(U, mu);
 | 
				
			||||||
 | 
					    Complex ci(0,1);
 | 
				
			||||||
 | 
					    for(int b=0;b<Ngen;b++) {
 | 
				
			||||||
 | 
					      SU3::generator(b, Tb);
 | 
				
			||||||
 | 
					      // Qlat Tb = 2i Tb^Grid
 | 
				
			||||||
 | 
					      Nb = (2.0)*Ta( ci*Tb * Umu * adj(Cmu));
 | 
				
			||||||
 | 
					      for(int c=0;c<Ngen;c++) {
 | 
				
			||||||
 | 
						SU3::generator(c, Tc);
 | 
				
			||||||
 | 
						auto tmp = -trace(ci*Tc*Nb); // Luchang's norm: (2Tc) (2Td) N^db = -2 delta cd N^db // - was important
 | 
				
			||||||
 | 
						PokeIndex<ColourIndex>(Ncb,tmp,c,b); 
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }      
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Assemble Luscher exp diff map J matrix 
 | 
				
			||||||
 | 
					    //////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Ta so Z lives in Lie algabra
 | 
				
			||||||
 | 
					    Z  = Ta(Cmu * adj(Umu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Move Z to the Adjoint Rep == make_adjoint_representation
 | 
				
			||||||
 | 
					    Zac = Zero();
 | 
				
			||||||
 | 
					    for(int b=0;b<8;b++) {
 | 
				
			||||||
 | 
					      // Adj group sets traceless antihermitian T's -- Guido, really????
 | 
				
			||||||
 | 
					      // Is the mapping of these the same? Same structure constants
 | 
				
			||||||
 | 
					      // Might never have been checked.
 | 
				
			||||||
 | 
					      SU3::generator(b, Tb);         // Fund group sets traceless hermitian T's
 | 
				
			||||||
 | 
					      SU3Adjoint::generator(b,TRb);
 | 
				
			||||||
 | 
					      TRb=-TRb;
 | 
				
			||||||
 | 
					      cplx = 2.0*trace(ci*Tb*Z); // my convention 1/2 delta ba
 | 
				
			||||||
 | 
					      Zac = Zac + cplx * TRb; // is this right? YES - Guido used Anti herm Ta's and with bloody wrong sign.
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    // J(x) = 1 + Sum_k=1..N (-Zac)^k/(k+1)!
 | 
				
			||||||
 | 
					    //////////////////////////////////////
 | 
				
			||||||
 | 
					    X=1.0; 
 | 
				
			||||||
 | 
					    Jac = X;
 | 
				
			||||||
 | 
					    mZac = (-1.0)*Zac; 
 | 
				
			||||||
 | 
					    RealD kpfac = 1;
 | 
				
			||||||
 | 
					    for(int k=1;k<12;k++){
 | 
				
			||||||
 | 
					      X=X*mZac;
 | 
				
			||||||
 | 
					      kpfac = kpfac /(k+1);
 | 
				
			||||||
 | 
					      Jac = Jac + X * kpfac;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // Mab
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    Mab = Complex(1.0,0.0);
 | 
				
			||||||
 | 
					    Mab = Mab - Jac * Ncb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // det
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    LatticeComplex       det(grid); 
 | 
				
			||||||
 | 
					    det = Determinant(Mab);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // ln det
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    LatticeComplex       ln_det(grid); 
 | 
				
			||||||
 | 
					    ln_det = log(det);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // Masked sum
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    ln_det = ln_det * mask;
 | 
				
			||||||
 | 
					    Complex result = sum(ln_det);
 | 
				
			||||||
 | 
					    return result.real();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  RealD logDetJacobian(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    RealD ln_det = 0;
 | 
				
			||||||
 | 
					    if (this->smearingLevels > 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      double start = usecond();
 | 
				
			||||||
 | 
					      for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
 | 
				
			||||||
 | 
						ln_det+= logDetJacobianLevel(this->get_smeared_conf(ismr-1),ismr);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      ln_det +=logDetJacobianLevel(*(this->ThinLinks),0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      double end = usecond();
 | 
				
			||||||
 | 
					      double time = (end - start)/ 1e3;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "GaugeConfigurationMasked: logDetJacobian took " << time << " ms" << std::endl;  
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return ln_det;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void logDetJacobianForce(GaugeField &force)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    force =Zero();
 | 
				
			||||||
 | 
					    GaugeField force_det(force.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (this->smearingLevels > 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      double start = usecond();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      GaugeLinkField tmp_mu(force.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// remove U in UdSdU...
 | 
				
			||||||
 | 
						for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
						  tmp_mu = adj(peekLorentz(this->get_smeared_conf(ismr), mu)) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
						  pokeLorentz(force, tmp_mu, mu);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					      	// Propagate existing force
 | 
				
			||||||
 | 
					        force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1), ismr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Add back U in UdSdU...
 | 
				
			||||||
 | 
						for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
						  tmp_mu = peekLorentz(this->get_smeared_conf(ismr - 1), mu) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
						  pokeLorentz(force, tmp_mu, mu);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					    	
 | 
				
			||||||
 | 
						// Get this levels determinant force
 | 
				
			||||||
 | 
						force_det = Zero();
 | 
				
			||||||
 | 
						logDetJacobianForceLevel(this->get_smeared_conf(ismr-1),force_det,ismr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Sum the contributions
 | 
				
			||||||
 | 
						force = force + force_det;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					      // remove U in UdSdU...
 | 
				
			||||||
 | 
					      for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
						tmp_mu = adj(peekLorentz(this->get_smeared_conf(0), mu)) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
						pokeLorentz(force, tmp_mu, mu);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
						tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
						pokeLorentz(force, tmp_mu, mu);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      force_det = Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      logDetJacobianForceLevel(*this->ThinLinks,force_det,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      force = force + force_det;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      force=Ta(force); // Ta
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      double end = usecond();
 | 
				
			||||||
 | 
					      double time = (end - start)/ 1e3;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "GaugeConfigurationMasked: lnDetJacobianForce took " << time << " ms" << std::endl;  
 | 
				
			||||||
 | 
					    }  // if smearingLevels = 0 do nothing
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					  //====================================================================
 | 
				
			||||||
 | 
					  // Override base clas here to mask it
 | 
				
			||||||
 | 
					  virtual void fill_smearedSet(GaugeField &U)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    this->ThinLinks = &U;  // attach the smearing routine to the field U
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // check the pointer is not null
 | 
				
			||||||
 | 
					    if (this->ThinLinks == NULL)
 | 
				
			||||||
 | 
					      std::cout << GridLogError << "[SmearedConfigurationMasked] Error in ThinLinks pointer\n";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (this->smearingLevels > 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "[SmearedConfigurationMasked] Filling SmearedSet\n";
 | 
				
			||||||
 | 
					      GaugeField previous_u(this->ThinLinks->Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      GaugeField smeared_A(this->ThinLinks->Grid());
 | 
				
			||||||
 | 
					      GaugeField smeared_B(this->ThinLinks->Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      previous_u = *this->ThinLinks;
 | 
				
			||||||
 | 
					      double start = usecond();
 | 
				
			||||||
 | 
					      for (int smearLvl = 0; smearLvl < this->smearingLevels; ++smearLvl)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        this->StoutSmearing->smear(smeared_A, previous_u);
 | 
				
			||||||
 | 
						ApplyMask(smeared_A,smearLvl);
 | 
				
			||||||
 | 
						smeared_B = previous_u;
 | 
				
			||||||
 | 
						ApplyMask(smeared_B,smearLvl);
 | 
				
			||||||
 | 
						// Replace only the masked portion
 | 
				
			||||||
 | 
						this->SmearedSet[smearLvl] = previous_u-smeared_B + smeared_A;
 | 
				
			||||||
 | 
					        previous_u = this->SmearedSet[smearLvl];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // For debug purposes
 | 
				
			||||||
 | 
					        RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(previous_u);
 | 
				
			||||||
 | 
					        std::cout << GridLogMessage << "[SmearedConfigurationMasked] smeared Plaq: " << impl_plaq << std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      double end = usecond();
 | 
				
			||||||
 | 
					      double time = (end - start)/ 1e3;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << "GaugeConfigurationMasked: Link smearing took " << time << " ms" << std::endl;  
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //====================================================================
 | 
				
			||||||
 | 
					  // Override base to add masking
 | 
				
			||||||
 | 
					  virtual GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
 | 
				
			||||||
 | 
										  const GaugeField& GaugeK,int level) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GridBase* grid = GaugeK.Grid();
 | 
				
			||||||
 | 
					    GaugeField C(grid), SigmaK(grid), iLambda(grid);
 | 
				
			||||||
 | 
					    GaugeField SigmaKPrimeA(grid);
 | 
				
			||||||
 | 
					    GaugeField SigmaKPrimeB(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField iLambda_mu(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField iQ(grid), e_iQ(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField SigmaKPrime_mu(grid);
 | 
				
			||||||
 | 
					    GaugeLinkField GaugeKmu(grid), Cmu(grid);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    this->StoutSmearing->BaseSmear(C, GaugeK);
 | 
				
			||||||
 | 
					    SigmaK = Zero();
 | 
				
			||||||
 | 
					    iLambda = Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    SigmaKPrimeA = SigmaKPrime;
 | 
				
			||||||
 | 
					    ApplyMask(SigmaKPrimeA,level);
 | 
				
			||||||
 | 
					    SigmaKPrimeB = SigmaKPrime - SigmaKPrimeA;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // Could get away with computing only one polarisation here
 | 
				
			||||||
 | 
					    // int mu= (smr/2) %Nd;
 | 
				
			||||||
 | 
					    // SigmaKprime_A has only one component
 | 
				
			||||||
 | 
					    for (int mu = 0; mu < Nd; mu++)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      Cmu = peekLorentz(C, mu);
 | 
				
			||||||
 | 
					      GaugeKmu = peekLorentz(GaugeK, mu);
 | 
				
			||||||
 | 
					      SigmaKPrime_mu = peekLorentz(SigmaKPrimeA, mu);
 | 
				
			||||||
 | 
					      iQ = Ta(Cmu * adj(GaugeKmu));
 | 
				
			||||||
 | 
					      this->set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
 | 
				
			||||||
 | 
					      pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
 | 
				
			||||||
 | 
					      pokeLorentz(iLambda, iLambda_mu, mu);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    this->StoutSmearing->derivative(SigmaK, iLambda,GaugeK);  // derivative of SmearBase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // propagate the rest of the force as identity map, just add back
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    SigmaK = SigmaK+SigmaKPrimeB;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return SigmaK;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* Standard constructor */
 | 
				
			||||||
 | 
					  SmearedConfigurationMasked(GridCartesian* _UGrid, unsigned int Nsmear, Smear_Stout<Gimpl>& Stout)
 | 
				
			||||||
 | 
					    : SmearedConfiguration<Gimpl>(_UGrid, Nsmear,Stout)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    assert(Nsmear%(2*Nd)==0); // Or multiply by 8??
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // was resized in base class
 | 
				
			||||||
 | 
					    assert(this->SmearedSet.size()==Nsmear);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * UrbGrid;
 | 
				
			||||||
 | 
					    UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(_UGrid);
 | 
				
			||||||
 | 
					    LatticeComplex one(_UGrid); one = ComplexD(1.0,0.0);
 | 
				
			||||||
 | 
					    LatticeComplex tmp(_UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (unsigned int i = 0; i < this->smearingLevels; ++i) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      masks.push_back(*(new LatticeLorentzComplex(_UGrid)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      int mu= (i/2) %Nd;
 | 
				
			||||||
 | 
					      int cb= (i%2);
 | 
				
			||||||
 | 
					      LatticeComplex tmpcb(UrbGrid);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					      masks[i]=Zero();
 | 
				
			||||||
 | 
					      ////////////////////
 | 
				
			||||||
 | 
					      // Setup the mask
 | 
				
			||||||
 | 
					      ////////////////////
 | 
				
			||||||
 | 
					      tmp = Zero();
 | 
				
			||||||
 | 
					      pickCheckerboard(cb,tmpcb,one);
 | 
				
			||||||
 | 
					      setCheckerboard(tmp,tmpcb);
 | 
				
			||||||
 | 
					      PokeIndex<LorentzIndex>(masks[i],tmp, mu);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    delete UrbGrid;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  virtual void smeared_force(GaugeField &SigmaTilde) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    if (this->smearingLevels > 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      double start = usecond();
 | 
				
			||||||
 | 
					      GaugeField force = SigmaTilde; // actually = U*SigmaTilde
 | 
				
			||||||
 | 
					      GaugeLinkField tmp_mu(SigmaTilde.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Remove U from UdSdU
 | 
				
			||||||
 | 
					      for (int mu = 0; mu < Nd; mu++)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        // to get just SigmaTilde
 | 
				
			||||||
 | 
					        tmp_mu = adj(peekLorentz(this->SmearedSet[this->smearingLevels - 1], mu)) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
					        pokeLorentz(force, tmp_mu, mu);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for (int ismr = this->smearingLevels - 1; ismr > 0; --ismr) {
 | 
				
			||||||
 | 
					        force = this->AnalyticSmearedForce(force, this->get_smeared_conf(ismr - 1),ismr);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      force = this->AnalyticSmearedForce(force, *this->ThinLinks,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Add U to UdSdU
 | 
				
			||||||
 | 
					      for (int mu = 0; mu < Nd; mu++)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        tmp_mu = peekLorentz(*this->ThinLinks, mu) * peekLorentz(force, mu);
 | 
				
			||||||
 | 
					        pokeLorentz(SigmaTilde, tmp_mu, mu);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      double end = usecond();
 | 
				
			||||||
 | 
					      double time = (end - start)/ 1e3;
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage << " GaugeConfigurationMasked: Smeared Force chain rule took " << time << " ms" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    }  // if smearingLevels = 0 do nothing
 | 
				
			||||||
 | 
					    SigmaTilde=Gimpl::projectForce(SigmaTilde); // Ta
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										87
									
								
								Grid/qcd/smearing/JacobianAction.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								Grid/qcd/smearing/JacobianAction.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,87 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/gauge/JacobianAction.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
								   /*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Jacobian Action .. 
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <class Gimpl>
 | 
				
			||||||
 | 
					class JacobianAction : public Action<typename Gimpl::GaugeField> {
 | 
				
			||||||
 | 
					public:  
 | 
				
			||||||
 | 
					  INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmearedConfigurationMasked<Gimpl> * smearer;
 | 
				
			||||||
 | 
					  /////////////////////////// constructors
 | 
				
			||||||
 | 
					  explicit JacobianAction(SmearedConfigurationMasked<Gimpl> * _smearer ) { smearer=_smearer;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::string action_name() {return "JacobianAction";}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::string LogParameters(){
 | 
				
			||||||
 | 
					    std::stringstream sstream;
 | 
				
			||||||
 | 
					    sstream << GridLogMessage << "[JacobianAction] " << std::endl;
 | 
				
			||||||
 | 
					    return sstream.str();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //////////////////////////////////
 | 
				
			||||||
 | 
					  // Usual cases are not used
 | 
				
			||||||
 | 
					  //////////////////////////////////
 | 
				
			||||||
 | 
					  virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG){ assert(0);};
 | 
				
			||||||
 | 
					  virtual RealD S(const GaugeField &U) { assert(0); }
 | 
				
			||||||
 | 
					  virtual void deriv(const GaugeField &U, GaugeField &dSdU) { assert(0);  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //////////////////////////////////
 | 
				
			||||||
 | 
					  // Functions of smart configs only
 | 
				
			||||||
 | 
					  //////////////////////////////////
 | 
				
			||||||
 | 
					  virtual void refresh(ConfigurationBase<GaugeField> & U, GridSerialRNG &sRNG, GridParallelRNG& pRNG)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual RealD S(ConfigurationBase<GaugeField>& U)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    // det M = e^{ - ( - logDetM) }
 | 
				
			||||||
 | 
					    assert( &U == smearer );
 | 
				
			||||||
 | 
					    return -smearer->logDetJacobian();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual RealD Sinitial(ConfigurationBase<GaugeField>& U) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    return S(U);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual void deriv(ConfigurationBase<GaugeField>& U, GaugeField& dSdU)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    assert( &U == smearer );
 | 
				
			||||||
 | 
					    smearer->logDetJacobianForce(dSdU);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					 };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -40,7 +40,9 @@ template <class Gimpl>
 | 
				
			|||||||
class Smear_Stout : public Smear<Gimpl> {
 | 
					class Smear_Stout : public Smear<Gimpl> {
 | 
				
			||||||
 private:
 | 
					 private:
 | 
				
			||||||
  int OrthogDim = -1;
 | 
					  int OrthogDim = -1;
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
  const std::vector<double> SmearRho;
 | 
					  const std::vector<double> SmearRho;
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
  // Smear<Gimpl>* ownership semantics:
 | 
					  // Smear<Gimpl>* ownership semantics:
 | 
				
			||||||
  //    Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
 | 
					  //    Smear<Gimpl>* passed in to constructor are owned by caller, so we don't delete them here
 | 
				
			||||||
  //    Smear<Gimpl>* created within constructor need to be deleted as part of the destructor
 | 
					  //    Smear<Gimpl>* created within constructor need to be deleted as part of the destructor
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,61 @@ directory
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<int N, class Vec>
 | 
				
			||||||
 | 
					Lattice<iScalar<iScalar<iScalar<Vec> > > > Determinant(const Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GridBase *grid=Umu.Grid();
 | 
				
			||||||
 | 
					  auto lvol = grid->lSites();
 | 
				
			||||||
 | 
					  Lattice<iScalar<iScalar<iScalar<Vec> > > > ret(grid);
 | 
				
			||||||
 | 
					  typedef typename Vec::scalar_type scalar;
 | 
				
			||||||
 | 
					  autoView(Umu_v,Umu,CpuRead);
 | 
				
			||||||
 | 
					  autoView(ret_v,ret,CpuWrite);
 | 
				
			||||||
 | 
					  thread_for(site,lvol,{
 | 
				
			||||||
 | 
					    Eigen::MatrixXcd EigenU = Eigen::MatrixXcd::Zero(N,N);
 | 
				
			||||||
 | 
					    Coordinate lcoor;
 | 
				
			||||||
 | 
					    grid->LocalIndexToLocalCoor(site, lcoor);
 | 
				
			||||||
 | 
					    iScalar<iScalar<iMatrix<scalar, N> > > Us;
 | 
				
			||||||
 | 
					    peekLocalSite(Us, Umu_v, lcoor);
 | 
				
			||||||
 | 
					    for(int i=0;i<N;i++){
 | 
				
			||||||
 | 
					      for(int j=0;j<N;j++){
 | 
				
			||||||
 | 
						scalar tmp= Us()()(i,j);
 | 
				
			||||||
 | 
						ComplexD ztmp(real(tmp),imag(tmp));
 | 
				
			||||||
 | 
						EigenU(i,j)=ztmp;
 | 
				
			||||||
 | 
					      }}
 | 
				
			||||||
 | 
					    ComplexD detD  = EigenU.determinant();
 | 
				
			||||||
 | 
					    typename Vec::scalar_type det(detD.real(),detD.imag());
 | 
				
			||||||
 | 
					    pokeLocalSite(det,ret_v,lcoor);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<int N, class Vec>
 | 
				
			||||||
 | 
					static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<Vec, N> > > > &Umu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Umu      = ProjectOnGroup(Umu);
 | 
				
			||||||
 | 
					  auto det = Determinant(Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  det = conjugate(det);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int i=0;i<N;i++){
 | 
				
			||||||
 | 
					    auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
 | 
				
			||||||
 | 
					    element = element * det;
 | 
				
			||||||
 | 
					    PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<int N,class Vec>
 | 
				
			||||||
 | 
					static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<Vec, N> >,Nd> > &U)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GridBase *grid=U.Grid();
 | 
				
			||||||
 | 
					  // Reunitarise
 | 
				
			||||||
 | 
					  for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					    auto Umu = PeekIndex<LorentzIndex>(U,mu);
 | 
				
			||||||
 | 
					    Umu      = ProjectOnGroup(Umu);
 | 
				
			||||||
 | 
					    ProjectSUn(Umu);
 | 
				
			||||||
 | 
					    PokeIndex<LorentzIndex>(U,Umu,mu);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <int ncolour>
 | 
					template <int ncolour>
 | 
				
			||||||
class SU {
 | 
					class SU {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
@@ -741,8 +796,14 @@ public:
 | 
				
			|||||||
    typedef Lattice<vMatrixType> LatticeMatrixType;
 | 
					    typedef Lattice<vMatrixType> LatticeMatrixType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LatticeMatrixType Umu(out.Grid());
 | 
					    LatticeMatrixType Umu(out.Grid());
 | 
				
			||||||
 | 
					    LatticeMatrixType tmp(out.Grid());
 | 
				
			||||||
    for (int mu = 0; mu < Nd; mu++) {
 | 
					    for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
      LieRandomize(pRNG, Umu, 1.0);
 | 
					      //      LieRandomize(pRNG, Umu, 1.0);
 | 
				
			||||||
 | 
					      //      PokeIndex<LorentzIndex>(out, Umu, mu);
 | 
				
			||||||
 | 
					      gaussian(pRNG,Umu);
 | 
				
			||||||
 | 
					      tmp = Ta(Umu);
 | 
				
			||||||
 | 
					      taExp(tmp,Umu);
 | 
				
			||||||
 | 
					      ProjectSUn(Umu);
 | 
				
			||||||
      PokeIndex<LorentzIndex>(out, Umu, mu);
 | 
					      PokeIndex<LorentzIndex>(out, Umu, mu);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -799,11 +860,11 @@ public:
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<int N>
 | 
					template<int N>
 | 
				
			||||||
LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
 | 
					Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > Inverse(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  GridBase *grid=Umu.Grid();
 | 
					  GridBase *grid=Umu.Grid();
 | 
				
			||||||
  auto lvol = grid->lSites();
 | 
					  auto lvol = grid->lSites();
 | 
				
			||||||
  LatticeComplexD ret(grid);
 | 
					  Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > ret(grid);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  autoView(Umu_v,Umu,CpuRead);
 | 
					  autoView(Umu_v,Umu,CpuRead);
 | 
				
			||||||
  autoView(ret_v,ret,CpuWrite);
 | 
					  autoView(ret_v,ret,CpuWrite);
 | 
				
			||||||
@@ -812,42 +873,21 @@ LatticeComplexD Determinant(const Lattice<iScalar<iScalar<iMatrix<vComplexD, N>
 | 
				
			|||||||
    Coordinate lcoor;
 | 
					    Coordinate lcoor;
 | 
				
			||||||
    grid->LocalIndexToLocalCoor(site, lcoor);
 | 
					    grid->LocalIndexToLocalCoor(site, lcoor);
 | 
				
			||||||
    iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
 | 
					    iScalar<iScalar<iMatrix<ComplexD, N> > > Us;
 | 
				
			||||||
 | 
					    iScalar<iScalar<iMatrix<ComplexD, N> > > Ui;
 | 
				
			||||||
    peekLocalSite(Us, Umu_v, lcoor);
 | 
					    peekLocalSite(Us, Umu_v, lcoor);
 | 
				
			||||||
    for(int i=0;i<N;i++){
 | 
					    for(int i=0;i<N;i++){
 | 
				
			||||||
      for(int j=0;j<N;j++){
 | 
					      for(int j=0;j<N;j++){
 | 
				
			||||||
	EigenU(i,j) = Us()()(i,j);
 | 
						EigenU(i,j) = Us()()(i,j);
 | 
				
			||||||
      }}
 | 
					      }}
 | 
				
			||||||
    ComplexD det = EigenU.determinant();
 | 
					    Eigen::MatrixXcd EigenUinv = EigenU.inverse();
 | 
				
			||||||
    pokeLocalSite(det,ret_v,lcoor);
 | 
					    for(int i=0;i<N;i++){
 | 
				
			||||||
 | 
					      for(int j=0;j<N;j++){
 | 
				
			||||||
 | 
						Ui()()(i,j) = EigenUinv(i,j);
 | 
				
			||||||
 | 
					      }}
 | 
				
			||||||
 | 
					    pokeLocalSite(Ui,ret_v,lcoor);
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<int N>
 | 
					 | 
				
			||||||
static void ProjectSUn(Lattice<iScalar<iScalar<iMatrix<vComplexD, N> > > > &Umu)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  Umu      = ProjectOnGroup(Umu);
 | 
					 | 
				
			||||||
  auto det = Determinant(Umu);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  det = conjugate(det);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){
 | 
					 | 
				
			||||||
    auto element = PeekIndex<ColourIndex>(Umu,N-1,i);
 | 
					 | 
				
			||||||
    element = element * det;
 | 
					 | 
				
			||||||
    PokeIndex<ColourIndex>(Umu,element,Nc-1,i);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
template<int N>
 | 
					 | 
				
			||||||
static void ProjectSUn(Lattice<iVector<iScalar<iMatrix<vComplexD, N> >,Nd> > &U)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  GridBase *grid=U.Grid();
 | 
					 | 
				
			||||||
  // Reunitarise
 | 
					 | 
				
			||||||
  for(int mu=0;mu<Nd;mu++){
 | 
					 | 
				
			||||||
    auto Umu = PeekIndex<LorentzIndex>(U,mu);
 | 
					 | 
				
			||||||
    Umu      = ProjectOnGroup(Umu);
 | 
					 | 
				
			||||||
    ProjectSUn(Umu);
 | 
					 | 
				
			||||||
    PokeIndex<LorentzIndex>(U,Umu,mu);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
// Explicit specialisation for SU(3).
 | 
					// Explicit specialisation for SU(3).
 | 
				
			||||||
// Explicit specialisation for SU(3).
 | 
					// Explicit specialisation for SU(3).
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -51,6 +51,7 @@ public:
 | 
				
			|||||||
  typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
 | 
					  typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
 | 
				
			||||||
  typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
 | 
					  typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef Lattice<iScalar<iScalar<iVector<vComplex, Dimension> > > >  LatticeAdjVector;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template <class cplx>
 | 
					  template <class cplx>
 | 
				
			||||||
  static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {
 | 
					  static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -320,7 +320,7 @@ struct Conj{
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
struct TimesMinusI{
 | 
					struct TimesMinusI{
 | 
				
			||||||
  //Complex single
 | 
					  //Complex single
 | 
				
			||||||
  inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
					  inline float32x4_t operator()(float32x4_t in){
 | 
				
			||||||
    // ar ai br bi -> ai -ar ai -br
 | 
					    // ar ai br bi -> ai -ar ai -br
 | 
				
			||||||
    float32x4_t r0, r1;
 | 
					    float32x4_t r0, r1;
 | 
				
			||||||
    r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
					    r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
				
			||||||
@@ -328,7 +328,7 @@ struct TimesMinusI{
 | 
				
			|||||||
    return vtrn1q_f32(r1, r0); //  ar -ai  br -bi
 | 
					    return vtrn1q_f32(r1, r0); //  ar -ai  br -bi
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //Complex double
 | 
					  //Complex double
 | 
				
			||||||
  inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
 | 
					  inline float64x2_t operator()(float64x2_t in){
 | 
				
			||||||
    // a ib -> b -ia
 | 
					    // a ib -> b -ia
 | 
				
			||||||
    float64x2_t tmp;
 | 
					    float64x2_t tmp;
 | 
				
			||||||
    tmp = vnegq_f64(in);
 | 
					    tmp = vnegq_f64(in);
 | 
				
			||||||
@@ -338,7 +338,7 @@ struct TimesMinusI{
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
struct TimesI{
 | 
					struct TimesI{
 | 
				
			||||||
  //Complex single
 | 
					  //Complex single
 | 
				
			||||||
  inline float32x4_t operator()(float32x4_t in, float32x4_t ret){
 | 
					  inline float32x4_t operator()(float32x4_t in){
 | 
				
			||||||
    // ar ai br bi -> -ai ar -bi br
 | 
					    // ar ai br bi -> -ai ar -bi br
 | 
				
			||||||
    float32x4_t r0, r1;
 | 
					    float32x4_t r0, r1;
 | 
				
			||||||
    r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
					    r0 = vnegq_f32(in);        // -ar -ai -br -bi
 | 
				
			||||||
@@ -346,7 +346,7 @@ struct TimesI{
 | 
				
			|||||||
    return vtrn1q_f32(r1, in); // -ai  ar -bi  br
 | 
					    return vtrn1q_f32(r1, in); // -ai  ar -bi  br
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //Complex double
 | 
					  //Complex double
 | 
				
			||||||
  inline float64x2_t operator()(float64x2_t in, float64x2_t ret){
 | 
					  inline float64x2_t operator()(float64x2_t in){
 | 
				
			||||||
    // a ib -> -b ia
 | 
					    // a ib -> -b ia
 | 
				
			||||||
    float64x2_t tmp;
 | 
					    float64x2_t tmp;
 | 
				
			||||||
    tmp = vnegq_f64(in);
 | 
					    tmp = vnegq_f64(in);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -123,7 +123,7 @@ public:
 | 
				
			|||||||
	  }
 | 
						  }
 | 
				
			||||||
	  if ( permute_slice ) {
 | 
						  if ( permute_slice ) {
 | 
				
			||||||
	    int ptype       =grid->PermuteType(d);
 | 
						    int ptype       =grid->PermuteType(d);
 | 
				
			||||||
	    uint8_t mask    =grid->Nsimd() >> (ptype + 1);		
 | 
						    uint8_t mask    =0x1<<ptype;
 | 
				
			||||||
	    SE._permute    |= mask;
 | 
						    SE._permute    |= mask;
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
	}	
 | 
						}	
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -339,8 +339,8 @@ public:
 | 
				
			|||||||
  // Vectors that live on the symmetric heap in case of SHMEM
 | 
					  // Vectors that live on the symmetric heap in case of SHMEM
 | 
				
			||||||
  // These are used; either SHM objects or refs to the above symmetric heap vectors
 | 
					  // These are used; either SHM objects or refs to the above symmetric heap vectors
 | 
				
			||||||
  // depending on comms target
 | 
					  // depending on comms target
 | 
				
			||||||
  Vector<cobj *> u_simd_send_buf;
 | 
					  std::vector<cobj *> u_simd_send_buf;
 | 
				
			||||||
  Vector<cobj *> u_simd_recv_buf;
 | 
					  std::vector<cobj *> u_simd_recv_buf;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int u_comm_offset;
 | 
					  int u_comm_offset;
 | 
				
			||||||
  int _unified_buffer_size;
 | 
					  int _unified_buffer_size;
 | 
				
			||||||
@@ -348,7 +348,7 @@ public:
 | 
				
			|||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
  // Stencil query
 | 
					  // Stencil query
 | 
				
			||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
#ifdef SHM_FAST_PATH
 | 
					#if 1
 | 
				
			||||||
  inline int SameNode(int point) {
 | 
					  inline int SameNode(int point) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int dimension    = this->_directions[point];
 | 
					    int dimension    = this->_directions[point];
 | 
				
			||||||
@@ -451,7 +451,6 @@ public:
 | 
				
			|||||||
    else if ( this->fullDirichlet ) DslashLogDirichlet();
 | 
					    else if ( this->fullDirichlet ) DslashLogDirichlet();
 | 
				
			||||||
    else DslashLogFull();
 | 
					    else DslashLogFull();
 | 
				
			||||||
    acceleratorCopySynchronise();
 | 
					    acceleratorCopySynchronise();
 | 
				
			||||||
    // Everyone agrees we are all done
 | 
					 | 
				
			||||||
    _grid->StencilBarrier(); 
 | 
					    _grid->StencilBarrier(); 
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -540,6 +539,7 @@ public:
 | 
				
			|||||||
      compress.Point(point);
 | 
					      compress.Point(point);
 | 
				
			||||||
      HaloGatherDir(source,compress,point,face_idx);
 | 
					      HaloGatherDir(source,compress,point,face_idx);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    accelerator_barrier();
 | 
				
			||||||
    face_table_computed=1;
 | 
					    face_table_computed=1;
 | 
				
			||||||
    assert(u_comm_offset==_unified_buffer_size);
 | 
					    assert(u_comm_offset==_unified_buffer_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -665,11 +665,9 @@ public:
 | 
				
			|||||||
    for(int i=0;i<mm.size();i++){
 | 
					    for(int i=0;i<mm.size();i++){
 | 
				
			||||||
      decompressor::MergeFace(decompress,mm[i]);
 | 
					      decompressor::MergeFace(decompress,mm[i]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if ( mm.size() )    acceleratorFenceComputeStream();
 | 
					 | 
				
			||||||
    for(int i=0;i<dd.size();i++){
 | 
					    for(int i=0;i<dd.size();i++){
 | 
				
			||||||
      decompressor::DecompressFace(decompress,dd[i]);
 | 
					      decompressor::DecompressFace(decompress,dd[i]);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if ( dd.size() )    acceleratorFenceComputeStream();
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
  // Set up routines
 | 
					  // Set up routines
 | 
				
			||||||
@@ -707,6 +705,7 @@ public:
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  /// Introduce a block structure and switch off comms on boundaries
 | 
					  /// Introduce a block structure and switch off comms on boundaries
 | 
				
			||||||
  void DirichletBlock(const Coordinate &dirichlet_block)
 | 
					  void DirichletBlock(const Coordinate &dirichlet_block)
 | 
				
			||||||
@@ -1368,10 +1367,11 @@ public:
 | 
				
			|||||||
	    int recv_from_rank;
 | 
						    int recv_from_rank;
 | 
				
			||||||
	    int xmit_to_rank;
 | 
						    int xmit_to_rank;
 | 
				
			||||||
	    int shm_send=0;
 | 
						    int shm_send=0;
 | 
				
			||||||
	    int shm_recv=0;
 | 
					
 | 
				
			||||||
	    _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
 | 
						    _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
 | 
				
			||||||
#ifdef SHM_FAST_PATH
 | 
					#ifdef SHM_FAST_PATH
 | 
				
			||||||
  #warning STENCIL SHM FAST PATH SELECTED
 | 
					  #warning STENCIL SHM FAST PATH SELECTED
 | 
				
			||||||
 | 
					  	  int shm_recv=0;
 | 
				
			||||||
	    // shm == receive pointer         if offnode
 | 
						    // shm == receive pointer         if offnode
 | 
				
			||||||
	    // shm == Translate[send pointer] if on node -- my view of his send pointer
 | 
						    // shm == Translate[send pointer] if on node -- my view of his send pointer
 | 
				
			||||||
	    cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
 | 
						    cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
 | 
				
			||||||
@@ -1404,7 +1404,6 @@ public:
 | 
				
			|||||||
		acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
 | 
							acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
 | 
				
			||||||
	      }
 | 
						      }
 | 
				
			||||||
	      int do_send = (comms_send|comms_partial_send) && (!shm_send );
 | 
						      int do_send = (comms_send|comms_partial_send) && (!shm_send );
 | 
				
			||||||
	      int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
 | 
					 | 
				
			||||||
	      AddPacket((void *)sp,(void *)rp,
 | 
						      AddPacket((void *)sp,(void *)rp,
 | 
				
			||||||
			xmit_to_rank,do_send,
 | 
								xmit_to_rank,do_send,
 | 
				
			||||||
			recv_from_rank,do_send,
 | 
								recv_from_rank,do_send,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -55,7 +55,7 @@ template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(c
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Specialisation: Cayley-Hamilton exponential for SU(3)
 | 
					// Specialisation: Cayley-Hamilton exponential for SU(3)
 | 
				
			||||||
#ifndef GRID_ACCELERATED
 | 
					#if 0
 | 
				
			||||||
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> 
 | 
					template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> 
 | 
				
			||||||
accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
 | 
					accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP )
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -133,7 +133,6 @@ typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec
 | 
				
			|||||||
  typedef scalar_type * pointer;
 | 
					  typedef scalar_type * pointer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  constexpr int words=sizeof(vobj)/sizeof(vector_type);
 | 
					  constexpr int words=sizeof(vobj)/sizeof(vector_type);
 | 
				
			||||||
  constexpr int Nsimd=vector_type::Nsimd();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  scalar_object extracted;
 | 
					  scalar_object extracted;
 | 
				
			||||||
  pointer __restrict__  sp = (pointer)&extracted; // Type pun
 | 
					  pointer __restrict__  sp = (pointer)&extracted; // Type pun
 | 
				
			||||||
@@ -153,7 +152,6 @@ void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_ob
 | 
				
			|||||||
  typedef scalar_type * pointer;
 | 
					  typedef scalar_type * pointer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  constexpr int words=sizeof(vobj)/sizeof(vector_type);
 | 
					  constexpr int words=sizeof(vobj)/sizeof(vector_type);
 | 
				
			||||||
  constexpr int Nsimd=vector_type::Nsimd();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pointer __restrict__ sp = (pointer)&extracted;
 | 
					  pointer __restrict__ sp = (pointer)&extracted;
 | 
				
			||||||
  vector_type *vp = (vector_type *)&vec;
 | 
					  vector_type *vp = (vector_type *)&vec;
 | 
				
			||||||
@@ -178,8 +176,6 @@ void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int off
 | 
				
			|||||||
  const int s = Nsimd/Nextr;
 | 
					  const int s = Nsimd/Nextr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  vector_type * vp = (vector_type *)&vec;
 | 
					  vector_type * vp = (vector_type *)&vec;
 | 
				
			||||||
  scalar_type      vtmp;
 | 
					 | 
				
			||||||
  sobj_scalar_type stmp;
 | 
					 | 
				
			||||||
  for(int w=0;w<words;w++){
 | 
					  for(int w=0;w<words;w++){
 | 
				
			||||||
    for(int i=0;i<Nextr;i++){
 | 
					    for(int i=0;i<Nextr;i++){
 | 
				
			||||||
      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
 | 
					      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
 | 
				
			||||||
@@ -205,7 +201,6 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  vector_type * vp = (vector_type *)&vec;
 | 
					  vector_type * vp = (vector_type *)&vec;
 | 
				
			||||||
  scalar_type      vtmp;
 | 
					  scalar_type      vtmp;
 | 
				
			||||||
  sobj_scalar_type stmp;
 | 
					 | 
				
			||||||
  for(int w=0;w<words;w++){
 | 
					  for(int w=0;w<words;w++){
 | 
				
			||||||
    for(int i=0;i<Nextr;i++){
 | 
					    for(int i=0;i<Nextr;i++){
 | 
				
			||||||
      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
 | 
					      sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
 | 
				
			||||||
@@ -242,9 +237,6 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest
 | 
				
			|||||||
  typedef oextract_type * opointer;
 | 
					  typedef oextract_type * opointer;
 | 
				
			||||||
  typedef iextract_type * ipointer;
 | 
					  typedef iextract_type * ipointer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  constexpr int oNsimd=ovector_type::Nsimd();
 | 
					 | 
				
			||||||
  constexpr int iNsimd=ivector_type::Nsimd();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  iscalar_type itmp;
 | 
					  iscalar_type itmp;
 | 
				
			||||||
  oscalar_type otmp;
 | 
					  oscalar_type otmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -458,6 +458,7 @@ inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream);
 | 
				
			|||||||
// Common on all GPU targets
 | 
					// Common on all GPU targets
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
#if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP)
 | 
					#if defined(GRID_SYCL) || defined(GRID_CUDA) || defined(GRID_HIP)
 | 
				
			||||||
 | 
					// FIXME -- the non-blocking nature got broken March 30 2023 by PAB
 | 
				
			||||||
#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );  
 | 
					#define accelerator_forNB( iter1, num1, nsimd, ... ) accelerator_for2dNB( iter1, num1, iter2, 1, nsimd, {__VA_ARGS__} );  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define accelerator_for( iter, num, nsimd, ... )		\
 | 
					#define accelerator_for( iter, num, nsimd, ... )		\
 | 
				
			||||||
@@ -525,7 +526,7 @@ inline void acceleratorFreeCpu  (void *ptr){free(ptr);};
 | 
				
			|||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef GRID_SYCL
 | 
					#ifdef GRID_SYCL
 | 
				
			||||||
inline void acceleratorFenceComputeStream(void){ accelerator_barrier();};
 | 
					inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); };
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
// Ordering within a stream guaranteed on Nvidia & AMD
 | 
					// Ordering within a stream guaranteed on Nvidia & AMD
 | 
				
			||||||
inline void acceleratorFenceComputeStream(void){ };
 | 
					inline void acceleratorFenceComputeStream(void){ };
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										224
									
								
								HMC/FTHMC2p1f.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										224
									
								
								HMC/FTHMC2p1f.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,224 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2023
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/JacobianAction.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::cout << std::setprecision(12);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Grid_init(&argc, &argv);
 | 
				
			||||||
 | 
					  int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					  // here make a routine to print all the relevant information on the run
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   // Typedefs to simplify notation
 | 
				
			||||||
 | 
					  typedef WilsonImplR FermionImplPolicy;
 | 
				
			||||||
 | 
					  typedef MobiusFermionD FermionAction;
 | 
				
			||||||
 | 
					  typedef typename FermionAction::FermionField FermionField;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef Grid::XmlReader       Serialiser;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
				
			||||||
 | 
					  IntegratorParameters MD;
 | 
				
			||||||
 | 
					  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;
 | 
				
			||||||
 | 
					  //  MD.name    = std::string("Leap Frog");
 | 
				
			||||||
 | 
					  //  typedef GenericHMCRunner<ForceGradient> HMCWrapper;
 | 
				
			||||||
 | 
					  //  MD.name    = std::string("Force Gradient");
 | 
				
			||||||
 | 
					  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
 | 
				
			||||||
 | 
					  MD.name    = std::string("MinimumNorm2");
 | 
				
			||||||
 | 
					  MD.MDsteps = 12;
 | 
				
			||||||
 | 
					  MD.trajL   = 1.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  HMCparameters HMCparams;
 | 
				
			||||||
 | 
					  HMCparams.StartTrajectory  = 0;
 | 
				
			||||||
 | 
					  HMCparams.Trajectories     = 200;
 | 
				
			||||||
 | 
					  HMCparams.NoMetropolisUntil=  20;
 | 
				
			||||||
 | 
					  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
 | 
				
			||||||
 | 
					  HMCparams.StartingType     =std::string("HotStart");
 | 
				
			||||||
 | 
					  HMCparams.MD = MD;
 | 
				
			||||||
 | 
					  HMCWrapper TheHMC(HMCparams);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Grid from the command line arguments --grid and --mpi
 | 
				
			||||||
 | 
					  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  CheckpointerParameters CPparams;
 | 
				
			||||||
 | 
					  CPparams.config_prefix = "ckpoint_EODWF_lat";
 | 
				
			||||||
 | 
					  CPparams.smeared_prefix = "ckpoint_EODWF_lat_smr";
 | 
				
			||||||
 | 
					  CPparams.rng_prefix    = "ckpoint_EODWF_rng";
 | 
				
			||||||
 | 
					  CPparams.saveInterval  = 1;
 | 
				
			||||||
 | 
					  CPparams.saveSmeared   = true;
 | 
				
			||||||
 | 
					  CPparams.format        = "IEEE64BIG";
 | 
				
			||||||
 | 
					  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RNGModuleParameters RNGpar;
 | 
				
			||||||
 | 
					  RNGpar.serial_seeds = "1 2 3 4 5";
 | 
				
			||||||
 | 
					  RNGpar.parallel_seeds = "6 7 8 9 10";
 | 
				
			||||||
 | 
					  TheHMC.Resources.SetRNGSeeds(RNGpar);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Construct observables
 | 
				
			||||||
 | 
					  // here there is too much indirection
 | 
				
			||||||
 | 
					  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
 | 
				
			||||||
 | 
					  TheHMC.Resources.AddObservable<PlaqObs>();
 | 
				
			||||||
 | 
					  //////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Ls      = 16;
 | 
				
			||||||
 | 
					  Real beta         = 2.13;
 | 
				
			||||||
 | 
					  Real light_mass   = 0.01;
 | 
				
			||||||
 | 
					  Real strange_mass = 0.04;
 | 
				
			||||||
 | 
					  Real pv_mass      = 1.0;
 | 
				
			||||||
 | 
					  RealD M5  = 1.8;
 | 
				
			||||||
 | 
					  RealD b   = 1.0; // Scale factor two
 | 
				
			||||||
 | 
					  RealD c   = 0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  OneFlavourRationalParams OFRp;
 | 
				
			||||||
 | 
					  OFRp.lo       = 1.0e-2;
 | 
				
			||||||
 | 
					  OFRp.hi       = 64;
 | 
				
			||||||
 | 
					  OFRp.MaxIter  = 10000;
 | 
				
			||||||
 | 
					  OFRp.tolerance= 1.0e-10;
 | 
				
			||||||
 | 
					  OFRp.degree   = 14;
 | 
				
			||||||
 | 
					  OFRp.precision= 40;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<Real> hasenbusch({ 0.1 });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto GridPtr   = TheHMC.Resources.GetCartesian();
 | 
				
			||||||
 | 
					  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
 | 
				
			||||||
 | 
					  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
 | 
				
			||||||
 | 
					  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  IwasakiGaugeActionR GaugeAction(beta);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // temporarily need a gauge field
 | 
				
			||||||
 | 
					  LatticeGaugeField U(GridPtr);
 | 
				
			||||||
 | 
					  LatticeGaugeField Uhot(GridPtr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // These lines are unecessary if BC are all periodic
 | 
				
			||||||
 | 
					  std::vector<Complex> boundary = {1,1,1,-1};
 | 
				
			||||||
 | 
					  FermionAction::ImplParams Params(boundary);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  double StoppingCondition = 1e-10;
 | 
				
			||||||
 | 
					  double MaxCGIterations = 30000;
 | 
				
			||||||
 | 
					  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bool ApplySmearing = true;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  // Collect actions
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  ActionLevel<HMCWrapper::Field> Level1(1);
 | 
				
			||||||
 | 
					  ActionLevel<HMCWrapper::Field> Level2(2);
 | 
				
			||||||
 | 
					  ActionLevel<HMCWrapper::Field> Level3(4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  // Strange action
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
 | 
				
			||||||
 | 
					  MobiusEOFAFermionD Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
 | 
				
			||||||
 | 
					  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
 | 
				
			||||||
 | 
					    EOFA(Strange_Op_L, Strange_Op_R, 
 | 
				
			||||||
 | 
						 CG,
 | 
				
			||||||
 | 
						 CG, CG,
 | 
				
			||||||
 | 
						 CG, CG, 
 | 
				
			||||||
 | 
						 OFRp, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  EOFA.is_smeared = ApplySmearing;
 | 
				
			||||||
 | 
					  Level1.push_back(&EOFA);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  // up down action
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  std::vector<Real> light_den;
 | 
				
			||||||
 | 
					  std::vector<Real> light_num;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int n_hasenbusch = hasenbusch.size();
 | 
				
			||||||
 | 
					  light_den.push_back(light_mass);
 | 
				
			||||||
 | 
					  for(int h=0;h<n_hasenbusch;h++){
 | 
				
			||||||
 | 
					    light_den.push_back(hasenbusch[h]);
 | 
				
			||||||
 | 
					    light_num.push_back(hasenbusch[h]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  light_num.push_back(pv_mass);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<FermionAction *> Numerators;
 | 
				
			||||||
 | 
					  std::vector<FermionAction *> Denominators;
 | 
				
			||||||
 | 
					  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int h=0;h<n_hasenbusch+1;h++){
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
 | 
				
			||||||
 | 
					    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
 | 
				
			||||||
 | 
					    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
 | 
				
			||||||
 | 
					    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],CG,CG));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int h=0;h<n_hasenbusch+1;h++){
 | 
				
			||||||
 | 
					    Quotients[h]->is_smeared = ApplySmearing;
 | 
				
			||||||
 | 
					    Level1.push_back(Quotients[h]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // lnDetJacobianAction
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  double rho = 0.1;  // smearing parameter
 | 
				
			||||||
 | 
					  int Nsmear = 1;    // number of smearing levels - must be multiple of 2Nd
 | 
				
			||||||
 | 
					  int Nstep  = 8*Nsmear;    // number of smearing levels - must be multiple of 2Nd
 | 
				
			||||||
 | 
					  Smear_Stout<HMCWrapper::ImplPolicy> Stout(rho);
 | 
				
			||||||
 | 
					  SmearedConfigurationMasked<HMCWrapper::ImplPolicy> SmearingPolicy(GridPtr, Nstep, Stout);
 | 
				
			||||||
 | 
					  JacobianAction<HMCWrapper::ImplPolicy> Jacobian(&SmearingPolicy);
 | 
				
			||||||
 | 
					  if( ApplySmearing ) Level2.push_back(&Jacobian);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Built the Jacobian "<< std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Gauge action
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  //  GaugeAction.is_smeared = ApplySmearing;
 | 
				
			||||||
 | 
					  GaugeAction.is_smeared = true;
 | 
				
			||||||
 | 
					  Level3.push_back(&GaugeAction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " ************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Action complete -- NO FERMIONS FOR NOW -- FIXME"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " ************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage <<  std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage <<  std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Running the FT HMC "<< std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TheHMC.TheAction.push_back(Level1);
 | 
				
			||||||
 | 
					  TheHMC.TheAction.push_back(Level2);
 | 
				
			||||||
 | 
					  TheHMC.TheAction.push_back(Level3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TheHMC.Run(SmearingPolicy); // for smearing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					} // main
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -227,7 +227,7 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  //  std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
 | 
					  //  std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
 | 
				
			||||||
  //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 | 
					  //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int SP_iters=10000;
 | 
					  int SP_iters=9000;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  RationalActionParams OFRp; // Up/down
 | 
					  RationalActionParams OFRp; // Up/down
 | 
				
			||||||
  OFRp.lo       = 6.0e-5;
 | 
					  OFRp.lo       = 6.0e-5;
 | 
				
			||||||
@@ -362,12 +362,12 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  // Probably dominates the force - back to EOFA.
 | 
					  // Probably dominates the force - back to EOFA.
 | 
				
			||||||
  OneFlavourRationalParams SFRp;
 | 
					  OneFlavourRationalParams SFRp;
 | 
				
			||||||
  SFRp.lo       = 0.25;
 | 
					  SFRp.lo       = 0.1;
 | 
				
			||||||
  SFRp.hi       = 25.0;
 | 
					  SFRp.hi       = 25.0;
 | 
				
			||||||
  SFRp.MaxIter  = 10000;
 | 
					  SFRp.MaxIter  = 10000;
 | 
				
			||||||
  SFRp.tolerance= 1.0e-5;
 | 
					  SFRp.tolerance= 1.0e-8;
 | 
				
			||||||
  SFRp.mdtolerance= 2.0e-4;
 | 
					  SFRp.mdtolerance= 2.0e-4;
 | 
				
			||||||
  SFRp.degree   = 8;
 | 
					  SFRp.degree   = 12;
 | 
				
			||||||
  SFRp.precision= 50;
 | 
					  SFRp.precision= 50;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
 | 
					  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
 | 
				
			||||||
@@ -451,7 +451,7 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  
 | 
					  
 | 
				
			||||||
#define MIXED_PRECISION
 | 
					#define MIXED_PRECISION
 | 
				
			||||||
#ifdef MIXED_PRECISION
 | 
					#ifdef MIXED_PRECISION
 | 
				
			||||||
  std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy> *> Bdys;
 | 
					  std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
  std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
 | 
					  std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -526,15 +526,13 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
      Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
 | 
					      Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
#ifdef MIXED_PRECISION
 | 
					#ifdef MIXED_PRECISION
 | 
				
			||||||
      Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
 | 
					      Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
 | 
				
			||||||
			   *Numerators[h],*Denominators[h],
 | 
								   *Numerators[h],*Denominators[h],
 | 
				
			||||||
			   *NumeratorsF[h],*DenominatorsF[h],
 | 
								   *NumeratorsF[h],*DenominatorsF[h],
 | 
				
			||||||
			   *Numerators[h],*Denominators[h],
 | 
					 | 
				
			||||||
			   OFRp, SP_iters) );
 | 
								   OFRp, SP_iters) );
 | 
				
			||||||
      Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
 | 
					      Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
 | 
				
			||||||
			   *Numerators[h],*Denominators[h],
 | 
								   *Numerators[h],*Denominators[h],
 | 
				
			||||||
			   *NumeratorsF[h],*DenominatorsF[h],
 | 
								   *NumeratorsF[h],*DenominatorsF[h],
 | 
				
			||||||
			   *Numerators[h],*Denominators[h],
 | 
					 | 
				
			||||||
			   OFRp, SP_iters) );
 | 
								   OFRp, SP_iters) );
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
      Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
 | 
					      Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -329,7 +329,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    auto grid4= GridPtr;
 | 
					    auto grid4= GridPtr;
 | 
				
			||||||
    auto rbgrid4= GridRBPtr;
 | 
					 | 
				
			||||||
    auto rbgrid = StrangeOp.FermionRedBlackGrid();
 | 
					    auto rbgrid = StrangeOp.FermionRedBlackGrid();
 | 
				
			||||||
    auto grid = StrangeOp.FermionGrid();
 | 
					    auto grid = StrangeOp.FermionGrid();
 | 
				
			||||||
    if(1){
 | 
					    if(1){
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -146,6 +146,8 @@ NAMESPACE_END(Grid);
 | 
				
			|||||||
int main(int argc, char **argv) {
 | 
					int main(int argc, char **argv) {
 | 
				
			||||||
  using namespace Grid;
 | 
					  using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << " Grid Initialise "<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  Grid_init(&argc, &argv);
 | 
					  Grid_init(&argc, &argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  CartesianCommunicator::BarrierWorld();
 | 
					  CartesianCommunicator::BarrierWorld();
 | 
				
			||||||
@@ -164,35 +166,30 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  typedef MobiusEOFAFermionF FermionEOFAActionF;
 | 
					  typedef MobiusEOFAFermionF FermionEOFAActionF;
 | 
				
			||||||
  typedef typename FermionActionF::FermionField FermionFieldF;
 | 
					  typedef typename FermionActionF::FermionField FermionFieldF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef WilsonImplD2 FermionImplPolicyD2;
 | 
					 | 
				
			||||||
  typedef MobiusFermionD2 FermionActionD2;
 | 
					 | 
				
			||||||
  typedef MobiusEOFAFermionD2 FermionEOFAActionD2;
 | 
					 | 
				
			||||||
  typedef typename FermionActionD2::FermionField FermionFieldD2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  typedef Grid::XmlReader       Serialiser;
 | 
					  typedef Grid::XmlReader       Serialiser;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
					  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 | 
				
			||||||
  IntegratorParameters MD;
 | 
					  IntegratorParameters MD;
 | 
				
			||||||
  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;
 | 
					  //  typedef GenericHMCRunner<LeapFrog> HMCWrapper;
 | 
				
			||||||
  //  MD.name    = std::string("Leap Frog");
 | 
					  //  MD.name    = std::string("Leap Frog");
 | 
				
			||||||
  typedef GenericHMCRunner<ForceGradient> HMCWrapper;
 | 
					  //  typedef GenericHMCRunner<ForceGradient> HMCWrapper;
 | 
				
			||||||
  MD.name    = std::string("Force Gradient");
 | 
					  //  MD.name    = std::string("Force Gradient");
 | 
				
			||||||
  //typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
 | 
					  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
 | 
				
			||||||
  // MD.name    = std::string("MinimumNorm2");
 | 
					  MD.name    = std::string("MinimumNorm2");
 | 
				
			||||||
  // TrajL = 2
 | 
					  // TrajL = 2
 | 
				
			||||||
  // 4/2 => 0.6 dH
 | 
					  // 4/2 => 0.6 dH
 | 
				
			||||||
  // 3/3 => 0.8 dH .. depth 3, slower
 | 
					  // 3/3 => 0.8 dH .. depth 3, slower
 | 
				
			||||||
  //MD.MDsteps =  4;
 | 
					  //MD.MDsteps =  4;
 | 
				
			||||||
  MD.MDsteps =  12;
 | 
					  MD.MDsteps =  14;
 | 
				
			||||||
  MD.trajL   = 0.5;
 | 
					  MD.trajL   = 0.5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  HMCparameters HMCparams;
 | 
					  HMCparameters HMCparams;
 | 
				
			||||||
  HMCparams.StartTrajectory  = 1077;
 | 
					  HMCparams.StartTrajectory  = 1077;
 | 
				
			||||||
  HMCparams.Trajectories     = 1;
 | 
					  HMCparams.Trajectories     = 20;
 | 
				
			||||||
  HMCparams.NoMetropolisUntil=  0;
 | 
					  HMCparams.NoMetropolisUntil=  0;
 | 
				
			||||||
  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
 | 
					  // "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
 | 
				
			||||||
  //  HMCparams.StartingType     =std::string("ColdStart");
 | 
					  HMCparams.StartingType     =std::string("ColdStart");
 | 
				
			||||||
  HMCparams.StartingType     =std::string("CheckpointStart");
 | 
					  //  HMCparams.StartingType     =std::string("CheckpointStart");
 | 
				
			||||||
  HMCparams.MD = MD;
 | 
					  HMCparams.MD = MD;
 | 
				
			||||||
  HMCWrapper TheHMC(HMCparams);
 | 
					  HMCWrapper TheHMC(HMCparams);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -228,7 +225,7 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  Real pv_mass      = 1.0;
 | 
					  Real pv_mass      = 1.0;
 | 
				
			||||||
  //  std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
 | 
					  //  std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
 | 
				
			||||||
  //  std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
 | 
					  //  std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
 | 
				
			||||||
  std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
 | 
					  std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 }); // Updated
 | 
				
			||||||
  //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 | 
					  //  std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  auto GridPtr   = TheHMC.Resources.GetCartesian();
 | 
					  auto GridPtr   = TheHMC.Resources.GetCartesian();
 | 
				
			||||||
@@ -250,11 +247,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  GlobalSharedMemory::GetShmDims(mpi,shm);
 | 
					  GlobalSharedMemory::GetShmDims(mpi,shm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Coordinate CommDim(Nd);
 | 
					 | 
				
			||||||
  for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Coordinate NonDirichlet(Nd+1,0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  //////////////////////////
 | 
					  //////////////////////////
 | 
				
			||||||
  // Fermion Grids
 | 
					  // Fermion Grids
 | 
				
			||||||
  //////////////////////////
 | 
					  //////////////////////////
 | 
				
			||||||
@@ -272,7 +264,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  // temporarily need a gauge field
 | 
					  // temporarily need a gauge field
 | 
				
			||||||
  LatticeGaugeFieldD  U(GridPtr); U=Zero();
 | 
					  LatticeGaugeFieldD  U(GridPtr); U=Zero();
 | 
				
			||||||
  LatticeGaugeFieldF  UF(GridPtrF); UF=Zero();
 | 
					  LatticeGaugeFieldF  UF(GridPtrF); UF=Zero();
 | 
				
			||||||
  LatticeGaugeFieldD2 UD2(GridPtrF); UD2=Zero();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
 | 
					  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
 | 
				
			||||||
  TheHMC.ReadCommandLine(argc,argv);  // params on CML or from param file
 | 
					  TheHMC.ReadCommandLine(argc,argv);  // params on CML or from param file
 | 
				
			||||||
@@ -283,15 +274,13 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  std::vector<Complex> boundary = {1,1,1,-1};
 | 
					  std::vector<Complex> boundary = {1,1,1,-1};
 | 
				
			||||||
  FermionAction::ImplParams Params(boundary);
 | 
					  FermionAction::ImplParams Params(boundary);
 | 
				
			||||||
  FermionActionF::ImplParams ParamsF(boundary);
 | 
					  FermionActionF::ImplParams ParamsF(boundary);
 | 
				
			||||||
  Params.dirichlet=NonDirichlet;
 | 
					 | 
				
			||||||
  ParamsF.dirichlet=NonDirichlet;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //  double StoppingCondition = 1e-14;
 | 
					  //  double StoppingCondition = 1e-14;
 | 
				
			||||||
  //  double MDStoppingCondition = 1e-9;
 | 
					  //  double MDStoppingCondition = 1e-9;
 | 
				
			||||||
  double StoppingCondition = 1e-8;
 | 
					  double StoppingCondition = 1e-9;
 | 
				
			||||||
  double MDStoppingCondition = 1e-7;
 | 
					  double MDStoppingCondition = 1e-8;
 | 
				
			||||||
  double MDStoppingConditionLoose = 1e-7;
 | 
					  double MDStoppingConditionLoose = 1e-8;
 | 
				
			||||||
  double MDStoppingConditionStrange = 1e-7;
 | 
					  double MDStoppingConditionStrange = 1e-8;
 | 
				
			||||||
  double MaxCGIterations = 300000;
 | 
					  double MaxCGIterations = 300000;
 | 
				
			||||||
  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
 | 
					  ConjugateGradient<FermionField>  CG(StoppingCondition,MaxCGIterations);
 | 
				
			||||||
  ConjugateGradient<FermionField>  MDCG(MDStoppingCondition,MaxCGIterations);
 | 
					  ConjugateGradient<FermionField>  MDCG(MDStoppingCondition,MaxCGIterations);
 | 
				
			||||||
@@ -311,12 +300,12 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  // Probably dominates the force - back to EOFA.
 | 
					  // Probably dominates the force - back to EOFA.
 | 
				
			||||||
  OneFlavourRationalParams SFRp;
 | 
					  OneFlavourRationalParams SFRp;
 | 
				
			||||||
  SFRp.lo       = 0.25;
 | 
					  SFRp.lo       = 0.1;
 | 
				
			||||||
  SFRp.hi       = 25.0;
 | 
					  SFRp.hi       = 30.0;
 | 
				
			||||||
  SFRp.MaxIter  = 10000;
 | 
					  SFRp.MaxIter  = 10000;
 | 
				
			||||||
  SFRp.tolerance= 1.0e-5;
 | 
					  SFRp.tolerance= 1.0e-8;
 | 
				
			||||||
  SFRp.mdtolerance= 2.0e-4;
 | 
					  SFRp.mdtolerance= 2.0e-6;
 | 
				
			||||||
  SFRp.degree   = 8;
 | 
					  SFRp.degree   = 10;
 | 
				
			||||||
  SFRp.precision= 50;
 | 
					  SFRp.precision= 50;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
 | 
					  MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
 | 
				
			||||||
@@ -376,33 +365,29 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  ////////////////////////////////////
 | 
					  ////////////////////////////////////
 | 
				
			||||||
  std::vector<Real> light_den;
 | 
					  std::vector<Real> light_den;
 | 
				
			||||||
  std::vector<Real> light_num;
 | 
					  std::vector<Real> light_num;
 | 
				
			||||||
  std::vector<int> dirichlet_den;
 | 
					 | 
				
			||||||
  std::vector<int> dirichlet_num;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int n_hasenbusch = hasenbusch.size();
 | 
					  int n_hasenbusch = hasenbusch.size();
 | 
				
			||||||
  light_den.push_back(light_mass);  dirichlet_den.push_back(0);
 | 
					  light_den.push_back(light_mass); 
 | 
				
			||||||
  for(int h=0;h<n_hasenbusch;h++){
 | 
					  for(int h=0;h<n_hasenbusch;h++){
 | 
				
			||||||
    light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(0);
 | 
					    light_den.push_back(hasenbusch[h]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for(int h=0;h<n_hasenbusch;h++){
 | 
					  for(int h=0;h<n_hasenbusch;h++){
 | 
				
			||||||
    light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(0);
 | 
					    light_num.push_back(hasenbusch[h]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  light_num.push_back(pv_mass);  dirichlet_num.push_back(0);
 | 
					  light_num.push_back(pv_mass);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::vector<FermionAction *> Numerators;
 | 
					  std::vector<FermionAction *> Numerators;
 | 
				
			||||||
  std::vector<FermionAction *> Denominators;
 | 
					  std::vector<FermionAction *> Denominators;
 | 
				
			||||||
  std::vector<FermionActionF *> NumeratorsF;
 | 
					  std::vector<FermionActionF *> NumeratorsF;
 | 
				
			||||||
  std::vector<FermionActionF *> DenominatorsF;
 | 
					  std::vector<FermionActionF *> DenominatorsF;
 | 
				
			||||||
  std::vector<FermionActionD2 *> NumeratorsD2;
 | 
					 | 
				
			||||||
  std::vector<FermionActionD2 *> DenominatorsD2;
 | 
					 | 
				
			||||||
  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
 | 
					  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
 | 
				
			||||||
  std::vector<MxPCG *> ActionMPCG;
 | 
					  std::vector<MxPCG *> ActionMPCG;
 | 
				
			||||||
  std::vector<MxPCG *> MPCG;
 | 
					  std::vector<MxPCG *> MPCG;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
#define MIXED_PRECISION
 | 
					#define MIXED_PRECISION
 | 
				
			||||||
#ifdef MIXED_PRECISION
 | 
					#ifdef MIXED_PRECISION
 | 
				
			||||||
  std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2> *> Bdys;
 | 
					  std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
  std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
 | 
					  std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -416,9 +401,7 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
    std::cout << GridLogMessage
 | 
					    std::cout << GridLogMessage
 | 
				
			||||||
	      << " 2f quotient Action ";
 | 
						      << " 2f quotient Action ";
 | 
				
			||||||
    std::cout << "det D("<<light_den[h]<<")";
 | 
					    std::cout << "det D("<<light_den[h]<<")";
 | 
				
			||||||
    if ( dirichlet_den[h] ) std::cout << "^dirichlet    ";
 | 
					 | 
				
			||||||
    std::cout << "/ det D("<<light_num[h]<<")";
 | 
					    std::cout << "/ det D("<<light_num[h]<<")";
 | 
				
			||||||
    if ( dirichlet_num[h] ) std::cout << "^dirichlet    ";
 | 
					 | 
				
			||||||
    std::cout << std::endl;
 | 
					    std::cout << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    FermionAction::ImplParams ParamsNum(boundary);
 | 
					    FermionAction::ImplParams ParamsNum(boundary);
 | 
				
			||||||
@@ -426,21 +409,11 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
    FermionActionF::ImplParams ParamsDenF(boundary);
 | 
					    FermionActionF::ImplParams ParamsDenF(boundary);
 | 
				
			||||||
    FermionActionF::ImplParams ParamsNumF(boundary);
 | 
					    FermionActionF::ImplParams ParamsNumF(boundary);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    ParamsNum.dirichlet = NonDirichlet;
 | 
					 | 
				
			||||||
    ParamsDen.dirichlet = NonDirichlet;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    ParamsNum.partialDirichlet = 0;
 | 
					 | 
				
			||||||
    ParamsDen.partialDirichlet = 0;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
 | 
					    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
 | 
				
			||||||
    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
 | 
					    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ParamsDenF.dirichlet = ParamsDen.dirichlet;
 | 
					 | 
				
			||||||
    ParamsDenF.partialDirichlet = ParamsDen.partialDirichlet;
 | 
					 | 
				
			||||||
    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
 | 
					    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ParamsNumF.dirichlet = ParamsNum.dirichlet;
 | 
					 | 
				
			||||||
    ParamsNumF.partialDirichlet = ParamsNum.partialDirichlet;
 | 
					 | 
				
			||||||
    NumeratorsF.push_back  (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
 | 
					    NumeratorsF.push_back  (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
 | 
					    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
 | 
				
			||||||
@@ -477,7 +450,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
  // Gauge action
 | 
					  // Gauge action
 | 
				
			||||||
  /////////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////////
 | 
				
			||||||
  Level3.push_back(&GaugeAction);
 | 
					  Level3.push_back(&GaugeAction);
 | 
				
			||||||
  //  TheHMC.TheAction.push_back(Level1);
 | 
					 | 
				
			||||||
  TheHMC.TheAction.push_back(Level2);
 | 
					  TheHMC.TheAction.push_back(Level2);
 | 
				
			||||||
  TheHMC.TheAction.push_back(Level3);
 | 
					  TheHMC.TheAction.push_back(Level3);
 | 
				
			||||||
  std::cout << GridLogMessage << " Action complete "<< std::endl;
 | 
					  std::cout << GridLogMessage << " Action complete "<< std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,8 @@
 | 
				
			|||||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) 
 | 
					# Grid 
 | 
				
			||||||
 | 
					 | 
				
			||||||
**Data parallel C++ mathematical object library.**
 | 
					**Data parallel C++ mathematical object library.**
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[),branch:default:true)/statusIcon.svg)](https://ci.dev.dirac.ed.ac.uk/project/GridBasedSoftware_Grid?mode=builds) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
License: GPL v2.
 | 
					License: GPL v2.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Last update June 2017.
 | 
					Last update June 2017.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  err = r_eo-result;
 | 
					  err = r_eo-result;
 | 
				
			||||||
  n2e= norm2(err);
 | 
					  n2e= norm2(err);
 | 
				
			||||||
  std::cout<<GridLogMessage << "norm diff   "<< n2e<< "  Line "<<__LINE__ <<std::endl;
 | 
					  std::cout<<GridLogMessage << "norm diff   "<< n2e<<std::endl;
 | 
				
			||||||
  assert(n2e<1.0e-4);
 | 
					  assert(n2e<1.0e-4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pickCheckerboard(Even,src_e,err);
 | 
					  pickCheckerboard(Even,src_e,err);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										90
									
								
								documentation/David_notes.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								documentation/David_notes.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,90 @@
 | 
				
			|||||||
 | 
					Branch: develop
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Files:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid/lattice/PaddedCell.h -- Halo exchange
 | 
				
			||||||
 | 
					tests/Test_general_stencil.cc -- test local off axis stencil addressing
 | 
				
			||||||
 | 
					tests/debug/Test_padded_cell.cc -- test PaddedCell halo exchange and the General local stencil  by computing ALL plaquettes on lattice
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Functionality:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- extend a lattice field:
 | 
				
			||||||
 | 
					Grid/lattice/PaddedCell.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Constructor
 | 
				
			||||||
 | 
					  PaddedCell(int _depth,GridCartesian *_grid)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Expand a field "in" to depth "d"
 | 
				
			||||||
 | 
					  template<class vobj>
 | 
				
			||||||
 | 
					  inline Lattice<vobj> Exchange(Lattice<vobj> &in)
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					// Take the "apple core" of in to a smaller local volume
 | 
				
			||||||
 | 
					  template<class vobj>
 | 
				
			||||||
 | 
					  inline Lattice<vobj> Extract(Lattice<vobj> &in)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Plaquette test:
 | 
				
			||||||
 | 
					tests/debug/Test_padded_cell.cc
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Create a padded cell of extra padding depth=1
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int depth = 1;
 | 
				
			||||||
 | 
					  PaddedCell Ghost(depth,&GRID);
 | 
				
			||||||
 | 
					  LatticeGaugeField Ughost = Ghost.Exchange(Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///// Array for the site plaquette
 | 
				
			||||||
 | 
					  GridBase *GhostGrid = Ughost.Grid();
 | 
				
			||||||
 | 
					  LatticeComplex gplaq(GhostGrid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<Coordinate> shifts;
 | 
				
			||||||
 | 
					  for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					    for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					      //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x)
 | 
				
			||||||
 | 
					      Coordinate shift_0(Nd,0);
 | 
				
			||||||
 | 
					      Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
 | 
				
			||||||
 | 
					      Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
 | 
				
			||||||
 | 
					      shifts.push_back(shift_0);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_mu);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_nu);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  GeneralLocalStencil gStencil(GhostGrid,shifts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  gplaq=Zero();
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView( gp_v , gplaq, CpuWrite);
 | 
				
			||||||
 | 
					    autoView( t_v , trplaq, CpuRead);
 | 
				
			||||||
 | 
					    autoView( U_v , Ughost, CpuRead);
 | 
				
			||||||
 | 
					    for(int ss=0;ss<gp_v.size();ss++){
 | 
				
			||||||
 | 
					      int s=0;
 | 
				
			||||||
 | 
					      for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
						for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  auto SE0 = gStencil.GetEntry(s+0,ss);
 | 
				
			||||||
 | 
						  auto SE1 = gStencil.GetEntry(s+1,ss);
 | 
				
			||||||
 | 
						  auto SE2 = gStencil.GetEntry(s+2,ss);
 | 
				
			||||||
 | 
						  auto SE3 = gStencil.GetEntry(s+3,ss);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						  int o0 = SE0->_offset;
 | 
				
			||||||
 | 
						  int o1 = SE1->_offset;
 | 
				
			||||||
 | 
						  int o2 = SE2->_offset;
 | 
				
			||||||
 | 
						  int o3 = SE3->_offset;
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						  auto U0 = U_v[o0](mu);
 | 
				
			||||||
 | 
						  auto U1 = U_v[o1](nu);
 | 
				
			||||||
 | 
						  auto U2 = adj(U_v[o2](mu));
 | 
				
			||||||
 | 
						  auto U3 = adj(U_v[o3](nu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  gpermute(U0,SE0->_permute);
 | 
				
			||||||
 | 
						  gpermute(U1,SE1->_permute);
 | 
				
			||||||
 | 
						  gpermute(U2,SE2->_permute);
 | 
				
			||||||
 | 
						  gpermute(U3,SE3->_permute);
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						  gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 );
 | 
				
			||||||
 | 
						  s=s+4;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  cplaq = Ghost.Extract(gplaq);
 | 
				
			||||||
							
								
								
									
										133
									
								
								examples/socket_grid.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								examples/socket_grid.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,133 @@
 | 
				
			|||||||
 | 
					#include <sys/socket.h>
 | 
				
			||||||
 | 
					#include <sys/un.h>
 | 
				
			||||||
 | 
					#include <unistd.h>
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <err.h>
 | 
				
			||||||
 | 
					#include <fcntl.h>
 | 
				
			||||||
 | 
					#include <assert.h>
 | 
				
			||||||
 | 
					#include <string.h>
 | 
				
			||||||
 | 
					#include <stdlib.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int sock;
 | 
				
			||||||
 | 
					static const char *sock_path_fmt = "/tmp/GridUnixSocket.%d";
 | 
				
			||||||
 | 
					static char sock_path[256];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class UnixSockets {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  static void Open(int rank)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int errnum;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sock = socket(AF_UNIX, SOCK_DGRAM, 0);  assert(sock>0);
 | 
				
			||||||
 | 
					    printf("allocated socket %d\n",sock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    struct sockaddr_un sa_un = { 0 };
 | 
				
			||||||
 | 
					    sa_un.sun_family = AF_UNIX;
 | 
				
			||||||
 | 
					    snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,rank);
 | 
				
			||||||
 | 
					    unlink(sa_un.sun_path);
 | 
				
			||||||
 | 
					    if (bind(sock, (struct sockaddr *)&sa_un, sizeof(sa_un))) {
 | 
				
			||||||
 | 
					      perror("bind failure");
 | 
				
			||||||
 | 
					      exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    printf("bound socket %d to %s\n",sock,sa_un.sun_path);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static int RecvFileDescriptor(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int n;
 | 
				
			||||||
 | 
					    int fd;
 | 
				
			||||||
 | 
					    char buf[1];
 | 
				
			||||||
 | 
					    struct iovec iov;
 | 
				
			||||||
 | 
					    struct msghdr msg;
 | 
				
			||||||
 | 
					    struct cmsghdr *cmsg;
 | 
				
			||||||
 | 
					    char cms[CMSG_SPACE(sizeof(int))];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    iov.iov_base = buf;
 | 
				
			||||||
 | 
					    iov.iov_len = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    memset(&msg, 0, sizeof msg);
 | 
				
			||||||
 | 
					    msg.msg_name = 0;
 | 
				
			||||||
 | 
					    msg.msg_namelen = 0;
 | 
				
			||||||
 | 
					    msg.msg_iov = &iov;
 | 
				
			||||||
 | 
					    msg.msg_iovlen = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg.msg_control = (caddr_t)cms;
 | 
				
			||||||
 | 
					    msg.msg_controllen = sizeof cms;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if((n=recvmsg(sock, &msg, 0)) < 0) {
 | 
				
			||||||
 | 
					      perror("recvmsg failed");
 | 
				
			||||||
 | 
					      return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if(n == 0){
 | 
				
			||||||
 | 
					      perror("recvmsg returned 0");
 | 
				
			||||||
 | 
					      return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    cmsg = CMSG_FIRSTHDR(&msg);
 | 
				
			||||||
 | 
					    memmove(&fd, CMSG_DATA(cmsg), sizeof(int));
 | 
				
			||||||
 | 
					    printf("received fd %d from socket %d\n",fd,sock);
 | 
				
			||||||
 | 
					    return fd;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void SendFileDescriptor(int fildes,int xmit_to_rank)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    struct msghdr msg;
 | 
				
			||||||
 | 
					    struct iovec iov;
 | 
				
			||||||
 | 
					    struct cmsghdr *cmsg = NULL;
 | 
				
			||||||
 | 
					    char ctrl[CMSG_SPACE(sizeof(int))];
 | 
				
			||||||
 | 
					    char data = ' ';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    memset(&msg, 0, sizeof(struct msghdr));
 | 
				
			||||||
 | 
					    memset(ctrl, 0, CMSG_SPACE(sizeof(int)));
 | 
				
			||||||
 | 
					    iov.iov_base = &data;
 | 
				
			||||||
 | 
					    iov.iov_len = sizeof(data);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    sprintf(sock_path,sock_path_fmt,xmit_to_rank);
 | 
				
			||||||
 | 
					    printf("sending FD %d over socket %d to rank %d AF_UNIX path %s\n",fildes,sock,xmit_to_rank,sock_path);fflush(stdout);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    struct sockaddr_un sa_un = { 0 };
 | 
				
			||||||
 | 
					    sa_un.sun_family = AF_UNIX;
 | 
				
			||||||
 | 
					    snprintf(sa_un.sun_path, sizeof(sa_un.sun_path),sock_path_fmt,xmit_to_rank);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg.msg_name = (void *)&sa_un;
 | 
				
			||||||
 | 
					    msg.msg_namelen = sizeof(sa_un);
 | 
				
			||||||
 | 
					    msg.msg_iov = &iov;
 | 
				
			||||||
 | 
					    msg.msg_iovlen = 1;
 | 
				
			||||||
 | 
					    msg.msg_controllen =  CMSG_SPACE(sizeof(int));
 | 
				
			||||||
 | 
					    msg.msg_control = ctrl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    cmsg = CMSG_FIRSTHDR(&msg);
 | 
				
			||||||
 | 
					    cmsg->cmsg_level = SOL_SOCKET;
 | 
				
			||||||
 | 
					    cmsg->cmsg_type = SCM_RIGHTS;
 | 
				
			||||||
 | 
					    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    *((int *) CMSG_DATA(cmsg)) = fildes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if ( sendmsg(sock, &msg, 0) == -1 ) perror("sendmsg failed");
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main(int argc, char **argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int me = fork()?0:1;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  UnixSockets::Open(me);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // need MPI barrier
 | 
				
			||||||
 | 
					  sleep(10);
 | 
				
			||||||
 | 
					  const char * message = "Hello, World\n";
 | 
				
			||||||
 | 
					  if( me ) {
 | 
				
			||||||
 | 
					    int fd = open("foo",O_RDWR|O_CREAT,0666);
 | 
				
			||||||
 | 
					    if ( fd < 0 ) {
 | 
				
			||||||
 | 
					      perror("failed to open file");
 | 
				
			||||||
 | 
					      exit(EXIT_FAILURE);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // rank 1 sends ot rank 0
 | 
				
			||||||
 | 
					    UnixSockets::SendFileDescriptor(fd,0);
 | 
				
			||||||
 | 
					    close(fd);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    // rank 0 sends receives frmo rank 1
 | 
				
			||||||
 | 
					    int fd = UnixSockets::RecvFileDescriptor();
 | 
				
			||||||
 | 
					    write(fd,(const void *)message,strlen(message));
 | 
				
			||||||
 | 
					    close(fd);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -60,7 +60,7 @@ while test $# -gt 0; do
 | 
				
			|||||||
    ;;
 | 
					    ;;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    --cxxflags)
 | 
					    --cxxflags)
 | 
				
			||||||
      echo @GRID_CXXFLAGS@
 | 
					      echo @GRID_CXXFLAGS@ -I@prefix@/include
 | 
				
			||||||
    ;;
 | 
					    ;;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    --cxx)
 | 
					    --cxx)
 | 
				
			||||||
@@ -72,11 +72,11 @@ while test $# -gt 0; do
 | 
				
			|||||||
    ;;
 | 
					    ;;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    --ldflags)
 | 
					    --ldflags)
 | 
				
			||||||
      echo @GRID_LDFLAGS@
 | 
					      echo @GRID_LDFLAGS@ -L@prefix@/lib
 | 
				
			||||||
    ;;
 | 
					    ;;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    --libs)
 | 
					    --libs)
 | 
				
			||||||
      echo @GRID_LIBS@
 | 
					      echo @GRID_LIBS@ -lGrid
 | 
				
			||||||
    ;;
 | 
					    ;;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    --summary)
 | 
					    --summary)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
 | 
					CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
 | 
				
			||||||
../../configure --enable-comms=mpi-auto \
 | 
					../../configure --enable-comms=mpi-auto \
 | 
				
			||||||
--with-lime=$CLIME \
 | 
					--with-lime=$CLIME \
 | 
				
			||||||
--enable-unified=yes \
 | 
					--enable-unified=no \
 | 
				
			||||||
--enable-shm=nvlink \
 | 
					--enable-shm=nvlink \
 | 
				
			||||||
--enable-tracing=timer \
 | 
					--enable-tracing=timer \
 | 
				
			||||||
--enable-accelerator=hip \
 | 
					--enable-accelerator=hip \
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,8 +5,8 @@ module load emacs
 | 
				
			|||||||
#module load gperftools
 | 
					#module load gperftools
 | 
				
			||||||
module load PrgEnv-gnu
 | 
					module load PrgEnv-gnu
 | 
				
			||||||
module load rocm/5.3.0
 | 
					module load rocm/5.3.0
 | 
				
			||||||
module load cray-mpich/8.1.16
 | 
					#module load cray-mpich/8.1.16
 | 
				
			||||||
#module load cray-mpich/8.1.17
 | 
					module load cray-mpich/8.1.17
 | 
				
			||||||
module load gmp
 | 
					module load gmp
 | 
				
			||||||
module load cray-fftw
 | 
					module load cray-fftw
 | 
				
			||||||
module load craype-accel-amd-gfx90a
 | 
					module load craype-accel-amd-gfx90a
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										44
									
								
								systems/Lumi/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										44
									
								
								systems/Lumi/benchmarks/bench2.slurm
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,44 @@
 | 
				
			|||||||
 | 
					#!/bin/bash -l
 | 
				
			||||||
 | 
					#SBATCH --job-name=bench_lehner
 | 
				
			||||||
 | 
					#SBATCH --partition=small-g
 | 
				
			||||||
 | 
					#SBATCH --nodes=2
 | 
				
			||||||
 | 
					#SBATCH --ntasks-per-node=8
 | 
				
			||||||
 | 
					#SBATCH --cpus-per-task=7
 | 
				
			||||||
 | 
					#SBATCH --gpus-per-node=8
 | 
				
			||||||
 | 
					#SBATCH --time=00:10:00
 | 
				
			||||||
 | 
					#SBATCH --account=project_465000546
 | 
				
			||||||
 | 
					#SBATCH --gpu-bind=none
 | 
				
			||||||
 | 
					#SBATCH --exclusive
 | 
				
			||||||
 | 
					#SBATCH --mem=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CPU_BIND="map_cpu:48,56,32,40,16,24,1,8"
 | 
				
			||||||
 | 
					echo $CPU_BIND
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cat << EOF > select_gpu
 | 
				
			||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					export GPU_MAP=(0 1 2 3 4 5 6 7)
 | 
				
			||||||
 | 
					export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
 | 
				
			||||||
 | 
					export HIP_VISIBLE_DEVICES=\$GPU
 | 
				
			||||||
 | 
					unset ROCR_VISIBLE_DEVICES
 | 
				
			||||||
 | 
					echo RANK \$SLURM_LOCALID using GPU \$GPU    
 | 
				
			||||||
 | 
					exec \$*
 | 
				
			||||||
 | 
					EOF
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					chmod +x ./select_gpu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					root=/scratch/project_465000546/boylepet/Grid/systems/Lumi
 | 
				
			||||||
 | 
					source ${root}/sourceme.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=7
 | 
				
			||||||
 | 
					export MPICH_GPU_SUPPORT_ENABLED=1
 | 
				
			||||||
 | 
					export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for vol in 16.16.16.64 32.32.32.64  32.32.32.128
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.ov.$vol
 | 
				
			||||||
 | 
					#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol  > log.shm1.ov.$vol
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol  > log.shm0.seq.$vol
 | 
				
			||||||
 | 
					#srun --cpu-bind=${CPU_BIND} ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										30
									
								
								systems/Lumi/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								systems/Lumi/config-command
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,30 @@
 | 
				
			|||||||
 | 
					spack load c-lime
 | 
				
			||||||
 | 
					spack load gmp
 | 
				
			||||||
 | 
					spack load mpfr
 | 
				
			||||||
 | 
					CLIME=`spack find --paths c-lime | grep c-lime| cut -c 15-`
 | 
				
			||||||
 | 
					GMP=`spack find --paths gmp | grep gmp | cut -c 12-`
 | 
				
			||||||
 | 
					MPFR=`spack find --paths mpfr | grep mpfr | cut -c 13-`
 | 
				
			||||||
 | 
					echo clime X$CLIME
 | 
				
			||||||
 | 
					echo gmp X$GMP
 | 
				
			||||||
 | 
					echo mpfr X$MPFR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					../../configure \
 | 
				
			||||||
 | 
					--enable-comms=mpi-auto \
 | 
				
			||||||
 | 
					--with-lime=$CLIME \
 | 
				
			||||||
 | 
					--enable-unified=no \
 | 
				
			||||||
 | 
					--enable-shm=nvlink \
 | 
				
			||||||
 | 
					--enable-accelerator=hip \
 | 
				
			||||||
 | 
					--enable-gen-simd-width=64 \
 | 
				
			||||||
 | 
					--enable-simd=GPU \
 | 
				
			||||||
 | 
					--enable-accelerator-cshift \
 | 
				
			||||||
 | 
					--with-gmp=$GMP \
 | 
				
			||||||
 | 
					--with-mpfr=$MPFR \
 | 
				
			||||||
 | 
					--with-fftw=$FFTW_DIR/.. \
 | 
				
			||||||
 | 
					--disable-fermion-reps \
 | 
				
			||||||
 | 
					--disable-gparity \
 | 
				
			||||||
 | 
					CXX=hipcc MPICXX=mpicxx \
 | 
				
			||||||
 | 
					  CXXFLAGS="-fPIC --offload-arch=gfx90a -I/opt/rocm/include/ -std=c++14 -I/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/include" \
 | 
				
			||||||
 | 
					  LDFLAGS="-L/opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1/lib -lmpi -L/opt/cray/pe/mpich/8.1.23/gtl/lib -lmpi_gtl_hsa -lamdhip64 -fopenmp" 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										5
									
								
								systems/Lumi/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								systems/Lumi/sourceme.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,5 @@
 | 
				
			|||||||
 | 
					source ~/spack/share/spack/setup-env.sh
 | 
				
			||||||
 | 
					module load CrayEnv LUMI/22.12 partition/G  cray-fftw/3.3.10.1 rocm
 | 
				
			||||||
 | 
					spack load c-lime
 | 
				
			||||||
 | 
					spack load gmp
 | 
				
			||||||
 | 
					spack load mpfr
 | 
				
			||||||
@@ -3,8 +3,14 @@ export https_proxy=http://proxy-chain.intel.com:911
 | 
				
			|||||||
export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
 | 
					export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
module load intel-release
 | 
					module load intel-release
 | 
				
			||||||
source /opt/intel/oneapi/PVC_setup.sh
 | 
					module load intel-comp-rt/embargo-ci-neo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#source /opt/intel/oneapi/PVC_setup.sh
 | 
				
			||||||
#source /opt/intel/oneapi/ATS_setup.sh
 | 
					#source /opt/intel/oneapi/ATS_setup.sh
 | 
				
			||||||
 | 
					#module load intel-nightly/20230331
 | 
				
			||||||
 | 
					#module load intel-comp-rt/ci-neo-master/026093
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#module load intel/mpich
 | 
				
			||||||
module load intel/mpich/pvc45.3
 | 
					module load intel/mpich/pvc45.3
 | 
				
			||||||
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH
 | 
					export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										46
									
								
								systems/Sunspot/benchmarks/bench.pbs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								systems/Sunspot/benchmarks/bench.pbs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#PBS -l select=1:system=sunspot,place=scatter
 | 
				
			||||||
 | 
					#PBS -A LatticeQCD_aesp_CNDA
 | 
				
			||||||
 | 
					#PBS -l walltime=01:00:00
 | 
				
			||||||
 | 
					#PBS -N dwf
 | 
				
			||||||
 | 
					#PBS -k doe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					HDIR=/home/paboyle/
 | 
				
			||||||
 | 
					module use /soft/testing/modulefiles/
 | 
				
			||||||
 | 
					module load intel-UMD23.05.25593.11/23.05.25593.11
 | 
				
			||||||
 | 
					module load tools/pti-gpu  
 | 
				
			||||||
 | 
					export LD_LIBRARY_PATH=$HDIR/tools/lib64:$LD_LIBRARY_PATH
 | 
				
			||||||
 | 
					export PATH=$HDIR/tools/bin:$PATH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export TZ='/usr/share/zoneinfo/US/Central'
 | 
				
			||||||
 | 
					export OMP_PROC_BIND=spread
 | 
				
			||||||
 | 
					export OMP_NUM_THREADS=3
 | 
				
			||||||
 | 
					unset OMP_PLACES
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cd $PBS_O_WORKDIR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					qsub jobscript.pbs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo Jobid: $PBS_JOBID
 | 
				
			||||||
 | 
					echo Running on host `hostname`
 | 
				
			||||||
 | 
					echo Running on nodes `cat $PBS_NODEFILE`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo NODES
 | 
				
			||||||
 | 
					cat $PBS_NODEFILE
 | 
				
			||||||
 | 
					NNODES=`wc -l < $PBS_NODEFILE`
 | 
				
			||||||
 | 
					NRANKS=12         # Number of MPI ranks per node
 | 
				
			||||||
 | 
					NDEPTH=4          # Number of hardware threads per rank, spacing between MPI ranks on a node
 | 
				
			||||||
 | 
					NTHREADS=$OMP_NUM_THREADS # Number of OMP threads per rank, given to OMP_NUM_THREADS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NTOTRANKS=$(( NNODES * NRANKS ))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo "NUM_NODES=${NNODES}  TOTAL_RANKS=${NTOTRANKS}  RANKS_PER_NODE=${NRANKS}  THREADS_PER_RANK=${OMP_NUM_THREADS}"
 | 
				
			||||||
 | 
					echo "OMP_PROC_BIND=$OMP_PROC_BIND OMP_PLACES=$OMP_PLACES"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					CMD="mpiexec -np ${NTOTRANKS} -ppn ${NRANKS} -d ${NDEPTH} --cpu-bind=depth -envall \
 | 
				
			||||||
 | 
						     ./gpu_tile_compact.sh \
 | 
				
			||||||
 | 
						./Benchmark_dwf_fp32 --mpi 1.1.2.6 --grid 16.32.64.192 --comms-overlap \
 | 
				
			||||||
 | 
						--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads 32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										52
									
								
								systems/Sunspot/benchmarks/gpu_tile_compact.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										52
									
								
								systems/Sunspot/benchmarks/gpu_tile_compact.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,52 @@
 | 
				
			|||||||
 | 
					#!/bin/bash
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					display_help() {
 | 
				
			||||||
 | 
					  echo " Will map gpu tile to rank in compact and then round-robin fashion"
 | 
				
			||||||
 | 
					  echo " Usage (only work for one node of ATS/PVC):"
 | 
				
			||||||
 | 
					  echo "   mpiexec --np N gpu_tile_compact.sh ./a.out"
 | 
				
			||||||
 | 
					  echo
 | 
				
			||||||
 | 
					  echo " Example 3 GPU of 2 Tiles with 7 Ranks:"
 | 
				
			||||||
 | 
					  echo "   0 Rank 0.0"
 | 
				
			||||||
 | 
					  echo "   1 Rank 0.1"
 | 
				
			||||||
 | 
					  echo "   2 Rank 1.0"
 | 
				
			||||||
 | 
					  echo "   3 Rank 1.1"
 | 
				
			||||||
 | 
					  echo "   4 Rank 2.0"
 | 
				
			||||||
 | 
					  echo "   5 Rank 2.1"
 | 
				
			||||||
 | 
					  echo "   6 Rank 0.0"
 | 
				
			||||||
 | 
					  echo
 | 
				
			||||||
 | 
					  echo " Hacked together by apl@anl.gov, please contact if bug found"
 | 
				
			||||||
 | 
					  exit 1
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#This give the exact GPU count i915 knows about and I use udev to only enumerate the devices with physical presence.
 | 
				
			||||||
 | 
					#works? num_gpu=$(/usr/bin/udevadm info /sys/module/i915/drivers/pci\:i915/* |& grep -v Unknown | grep -c "P: /devices")
 | 
				
			||||||
 | 
					num_gpu=6
 | 
				
			||||||
 | 
					num_tile=2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if [ "$#" -eq 0 ] || [ "$1" == "--help" ] || [ "$1" == "-h" ] || [ "$num_gpu" = 0 ]; then
 | 
				
			||||||
 | 
					  display_help
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					gpu_id=$(( (PALS_LOCAL_RANKID / num_tile ) % num_gpu ))
 | 
				
			||||||
 | 
					tile_id=$((PALS_LOCAL_RANKID % num_tile))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unset EnableWalkerPartition
 | 
				
			||||||
 | 
					export EnableImplicitScaling=0
 | 
				
			||||||
 | 
					export ZE_ENABLE_PCI_ID_DEVICE_ORDER=1
 | 
				
			||||||
 | 
					export ZE_AFFINITY_MASK=$gpu_id.$tile_id
 | 
				
			||||||
 | 
					export ONEAPI_DEVICE_FILTER=gpu,level_zero
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
 | 
				
			||||||
 | 
					export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
 | 
				
			||||||
 | 
					#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if [ $PALS_LOCAL_RANKID = 0 ]
 | 
				
			||||||
 | 
					then
 | 
				
			||||||
 | 
					    onetrace --chrome-device-timeline "$@"
 | 
				
			||||||
 | 
					#    "$@"
 | 
				
			||||||
 | 
					else
 | 
				
			||||||
 | 
					"$@"
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
							
								
								
									
										16
									
								
								systems/Sunspot/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								systems/Sunspot/config-command
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,16 @@
 | 
				
			|||||||
 | 
					TOOLS=$HOME/tools
 | 
				
			||||||
 | 
					../../configure \
 | 
				
			||||||
 | 
						--enable-simd=GPU \
 | 
				
			||||||
 | 
						--enable-gen-simd-width=64 \
 | 
				
			||||||
 | 
						--enable-comms=mpi-auto \
 | 
				
			||||||
 | 
						--enable-accelerator-cshift \
 | 
				
			||||||
 | 
						--disable-gparity \
 | 
				
			||||||
 | 
						--disable-fermion-reps \
 | 
				
			||||||
 | 
						--enable-shm=nvlink \
 | 
				
			||||||
 | 
						--enable-accelerator=sycl \
 | 
				
			||||||
 | 
						--enable-unified=no \
 | 
				
			||||||
 | 
						MPICXX=mpicxx \
 | 
				
			||||||
 | 
						CXX=icpx \
 | 
				
			||||||
 | 
						LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -lapmidg -L$TOOLS/lib64/" \
 | 
				
			||||||
 | 
						CXXFLAGS="-fiopenmp -fsycl-unnamed-lambda -fsycl -I$INSTALL/include -Wno-tautological-compare -I$HOME/ -I$TOOLS/include"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1,2 +1,4 @@
 | 
				
			|||||||
CXX=mpicxx-openmpi-mp CXXFLAGS=-I/opt/local/include/ LDFLAGS=-L/opt/local/lib/ ../../configure --enable-simd=GEN --enable-debug --enable-comms=mpi --enable-unified=yes
 | 
					BREW=/opt/local/
 | 
				
			||||||
 | 
					MPICXX=mpicxx CXX=c++-12 ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -115,6 +115,7 @@ int main(int argc, char ** argv)
 | 
				
			|||||||
	  if (SE->_permute & 0x2 ) { permute(check[i],tmp,1); tmp=check[i];}
 | 
						  if (SE->_permute & 0x2 ) { permute(check[i],tmp,1); tmp=check[i];}
 | 
				
			||||||
	  if (SE->_permute & 0x4 ) { permute(check[i],tmp,2); tmp=check[i];}
 | 
						  if (SE->_permute & 0x4 ) { permute(check[i],tmp,2); tmp=check[i];}
 | 
				
			||||||
	  if (SE->_permute & 0x8 ) { permute(check[i],tmp,3); tmp=check[i];}
 | 
						  if (SE->_permute & 0x8 ) { permute(check[i],tmp,3); tmp=check[i];}
 | 
				
			||||||
 | 
						  //	  std::cout<<GridLogMessage<<"stencil["<<i<<"] "<< check[i]<< " perm "<<(uint32_t)SE->_permute <<std::endl;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	Real nrmC = norm2(Check);
 | 
						Real nrmC = norm2(Check);
 | 
				
			||||||
@@ -138,18 +139,17 @@ int main(int argc, char ** argv)
 | 
				
			|||||||
	  ddiff = check -bar;
 | 
						  ddiff = check -bar;
 | 
				
			||||||
	  diff =norm2(ddiff);
 | 
						  diff =norm2(ddiff);
 | 
				
			||||||
	  if ( diff > 0){
 | 
						  if ( diff > 0){
 | 
				
			||||||
	    std::cout <<"Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
 | 
						    std::cout <<"Diff at Coor (" << coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]
 | 
				
			||||||
		      <<") " <<check<<" vs "<<bar<<std::endl;
 | 
							      <<") stencil " <<check<<" vs cshift "<<bar<<std::endl;
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
	}}}}
 | 
						}}}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (nrm > 1.0e-4) {
 | 
						if (nrm > 1.0e-4) {
 | 
				
			||||||
	  autoView( check , Check, CpuRead);
 | 
						  autoView( check , Check, CpuRead);
 | 
				
			||||||
	  autoView(   bar ,   Bar, CpuRead);
 | 
						  autoView(   bar ,   Bar, CpuRead);
 | 
				
			||||||
	  for(int i=0;i<check.size();i++){
 | 
						  for(int i=0;i<check.size();i++){
 | 
				
			||||||
	    std::cout << i<<" Check "<<check[i]<< "\n"<<i<<" Bar "<<bar[i]<<std::endl;
 | 
						    std::cout << i<<" ERROR Check \n"<<check[i]<< "\n"<<i<<" Bar \n"<<bar[i]<<std::endl;
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (nrm > 1.0e-4) exit(-1);
 | 
						if (nrm > 1.0e-4) exit(-1);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,7 +53,7 @@ static int readInt(int* argc, char*** argv, std::string&& option, int defaultVal
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
static float readFloat(int* argc, char*** argv, std::string&& option, float defaultValue) {
 | 
					static float readFloat(int* argc, char*** argv, std::string&& option, float defaultValue) {
 | 
				
			||||||
  std::string arg;
 | 
					  std::string arg;
 | 
				
			||||||
  float       ret = defaultValue;
 | 
					  double      ret = defaultValue;
 | 
				
			||||||
  if(checkPresent(argc, argv, option)) {
 | 
					  if(checkPresent(argc, argv, option)) {
 | 
				
			||||||
    arg = getContent(argc, argv, option);
 | 
					    arg = getContent(argc, argv, option);
 | 
				
			||||||
    GridCmdOptionFloat(arg, ret);
 | 
					    GridCmdOptionFloat(arg, ret);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,244 +0,0 @@
 | 
				
			|||||||
    /*************************************************************************************
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Gamma::Algebra Gmu [] = {
 | 
					 | 
				
			||||||
  Gamma::Algebra::GammaX,
 | 
					 | 
				
			||||||
  Gamma::Algebra::GammaY,
 | 
					 | 
				
			||||||
  Gamma::Algebra::GammaZ,
 | 
					 | 
				
			||||||
  Gamma::Algebra::GammaT,
 | 
					 | 
				
			||||||
  Gamma::Algebra::Gamma5
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main (int argc, char ** argv)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  Grid_init(&argc,&argv);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int threads = GridThread::GetThreads();
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  Coordinate latt_size   = GridDefaultLatt();
 | 
					 | 
				
			||||||
  Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
					 | 
				
			||||||
  Coordinate mpi_layout  = GridDefaultMpi();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int vol = 1;
 | 
					 | 
				
			||||||
  for(int d=0;d<latt_size.size();d++){
 | 
					 | 
				
			||||||
    vol = vol * latt_size[d];
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
 | 
					 | 
				
			||||||
  GridRedBlackCartesian RBGRID(&GRID);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  LatticeComplexD    coor(&GRID);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ComplexD ci(0.0,1.0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  std::vector<int> seeds({1,2,3,4});
 | 
					 | 
				
			||||||
  GridSerialRNG          sRNG;  sRNG.SeedFixedIntegers(seeds); // naughty seeding
 | 
					 | 
				
			||||||
  GridParallelRNG          pRNG(&GRID);
 | 
					 | 
				
			||||||
  pRNG.SeedFixedIntegers(seeds);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  LatticeGaugeFieldD Umu(&GRID);
 | 
					 | 
				
			||||||
  SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Wilson test
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
 | 
					 | 
				
			||||||
    LatticeFermionD    src_p(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    tmp(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    ref(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    result(&GRID);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    RealD mass=0.1;
 | 
					 | 
				
			||||||
    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    Dw.M(src,ref);
 | 
					 | 
				
			||||||
    std::cout << "Norm src "<<norm2(src)<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Norm Dw x src "<<norm2(ref)<<std::endl;
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      FFT theFFT(&GRID);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      ////////////////
 | 
					 | 
				
			||||||
      // operator in Fourier space
 | 
					 | 
				
			||||||
      ////////////////
 | 
					 | 
				
			||||||
      tmp =ref;
 | 
					 | 
				
			||||||
      theFFT.FFT_all_dim(result,tmp,FFT::forward);
 | 
					 | 
				
			||||||
      std::cout<<"FFT[ Dw x src ]  "<< norm2(result)<<std::endl;    
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      tmp = src;
 | 
					 | 
				
			||||||
      theFFT.FFT_all_dim(src_p,tmp,FFT::forward);
 | 
					 | 
				
			||||||
      std::cout<<"FFT[ src      ]  "<< norm2(src_p)<<std::endl;
 | 
					 | 
				
			||||||
      
 | 
					 | 
				
			||||||
      /////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
      // work out the predicted FT from Fourier
 | 
					 | 
				
			||||||
      /////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
      auto FGrid = &GRID;
 | 
					 | 
				
			||||||
      LatticeFermionD    Kinetic(FGrid); Kinetic = Zero();
 | 
					 | 
				
			||||||
      LatticeComplexD    kmu(FGrid); 
 | 
					 | 
				
			||||||
      LatticeInteger     scoor(FGrid); 
 | 
					 | 
				
			||||||
      LatticeComplexD    sk (FGrid); sk = Zero();
 | 
					 | 
				
			||||||
      LatticeComplexD    sk2(FGrid); sk2= Zero();
 | 
					 | 
				
			||||||
      LatticeComplexD    W(FGrid); W= Zero();
 | 
					 | 
				
			||||||
      LatticeComplexD    one(FGrid); one =ComplexD(1.0,0.0);
 | 
					 | 
				
			||||||
      ComplexD ci(0.0,1.0);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      for(int mu=0;mu<Nd;mu++) {
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	LatticeCoordinate(kmu,mu);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	kmu = TwoPiL * kmu;
 | 
					 | 
				
			||||||
      
 | 
					 | 
				
			||||||
	sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
 | 
					 | 
				
			||||||
	sk  = sk  +     sin(kmu)    *sin(kmu); 
 | 
					 | 
				
			||||||
      
 | 
					 | 
				
			||||||
	// -1/2 Dw ->  1/2 gmu (eip - emip) = i sinp gmu
 | 
					 | 
				
			||||||
	Kinetic = Kinetic + sin(kmu)*ci*(Gamma(Gmu[mu])*src_p);
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      W = mass + sk2; 
 | 
					 | 
				
			||||||
      Kinetic = Kinetic + W * src_p;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      std::cout<<"Momentum space src         "<< norm2(src_p)<<std::endl;
 | 
					 | 
				
			||||||
      std::cout<<"Momentum space Dw x src    "<< norm2(Kinetic)<<std::endl;
 | 
					 | 
				
			||||||
      std::cout<<"FT[Coordinate space Dw]    "<< norm2(result)<<std::endl;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
      result = result - Kinetic;
 | 
					 | 
				
			||||||
      std::cout<<"diff "<< norm2(result)<<std::endl;
 | 
					 | 
				
			||||||
      
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    std::cout << " =======================================" <<std::endl;
 | 
					 | 
				
			||||||
    std::cout << " Checking FourierFreePropagator x Dw = 1" <<std::endl;
 | 
					 | 
				
			||||||
    std::cout << " =======================================" <<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw src = " <<norm2(src)<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw tmp = " <<norm2(tmp)<<std::endl;
 | 
					 | 
				
			||||||
    Dw.M(src,tmp);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Dw.FreePropagator(tmp,ref,mass);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    std::cout << "Dw ref = " <<norm2(ref)<<std::endl;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    ref = ref - src;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    std::cout << "Dw ref-src = " <<norm2(ref)<<std::endl;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  // Wilson prop
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout<<"****************************************"<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Wilson Mom space 4d propagator \n";
 | 
					 | 
				
			||||||
    std::cout<<"****************************************"<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
 | 
					 | 
				
			||||||
    LatticeFermionD    tmp(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    ref(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    diff(&GRID);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    src=Zero();
 | 
					 | 
				
			||||||
    Coordinate point(4,0); // 0,0,0,0
 | 
					 | 
				
			||||||
    SpinColourVectorD ferm;
 | 
					 | 
				
			||||||
    ferm=Zero();
 | 
					 | 
				
			||||||
    ferm()(0)(0) = ComplexD(1.0);
 | 
					 | 
				
			||||||
    pokeSite(ferm,src,point);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    RealD mass=0.1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Momentum space prop
 | 
					 | 
				
			||||||
    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
					 | 
				
			||||||
    Dw.FreePropagator(src,ref,mass) ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma G5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    LatticeFermionD    result(&GRID); 
 | 
					 | 
				
			||||||
    const int sdir=0;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    // Conjugate gradient on normal equations system
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
 | 
					 | 
				
			||||||
    Dw.Mdag(src,tmp);
 | 
					 | 
				
			||||||
    src=tmp;
 | 
					 | 
				
			||||||
    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
 | 
					 | 
				
			||||||
    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
 | 
					 | 
				
			||||||
    CG(HermOp,src,result);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    std::cout << " Taking difference" <<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw result "<<norm2(result)<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    diff = ref - result;
 | 
					 | 
				
			||||||
    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  //Gauge invariance test
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
  {
 | 
					 | 
				
			||||||
    std::cout<<"****************************************"<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Gauge invariance test \n";
 | 
					 | 
				
			||||||
    std::cout<<"****************************************"<<std::endl;
 | 
					 | 
				
			||||||
    LatticeGaugeField     U_GT(&GRID); // Gauge transformed field
 | 
					 | 
				
			||||||
    LatticeColourMatrix   g(&GRID);    // local Gauge xform matrix
 | 
					 | 
				
			||||||
    U_GT = Umu;
 | 
					 | 
				
			||||||
    // Make a random xform to teh gauge field
 | 
					 | 
				
			||||||
    SU<Nc>::RandomGaugeTransform(pRNG,U_GT,g); // Unit gauge
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    LatticeFermionD    src(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    tmp(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    ref(&GRID);
 | 
					 | 
				
			||||||
    LatticeFermionD    diff(&GRID);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // could loop over colors
 | 
					 | 
				
			||||||
    src=Zero();
 | 
					 | 
				
			||||||
    Coordinate point(4,0); // 0,0,0,0
 | 
					 | 
				
			||||||
    SpinColourVectorD ferm;
 | 
					 | 
				
			||||||
    ferm=Zero();
 | 
					 | 
				
			||||||
    ferm()(0)(0) = ComplexD(1.0);
 | 
					 | 
				
			||||||
    pokeSite(ferm,src,point);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    RealD mass=0.1;
 | 
					 | 
				
			||||||
    WilsonFermionD Dw(U_GT,GRID,RBGRID,mass);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Momentum space prop
 | 
					 | 
				
			||||||
    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
					 | 
				
			||||||
    Dw.FreePropagator(src,ref,mass) ;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Gamma G5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    LatticeFermionD    result(&GRID); 
 | 
					 | 
				
			||||||
    const int sdir=0;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    // Conjugate gradient on normal equations system
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
 | 
					 | 
				
			||||||
    Dw.Mdag(src,tmp);
 | 
					 | 
				
			||||||
    src=tmp;
 | 
					 | 
				
			||||||
    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
 | 
					 | 
				
			||||||
    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
 | 
					 | 
				
			||||||
    CG(HermOp,src,result);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
    std::cout << " Taking difference" <<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw result "<<norm2(result)<<std::endl;
 | 
					 | 
				
			||||||
    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    diff = ref - result;
 | 
					 | 
				
			||||||
    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  Grid_finalize();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
							
								
								
									
										307
									
								
								tests/core/Test_fft_pf.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										307
									
								
								tests/core/Test_fft_pf.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,307 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    grid` physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_cshift.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Coordinate latt_size   = GridDefaultLatt();
 | 
				
			||||||
 | 
					  Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
 | 
				
			||||||
 | 
					  Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int vol = 1;
 | 
				
			||||||
 | 
					  for(int d=0;d<latt_size.size();d++){
 | 
				
			||||||
 | 
					    vol = vol * latt_size[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					  GridRedBlackCartesian RBGRID(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ComplexD ci(0.0,1.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> seeds({1,2,3,4});
 | 
				
			||||||
 | 
					  GridSerialRNG          sRNG;  sRNG.SeedFixedIntegers(seeds); // naughty seeding
 | 
				
			||||||
 | 
					  GridParallelRNG          pRNG(&GRID);
 | 
				
			||||||
 | 
					  pRNG.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeGaugeFieldD Umu(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // PF prop
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  LatticeFermionD    src(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  gaussian(pRNG,src);
 | 
				
			||||||
 | 
					#if 1
 | 
				
			||||||
 | 
					    Coordinate point(4,0);
 | 
				
			||||||
 | 
					    src=Zero();
 | 
				
			||||||
 | 
					    SpinColourVectorD ferm; gaussian(sRNG,ferm);
 | 
				
			||||||
 | 
					    pokeSite(ferm,src,point);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    ref(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    diff(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int Ls=48+1;
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD M5  =0.8;
 | 
				
			||||||
 | 
					    OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Momentum space prop
 | 
				
			||||||
 | 
					    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
				
			||||||
 | 
					    bool fiveD = false; //calculate 4d free propagator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << " Free propagator " <<std::endl;
 | 
				
			||||||
 | 
					    Dov.FreePropagator(src,ref,mass) ;
 | 
				
			||||||
 | 
					    std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Gamma G5(Gamma::Algebra::Gamma5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeFermionD    src5(FGrid); src5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp5(FGrid); 
 | 
				
			||||||
 | 
					    LatticeFermionD    result5(FGrid); result5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    result4(&GRID); 
 | 
				
			||||||
 | 
					    const int sdir=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Import
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
 | 
				
			||||||
 | 
					    Dov.ImportPhysicalFermionSource  (src,src5);
 | 
				
			||||||
 | 
					    std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Conjugate gradient on normal equations system
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
 | 
				
			||||||
 | 
					    Dov.Mdag(src5,tmp5);
 | 
				
			||||||
 | 
					    src5=tmp5;
 | 
				
			||||||
 | 
					    MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
 | 
				
			||||||
 | 
					    ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
 | 
				
			||||||
 | 
					    CG(HermOp,src5,result5);
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Domain wall physical field propagator
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    Dov.ExportPhysicalFermionSolution(result5,result4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // From DWF4d.pdf :
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // Dov_pf = 2/(1-m) D_cayley_ovlap  [ Page 43 ]
 | 
				
			||||||
 | 
					    // Dinv_cayley_ovlap = 2/(1-m) Dinv_pf 
 | 
				
			||||||
 | 
					    // Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) =>  2/(1-m)^2 Dinv_pf - 1/(1-m) * src   [ Eq.2.67 ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD scale = 2.0/(1.0-mass)/(1.0-mass);
 | 
				
			||||||
 | 
					    result4 = result4 * scale;
 | 
				
			||||||
 | 
					    result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
 | 
				
			||||||
 | 
					    DumpSliceNorm("Src",src);
 | 
				
			||||||
 | 
					    DumpSliceNorm("Grid",result4);
 | 
				
			||||||
 | 
					    DumpSliceNorm("Fourier",ref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Dov ref     "<<norm2(ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    diff = result4- ref;
 | 
				
			||||||
 | 
					    DumpSliceNorm("diff ",diff);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Dwf prop
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    ref(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    diff(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int Ls=48;
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD M5  =0.8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Momentum space prop
 | 
				
			||||||
 | 
					    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
				
			||||||
 | 
					    Dov.FreePropagator(src,ref,mass) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Gamma G5(Gamma::Algebra::Gamma5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeFermionD    src5(FGrid); src5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp5(FGrid); 
 | 
				
			||||||
 | 
					    LatticeFermionD    result5(FGrid); result5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    result4(&GRID); 
 | 
				
			||||||
 | 
					    const int sdir=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Domain wall physical field source; need D_minus
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
						chi_5[0]   = chiralProjectPlus(chi);
 | 
				
			||||||
 | 
						chi_5[Ls-1]= chiralProjectMinus(chi);
 | 
				
			||||||
 | 
					    */      
 | 
				
			||||||
 | 
					    tmp =   (src + G5*src)*0.5;      InsertSlice(tmp,src5,   0,sdir);
 | 
				
			||||||
 | 
					    tmp =   (src - G5*src)*0.5;      InsertSlice(tmp,src5,Ls-1,sdir);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Conjugate gradient on normal equations system
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
 | 
				
			||||||
 | 
					    Dov.Dminus(src5,tmp5);
 | 
				
			||||||
 | 
					    src5=tmp5;
 | 
				
			||||||
 | 
					    Dov.Mdag(src5,tmp5);
 | 
				
			||||||
 | 
					    src5=tmp5;
 | 
				
			||||||
 | 
					    MdagMLinearOperator<OverlapWilsonCayleyTanhFermionD,LatticeFermionD> HermOp(Dov);
 | 
				
			||||||
 | 
					    ConjugateGradient<LatticeFermionD> CG(1.0e-16,10000);
 | 
				
			||||||
 | 
					    CG(HermOp,src5,result5);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Domain wall physical field propagator
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					      psi  = chiralProjectMinus(psi_5[0]);
 | 
				
			||||||
 | 
					      psi += chiralProjectPlus(psi_5[Ls-1]);
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					    ExtractSlice(tmp,result5,0   ,sdir);   result4 =         (tmp-G5*tmp)*0.5;
 | 
				
			||||||
 | 
					    ExtractSlice(tmp,result5,Ls-1,sdir);   result4 = result4+(tmp+G5*tmp)*0.5;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    std::cout << " Taking difference" <<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Dov ref     "<<norm2(ref)<<std::endl;
 | 
				
			||||||
 | 
					    DumpSliceNorm("Grid",result4);
 | 
				
			||||||
 | 
					    DumpSliceNorm("Fourier",ref);
 | 
				
			||||||
 | 
					    diff = ref - result4;
 | 
				
			||||||
 | 
					    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    DumpSliceNorm("diff",diff);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
 | 
				
			||||||
 | 
					    std::cout<<"****************************************"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    ref(&GRID);
 | 
				
			||||||
 | 
					    LatticeFermionD    diff(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int Ls=48+1;
 | 
				
			||||||
 | 
					    GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					    GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD mass=0.1;
 | 
				
			||||||
 | 
					    RealD M5  =0.8;
 | 
				
			||||||
 | 
					    OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Momentum space prop
 | 
				
			||||||
 | 
					    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
				
			||||||
 | 
					    bool fiveD = false; //calculate 4d free propagator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << " Free propagator " <<std::endl;
 | 
				
			||||||
 | 
					    Dov.FreePropagator(src,ref,mass) ;
 | 
				
			||||||
 | 
					    std::cout << " Free propagator norm "<< norm2(ref) <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Gamma G5(Gamma::Algebra::Gamma5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    LatticeFermionD    src5(FGrid); src5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    tmp5(FGrid); 
 | 
				
			||||||
 | 
					    LatticeFermionD    result5(FGrid); result5=Zero();
 | 
				
			||||||
 | 
					    LatticeFermionD    result4(&GRID); 
 | 
				
			||||||
 | 
					    const int sdir=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Import
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    std::cout << " Free propagator Import "<< norm2(src) <<std::endl;
 | 
				
			||||||
 | 
					    Dov.ImportPhysicalFermionSource  (src,src5);
 | 
				
			||||||
 | 
					    std::cout << " Free propagator Imported "<< norm2(src5) <<std::endl;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Conjugate gradient on normal equations system
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
 | 
				
			||||||
 | 
					    Dov.Mdag(src5,tmp5);
 | 
				
			||||||
 | 
					    src5=tmp5;
 | 
				
			||||||
 | 
					    MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
 | 
				
			||||||
 | 
					    ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
 | 
				
			||||||
 | 
					    CG(HermOp,src5,result5);
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Domain wall physical field propagator
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    Dov.ExportPhysicalFermionSolution(result5,result4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // From DWF4d.pdf :
 | 
				
			||||||
 | 
					    //
 | 
				
			||||||
 | 
					    // Dov_pf = 2/(1-m) D_cayley_ovlap  [ Page 43 ]
 | 
				
			||||||
 | 
					    // Dinv_cayley_ovlap = 2/(1-m) Dinv_pf 
 | 
				
			||||||
 | 
					    // Dinv_cayley_surface =1/(1-m) ( Dinv_cayley_ovlap - 1 ) =>  2/(1-m)^2 Dinv_pf - 1/(1-m) * src   [ Eq.2.67 ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RealD scale = 2.0/(1.0-mass)/(1.0-mass);
 | 
				
			||||||
 | 
					    result4 = result4 * scale;
 | 
				
			||||||
 | 
					    result4 = result4 - src*(1.0/(1.0-mass)); // Subtract contact term
 | 
				
			||||||
 | 
					    DumpSliceNorm("Src",src);
 | 
				
			||||||
 | 
					    DumpSliceNorm("Grid",result4);
 | 
				
			||||||
 | 
					    DumpSliceNorm("Fourier",ref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << "Dov result4 "<<norm2(result4)<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << "Dov ref     "<<norm2(ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    diff = result4- ref;
 | 
				
			||||||
 | 
					    DumpSliceNorm("diff ",diff);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -63,7 +63,9 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
  std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
 | 
					  std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // guard as this code fails to compile for Nc != 3
 | 
					  // guard as this code fails to compile for Nc != 3
 | 
				
			||||||
#if (Nc == 3)
 | 
					#if 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << " Printing  Adjoint Generators"<< std::endl;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  SU2Adjoint::printGenerators();
 | 
					  SU2Adjoint::printGenerators();
 | 
				
			||||||
  SU2::testGenerators();
 | 
					  SU2::testGenerators();
 | 
				
			||||||
@@ -149,9 +151,32 @@ int main(int argc, char** argv) {
 | 
				
			|||||||
    pokeLorentz(UrVr,Urmu*Vrmu, mu);
 | 
					    pokeLorentz(UrVr,Urmu*Vrmu, mu);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef typename SU_Adjoint<Nc>::AMatrix AdjointMatrix;
 | 
				
			||||||
  typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr;
 | 
					  typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr;
 | 
				
			||||||
  std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl;
 | 
					  std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************** " << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " MAP BETWEEN FUNDAMENTAL AND ADJOINT CHECK " << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************** " << std::endl;
 | 
				
			||||||
 | 
					  for(int a=0;a<Nc*Nc-1;a++){
 | 
				
			||||||
 | 
					  for(int b=0;b<Nc*Nc-1;b++){
 | 
				
			||||||
 | 
					  for(int c=0;c<Nc*Nc-1;c++){
 | 
				
			||||||
 | 
					    ColourMatrix Ta;
 | 
				
			||||||
 | 
					    ColourMatrix Tb;
 | 
				
			||||||
 | 
					    ColourMatrix Tc;
 | 
				
			||||||
 | 
					    SU3::generator(a, Ta);
 | 
				
			||||||
 | 
					    SU3::generator(b, Tb);
 | 
				
			||||||
 | 
					    SU3::generator(c, Tc);
 | 
				
			||||||
 | 
					    AdjointMatrix TRa;
 | 
				
			||||||
 | 
					    SU3Adjoint::generator(a,TRa);
 | 
				
			||||||
 | 
					    Complex tr1 = trace ( Tc * ( Ta*Tb-Tb*Ta)); // i/2 fabc
 | 
				
			||||||
 | 
					    Complex tr2 = TRa()()(b,c) * Complex(0,1);
 | 
				
			||||||
 | 
					    std::cout << " 2 Tr( Tc[Ta,Tb]) " << 2.0*tr1<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << " - TRa_bc " << tr2<<std::endl;
 | 
				
			||||||
 | 
					    assert(abs( (2.0*tr1-tr2) ) < 1.0e-7);
 | 
				
			||||||
 | 
					    std::cout << "------------------"<<std::endl;
 | 
				
			||||||
 | 
					  }}}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  // Check correspondence of algebra and group transformations
 | 
					  // Check correspondence of algebra and group transformations
 | 
				
			||||||
  // Create a random vector
 | 
					  // Create a random vector
 | 
				
			||||||
  SU3::LatticeAlgebraVector h_adj(grid);
 | 
					  SU3::LatticeAlgebraVector h_adj(grid);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										184
									
								
								tests/debug/Test_padded_cell.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								tests/debug/Test_padded_cell.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,184 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_padded_cell.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/lattice/PaddedCell.h>
 | 
				
			||||||
 | 
					#include <Grid/stencil/GeneralLocalStencil.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace std;
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> void gpermute(vobj & inout,int perm){
 | 
				
			||||||
 | 
					  vobj tmp=inout;
 | 
				
			||||||
 | 
					  if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
 | 
				
			||||||
 | 
					  if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;}
 | 
				
			||||||
 | 
					  if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;}
 | 
				
			||||||
 | 
					  if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Coordinate latt_size  = GridDefaultLatt();
 | 
				
			||||||
 | 
					  Coordinate simd_layout= GridDefaultSimd(Nd,vComplexD::Nsimd());
 | 
				
			||||||
 | 
					  Coordinate mpi_layout = GridDefaultMpi();
 | 
				
			||||||
 | 
					  std::cout << " mpi "<<mpi_layout<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << " simd "<<simd_layout<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << " latt "<<latt_size<<std::endl;
 | 
				
			||||||
 | 
					  GridCartesian GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridParallelRNG   pRNG(&GRID);
 | 
				
			||||||
 | 
					  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
 | 
				
			||||||
 | 
					  LatticeGaugeField Umu(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SU<Nc>::HotConfiguration(pRNG,Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 | 
				
			||||||
 | 
					  LatticeComplex trplaq(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<LatticeColourMatrix> U(Nd, Umu.Grid());
 | 
				
			||||||
 | 
					  for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Average plaquette "<<plaq<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeComplex cplaq(&GRID); cplaq=Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Create a padded cell of extra padding depth=1
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int depth = 1;
 | 
				
			||||||
 | 
					  PaddedCell Ghost(depth,&GRID);
 | 
				
			||||||
 | 
					  LatticeGaugeField Ughost = Ghost.Exchange(Umu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Temporary debug Hack for single rank sim:
 | 
				
			||||||
 | 
					  // Check the contents of the cell are periodcally replicated
 | 
				
			||||||
 | 
					  // In future ONLY pad those dimensions that are not local to node
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#if 0
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    double diff=0;
 | 
				
			||||||
 | 
					    double n=0;
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView( Ug_v , Ughost, CpuRead);
 | 
				
			||||||
 | 
					    autoView( Ul_v , Umu   , CpuRead);
 | 
				
			||||||
 | 
					  for(int x=0;x<latt_size[0]+2;x++){
 | 
				
			||||||
 | 
					  for(int y=0;y<latt_size[1]+2;y++){
 | 
				
			||||||
 | 
					  for(int z=0;z<latt_size[2]+2;z++){
 | 
				
			||||||
 | 
					  for(int t=0;t<latt_size[3]+2;t++){
 | 
				
			||||||
 | 
					    int lx=(x-1+latt_size[0])%latt_size[0];
 | 
				
			||||||
 | 
					    int ly=(y-1+latt_size[1])%latt_size[1];
 | 
				
			||||||
 | 
					    int lz=(z-1+latt_size[2])%latt_size[2];
 | 
				
			||||||
 | 
					    int lt=(t-1+latt_size[3])%latt_size[3];
 | 
				
			||||||
 | 
					    Coordinate gcoor({x,y,z,t});
 | 
				
			||||||
 | 
					    Coordinate lcoor({lx,ly,lz,lt});
 | 
				
			||||||
 | 
					    LorentzColourMatrix g;
 | 
				
			||||||
 | 
					    LorentzColourMatrix l;
 | 
				
			||||||
 | 
					    peekLocalSite(g,Ug_v,gcoor);
 | 
				
			||||||
 | 
					    peekLocalSite(l,Ul_v,lcoor);
 | 
				
			||||||
 | 
					    g=g-l;
 | 
				
			||||||
 | 
					    assert(norm2(g)==0);
 | 
				
			||||||
 | 
					    diff = diff + norm2(g);
 | 
				
			||||||
 | 
					    n = n + norm2(l);
 | 
				
			||||||
 | 
					  }}}}
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout << "padded field check diff "<< diff <<" / "<< n<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << norm2(Ughost)<< " " << norm2(Umu)<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///// Array for the site plaquette
 | 
				
			||||||
 | 
					  GridBase *GhostGrid = Ughost.Grid();
 | 
				
			||||||
 | 
					  LatticeComplex gplaq(GhostGrid); 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::vector<Coordinate> shifts;
 | 
				
			||||||
 | 
					  for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					    for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					      //    Umu(x) Unu(x+mu) Umu^dag(x+nu) Unu^dag(x)
 | 
				
			||||||
 | 
					      Coordinate shift_0(Nd,0);
 | 
				
			||||||
 | 
					      Coordinate shift_mu(Nd,0); shift_mu[mu]=1;
 | 
				
			||||||
 | 
					      Coordinate shift_nu(Nd,0); shift_nu[nu]=1;
 | 
				
			||||||
 | 
					      shifts.push_back(shift_0);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_mu);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_nu);
 | 
				
			||||||
 | 
					      shifts.push_back(shift_0);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  GeneralLocalStencil gStencil(GhostGrid,shifts);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  gplaq=Zero();
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView( gp_v , gplaq, CpuWrite);
 | 
				
			||||||
 | 
					    autoView( t_v , trplaq, CpuRead);
 | 
				
			||||||
 | 
					    autoView( U_v , Ughost, CpuRead);
 | 
				
			||||||
 | 
					    for(int ss=0;ss<gp_v.size();ss++){
 | 
				
			||||||
 | 
					      int s=0;
 | 
				
			||||||
 | 
					      for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
						for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  auto SE0 = gStencil.GetEntry(s+0,ss);
 | 
				
			||||||
 | 
						  auto SE1 = gStencil.GetEntry(s+1,ss);
 | 
				
			||||||
 | 
						  auto SE2 = gStencil.GetEntry(s+2,ss);
 | 
				
			||||||
 | 
						  auto SE3 = gStencil.GetEntry(s+3,ss);
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						  int o0 = SE0->_offset;
 | 
				
			||||||
 | 
						  int o1 = SE1->_offset;
 | 
				
			||||||
 | 
						  int o2 = SE2->_offset;
 | 
				
			||||||
 | 
						  int o3 = SE3->_offset;
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						  auto U0 = U_v[o0](mu);
 | 
				
			||||||
 | 
						  auto U1 = U_v[o1](nu);
 | 
				
			||||||
 | 
						  auto U2 = adj(U_v[o2](mu));
 | 
				
			||||||
 | 
						  auto U3 = adj(U_v[o3](nu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  gpermute(U0,SE0->_permute);
 | 
				
			||||||
 | 
						  gpermute(U1,SE1->_permute);
 | 
				
			||||||
 | 
						  gpermute(U2,SE2->_permute);
 | 
				
			||||||
 | 
						  gpermute(U3,SE3->_permute);
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
						  gp_v[ss]() =gp_v[ss]() + trace( U0*U1*U2*U3 );
 | 
				
			||||||
 | 
						  s=s+4;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  cplaq = Ghost.Extract(gplaq);
 | 
				
			||||||
 | 
					  RealD vol = cplaq.Grid()->gSites();
 | 
				
			||||||
 | 
					  RealD faces = (Nd * (Nd-1))/2;
 | 
				
			||||||
 | 
					  auto p = TensorRemove(sum(cplaq));
 | 
				
			||||||
 | 
					  auto result = p.real()/vol/faces/Nc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Average plaquette via padded cell "<<result<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Diff "<<result-plaq<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  assert(fabs(result-plaq)<1.0e-8);
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -476,7 +476,9 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  //  ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
 | 
					  //  ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //////////////////// One flavour boundary det  ////////////////////
 | 
					  //////////////////// One flavour boundary det  ////////////////////
 | 
				
			||||||
 | 
					  /*
 | 
				
			||||||
  RationalActionParams OFRp; // Up/down
 | 
					  RationalActionParams OFRp; // Up/down
 | 
				
			||||||
 | 
					  int SP_iters = 3000;
 | 
				
			||||||
  OFRp.lo       = 6.0e-5;
 | 
					  OFRp.lo       = 6.0e-5;
 | 
				
			||||||
  OFRp.hi       = 90.0;
 | 
					  OFRp.hi       = 90.0;
 | 
				
			||||||
  OFRp.inv_pow  = 2;
 | 
					  OFRp.inv_pow  = 2;
 | 
				
			||||||
@@ -489,7 +491,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  //  OFRp.degree   = 16;
 | 
					  //  OFRp.degree   = 16;
 | 
				
			||||||
  OFRp.precision= 80;
 | 
					  OFRp.precision= 80;
 | 
				
			||||||
  OFRp.BoundsCheckFreq=0;
 | 
					  OFRp.BoundsCheckFreq=0;
 | 
				
			||||||
  /*
 | 
					  */
 | 
				
			||||||
  OneFlavourRationalParams OFRp; // Up/down
 | 
					  OneFlavourRationalParams OFRp; // Up/down
 | 
				
			||||||
  OFRp.lo       = 4.0e-5;
 | 
					  OFRp.lo       = 4.0e-5;
 | 
				
			||||||
  OFRp.hi       = 90.0;
 | 
					  OFRp.hi       = 90.0;
 | 
				
			||||||
@@ -499,7 +501,6 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  OFRp.degree   = 18;
 | 
					  OFRp.degree   = 18;
 | 
				
			||||||
  OFRp.precision= 80;
 | 
					  OFRp.precision= 80;
 | 
				
			||||||
  OFRp.BoundsCheckFreq=0;
 | 
					  OFRp.BoundsCheckFreq=0;
 | 
				
			||||||
  */
 | 
					 | 
				
			||||||
  std::vector<RealD> ActionTolByPole({
 | 
					  std::vector<RealD> ActionTolByPole({
 | 
				
			||||||
      1.0e-7,1.0e-8,1.0e-8,1.0e-8,
 | 
					      1.0e-7,1.0e-8,1.0e-8,1.0e-8,
 | 
				
			||||||
      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
 | 
					      1.0e-8,1.0e-8,1.0e-8,1.0e-8,
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										219
									
								
								tests/forces/Test_fthmc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										219
									
								
								tests/forces/Test_fthmc.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,219 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_fthmc.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2022
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <pboyle@bnl.gov>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/smearing/JacobianAction.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace std;
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef MobiusFermionD FermionAction;
 | 
				
			||||||
 | 
					typedef WilsonImplD FimplD;
 | 
				
			||||||
 | 
					typedef WilsonImplD FermionImplPolicy;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Gimpl>
 | 
				
			||||||
 | 
					void ForceTest(Action<LatticeGaugeField> &action,ConfigurationBase<LatticeGaugeField> & smU,MomentumFilterBase<LatticeGaugeField> &Filter)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  LatticeGaugeField U = smU.get_U(false); // unsmeared config
 | 
				
			||||||
 | 
					  GridBase *UGrid = U.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> seeds({1,2,3,5});
 | 
				
			||||||
 | 
					  GridSerialRNG            sRNG;         sRNG.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeColourMatrix Pmu(UGrid); 
 | 
				
			||||||
 | 
					  LatticeGaugeField P(UGrid); 
 | 
				
			||||||
 | 
					  LatticeGaugeField UdSdU(UGrid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "*********************************************************"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Force test for "<<action.action_name()<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "*********************************************************"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  RealD eps=0.01;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Refresh "<<action.action_name()<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Gimpl::generate_momenta(P,sRNG,RNG4);
 | 
				
			||||||
 | 
					  //  Filter.applyFilter(P);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  action.refresh(smU,sRNG,RNG4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RealD S1 = action.S(smU);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Gimpl::update_field(P,U,eps);
 | 
				
			||||||
 | 
					  smU.set_Field(U);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Derivative "<<action.action_name()<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  action.deriv(smU,UdSdU);
 | 
				
			||||||
 | 
					  UdSdU = Ta(UdSdU);
 | 
				
			||||||
 | 
					  //  Filter.applyFilter(UdSdU);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DumpSliceNorm("Force",UdSdU,Nd-1);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Gimpl::update_field(P,U,eps);
 | 
				
			||||||
 | 
					  smU.set_Field(U);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  RealD S2 = action.S(smU);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Use the derivative
 | 
				
			||||||
 | 
					  LatticeComplex dS(UGrid); dS = Zero();
 | 
				
			||||||
 | 
					  for(int mu=0;mu<Nd;mu++){
 | 
				
			||||||
 | 
					    auto UdSdUmu = PeekIndex<LorentzIndex>(UdSdU,mu);
 | 
				
			||||||
 | 
					    Pmu= PeekIndex<LorentzIndex>(P,mu);
 | 
				
			||||||
 | 
					    dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*HMC_MOMENTUM_DENOMINATOR;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  ComplexD dSpred    = sum(dS);
 | 
				
			||||||
 | 
					  RealD diff =  S2-S1-dSpred.real();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "S1 : "<< S1    <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "S2 : "<< S2    <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "dS : "<< S2-S1 <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "dSpred : "<< dSpred.real() <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "diff : "<< diff<<std::endl;
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "*********************************************************"<<std::endl;
 | 
				
			||||||
 | 
					  //  assert(diff<1.0);
 | 
				
			||||||
 | 
					  std::cout<< GridLogMessage << "Done" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "*********************************************************"<<std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << std::setprecision(14);
 | 
				
			||||||
 | 
					  Coordinate latt_size   = GridDefaultLatt();
 | 
				
			||||||
 | 
					  Coordinate mpi_layout  = GridDefaultMpi();
 | 
				
			||||||
 | 
					  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 | 
				
			||||||
 | 
					  Coordinate shm;
 | 
				
			||||||
 | 
					  GlobalSharedMemory::GetShmDims(mpi_layout,shm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Ls=12;
 | 
				
			||||||
 | 
					  const int Nt = latt_size[3];
 | 
				
			||||||
 | 
					  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
 | 
					  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////// Gauge Field and Gauge Forces ////////////////////////////
 | 
				
			||||||
 | 
					  LatticeGaugeField U(UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if  0
 | 
				
			||||||
 | 
					  FieldMetaData header;
 | 
				
			||||||
 | 
					  std::string file("./ckpoint_lat.2000");
 | 
				
			||||||
 | 
					  NerscIO::readConfiguration(U,header,file);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  std::vector<int> seeds({1,2,3,4,5,6,7,8});
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					  SU<Nc>::HotConfiguration(RNG4,U);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  WilsonGaugeActionR  PlaqAction(6.0);
 | 
				
			||||||
 | 
					  IwasakiGaugeActionR RectAction(2.13);
 | 
				
			||||||
 | 
					  PlaqAction.is_smeared = true;  
 | 
				
			||||||
 | 
					  RectAction.is_smeared = true;  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  // Fermion Action
 | 
				
			||||||
 | 
					  ////////////////////////////////////
 | 
				
			||||||
 | 
					  RealD mass=0.01; 
 | 
				
			||||||
 | 
					  RealD pvmass=1.0; 
 | 
				
			||||||
 | 
					  RealD M5=1.8; 
 | 
				
			||||||
 | 
					  RealD b=1.5;
 | 
				
			||||||
 | 
					  RealD c=0.5;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // Double versions
 | 
				
			||||||
 | 
					  std::vector<Complex> boundary = {1,1,1,-1};
 | 
				
			||||||
 | 
					  FermionAction::ImplParams Params(boundary);
 | 
				
			||||||
 | 
					  FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params);
 | 
				
			||||||
 | 
					  FermionAction PVPeriodic  (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  double StoppingCondition = 1.0e-8;
 | 
				
			||||||
 | 
					  double MaxCGIterations = 50000;
 | 
				
			||||||
 | 
					  ConjugateGradient<LatticeFermion>  CG(StoppingCondition,MaxCGIterations);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TwoFlavourRatioPseudoFermionAction<FimplD> Nf2(PVPeriodic, DdwfPeriodic,CG,CG);
 | 
				
			||||||
 | 
					  Nf2.is_smeared = true;  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Plaquette only FTHMC smearer
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  double rho = 0.1;
 | 
				
			||||||
 | 
					  Smear_Stout<PeriodicGimplR> Smearer(rho);
 | 
				
			||||||
 | 
					  SmearedConfigurationMasked<PeriodicGimplR> SmartConfig(UGrid,2*Nd,Smearer);
 | 
				
			||||||
 | 
					  SmearedConfiguration<PeriodicGimplR> StoutConfig(UGrid,1,Smearer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  JacobianAction<PeriodicGimplR> Jacobian(&SmartConfig);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Run some tests
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  MomentumFilterNone<LatticeGaugeField> FilterNone;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << " *********  FIELD TRANSFORM SMEARING ***** "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmartConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(PlaqAction,SmartConfig,FilterNone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmartConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(RectAction,SmartConfig,FilterNone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmartConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(Jacobian,SmartConfig,FilterNone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SmartConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(Nf2,SmartConfig,FilterNone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << " *********    STOUT SMEARING ***** "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  StoutConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(PlaqAction,StoutConfig,FilterNone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  StoutConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(RectAction,StoutConfig,FilterNone);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  StoutConfig.set_Field(U);
 | 
				
			||||||
 | 
					  ForceTest<GimplTypesR>(Nf2,StoutConfig,FilterNone);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user