mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	@@ -42,7 +42,7 @@
 | 
				
			|||||||
#ifdef __NVCC__REDEFINE__
 | 
					#ifdef __NVCC__REDEFINE__
 | 
				
			||||||
#pragma pop_macro("__CUDACC__")
 | 
					#pragma pop_macro("__CUDACC__")
 | 
				
			||||||
#pragma pop_macro("__NVCC__")
 | 
					#pragma pop_macro("__NVCC__")
 | 
				
			||||||
#pragma pop_macro("GRID_SIMT")
 | 
					#pragma pop_macro("__CUDA_ARCH__")
 | 
				
			||||||
#pragma pop
 | 
					#pragma pop
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,8 +65,7 @@ public:
 | 
				
			|||||||
    MemoryManager::CpuFree((void *)__p,bytes);
 | 
					    MemoryManager::CpuFree((void *)__p,bytes);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // FIXME: hack for the copy constructor, eventually it must be avoided
 | 
					  // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
 | 
				
			||||||
  //void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
 | 
					 | 
				
			||||||
  void construct(pointer __p, const _Tp& __val) { assert(0);};
 | 
					  void construct(pointer __p, const _Tp& __val) { assert(0);};
 | 
				
			||||||
  void construct(pointer __p) { };
 | 
					  void construct(pointer __p) { };
 | 
				
			||||||
  void destroy(pointer __p) { };
 | 
					  void destroy(pointer __p) { };
 | 
				
			||||||
@@ -74,6 +73,9 @@ public:
 | 
				
			|||||||
template<typename _Tp>  inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
 | 
					template<typename _Tp>  inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
 | 
				
			||||||
template<typename _Tp>  inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
 | 
					template<typename _Tp>  inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Unified virtual memory
 | 
				
			||||||
 | 
					//////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
template<typename _Tp>
 | 
					template<typename _Tp>
 | 
				
			||||||
class uvmAllocator {
 | 
					class uvmAllocator {
 | 
				
			||||||
public: 
 | 
					public: 
 | 
				
			||||||
@@ -109,22 +111,63 @@ public:
 | 
				
			|||||||
    MemoryManager::SharedFree((void *)__p,bytes);
 | 
					    MemoryManager::SharedFree((void *)__p,bytes);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // FIXME: hack for the copy constructor, eventually it must be avoided
 | 
					 | 
				
			||||||
  void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
 | 
					  void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
 | 
				
			||||||
  //void construct(pointer __p, const _Tp& __val) { };
 | 
					 | 
				
			||||||
  void construct(pointer __p) { };
 | 
					  void construct(pointer __p) { };
 | 
				
			||||||
  void destroy(pointer __p) { };
 | 
					  void destroy(pointer __p) { };
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
template<typename _Tp>  inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
 | 
					template<typename _Tp>  inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
 | 
				
			||||||
template<typename _Tp>  inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
 | 
					template<typename _Tp>  inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Device memory
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template<typename _Tp>
 | 
				
			||||||
 | 
					class devAllocator {
 | 
				
			||||||
 | 
					public: 
 | 
				
			||||||
 | 
					  typedef std::size_t     size_type;
 | 
				
			||||||
 | 
					  typedef std::ptrdiff_t  difference_type;
 | 
				
			||||||
 | 
					  typedef _Tp*       pointer;
 | 
				
			||||||
 | 
					  typedef const _Tp* const_pointer;
 | 
				
			||||||
 | 
					  typedef _Tp&       reference;
 | 
				
			||||||
 | 
					  typedef const _Tp& const_reference;
 | 
				
			||||||
 | 
					  typedef _Tp        value_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template<typename _Tp1>  struct rebind { typedef devAllocator<_Tp1> other; };
 | 
				
			||||||
 | 
					  devAllocator() throw() { }
 | 
				
			||||||
 | 
					  devAllocator(const devAllocator&) throw() { }
 | 
				
			||||||
 | 
					  template<typename _Tp1> devAllocator(const devAllocator<_Tp1>&) throw() { }
 | 
				
			||||||
 | 
					  ~devAllocator() throw() { }
 | 
				
			||||||
 | 
					  pointer       address(reference __x)       const { return &__x; }
 | 
				
			||||||
 | 
					  size_type  max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  pointer allocate(size_type __n, const void* _p= 0)
 | 
				
			||||||
 | 
					  { 
 | 
				
			||||||
 | 
					    size_type bytes = __n*sizeof(_Tp);
 | 
				
			||||||
 | 
					    profilerAllocate(bytes);
 | 
				
			||||||
 | 
					    _Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
 | 
				
			||||||
 | 
					    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
 | 
				
			||||||
 | 
					    return ptr;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void deallocate(pointer __p, size_type __n) 
 | 
				
			||||||
 | 
					  { 
 | 
				
			||||||
 | 
					    size_type bytes = __n * sizeof(_Tp);
 | 
				
			||||||
 | 
					    profilerFree(bytes);
 | 
				
			||||||
 | 
					    MemoryManager::AcceleratorFree((void *)__p,bytes);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void construct(pointer __p, const _Tp& __val) { };
 | 
				
			||||||
 | 
					  void construct(pointer __p) { };
 | 
				
			||||||
 | 
					  void destroy(pointer __p) { };
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<typename _Tp>  inline bool operator==(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return true; }
 | 
				
			||||||
 | 
					template<typename _Tp>  inline bool operator!=(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return false; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Template typedefs
 | 
					// Template typedefs
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
template<class T> using commAllocator = uvmAllocator<T>;
 | 
					//template<class T> using commAllocator = devAllocator<T>;
 | 
				
			||||||
template<class T> using Vector     = std::vector<T,uvmAllocator<T> >;           
 | 
					template<class T> using Vector     = std::vector<T,uvmAllocator<T> >;           
 | 
				
			||||||
template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
 | 
					template<class T> using commVector = std::vector<T,devAllocator<T> >;
 | 
				
			||||||
//template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -93,12 +93,12 @@ private:
 | 
				
			|||||||
  static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
 | 
					  static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
 | 
				
			||||||
  static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
 | 
					  static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  static void *AcceleratorAllocate(size_t bytes);
 | 
					 | 
				
			||||||
  static void  AcceleratorFree    (void *ptr,size_t bytes);
 | 
					 | 
				
			||||||
  static void PrintBytes(void);
 | 
					  static void PrintBytes(void);
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
  static void Init(void);
 | 
					  static void Init(void);
 | 
				
			||||||
  static void InitMessage(void);
 | 
					  static void InitMessage(void);
 | 
				
			||||||
 | 
					  static void *AcceleratorAllocate(size_t bytes);
 | 
				
			||||||
 | 
					  static void  AcceleratorFree    (void *ptr,size_t bytes);
 | 
				
			||||||
  static void *SharedAllocate(size_t bytes);
 | 
					  static void *SharedAllocate(size_t bytes);
 | 
				
			||||||
  static void  SharedFree    (void *ptr,size_t bytes);
 | 
					  static void  SharedFree    (void *ptr,size_t bytes);
 | 
				
			||||||
  static void *CpuAllocate(size_t bytes);
 | 
					  static void *CpuAllocate(size_t bytes);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -302,60 +302,28 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
				
			|||||||
					   int bytes)
 | 
										   int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  std::vector<CommsRequest_t> reqs(0);
 | 
					  std::vector<CommsRequest_t> reqs(0);
 | 
				
			||||||
  //    unsigned long  xcrc = crc32(0L, Z_NULL, 0);
 | 
					  unsigned long  xcrc = crc32(0L, Z_NULL, 0);
 | 
				
			||||||
  //    unsigned long  rcrc = crc32(0L, Z_NULL, 0);
 | 
					  unsigned long  rcrc = crc32(0L, Z_NULL, 0);
 | 
				
			||||||
  //    xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
 | 
					
 | 
				
			||||||
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
					 | 
				
			||||||
  SendToRecvFromComplete(reqs);
 | 
					 | 
				
			||||||
  //    rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
 | 
					 | 
				
			||||||
  //    printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
					 | 
				
			||||||
					   void *recv,
 | 
					 | 
				
			||||||
					   int sender,
 | 
					 | 
				
			||||||
					   int receiver,
 | 
					 | 
				
			||||||
					   int bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  MPI_Status stat;
 | 
					 | 
				
			||||||
  assert(sender != receiver);
 | 
					 | 
				
			||||||
  int tag = sender;
 | 
					 | 
				
			||||||
  if ( _processor == sender ) {
 | 
					 | 
				
			||||||
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  if ( _processor == receiver ) {
 | 
					 | 
				
			||||||
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
// Basic Halo comms primitive
 | 
					 | 
				
			||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
					 | 
				
			||||||
						void *xmit,
 | 
					 | 
				
			||||||
						int dest,
 | 
					 | 
				
			||||||
						void *recv,
 | 
					 | 
				
			||||||
						int from,
 | 
					 | 
				
			||||||
						int bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  int myrank = _processor;
 | 
					  int myrank = _processor;
 | 
				
			||||||
  int ierr;
 | 
					  int ierr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
 | 
					  // Enforce no UVM in comms, device or host OK
 | 
				
			||||||
    MPI_Request xrq;
 | 
					  assert(acceleratorIsCommunicable(xmit));
 | 
				
			||||||
    MPI_Request rrq;
 | 
					  assert(acceleratorIsCommunicable(recv));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
					 | 
				
			||||||
    ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    assert(ierr==0);
 | 
					 | 
				
			||||||
    list.push_back(xrq);
 | 
					 | 
				
			||||||
    list.push_back(rrq);
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
  // Give the CPU to MPI immediately; can use threads to overlap optionally
 | 
					  // Give the CPU to MPI immediately; can use threads to overlap optionally
 | 
				
			||||||
 | 
					  //  printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
 | 
				
			||||||
  ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
 | 
					  ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
 | 
				
			||||||
		    recv,bytes,MPI_CHAR,from, from,
 | 
							    recv,bytes,MPI_CHAR,from, from,
 | 
				
			||||||
		    communicator,MPI_STATUS_IGNORE);
 | 
							    communicator,MPI_STATUS_IGNORE);
 | 
				
			||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
 | 
				
			||||||
 | 
					  //  rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
 | 
				
			||||||
 | 
					  //  printf("proc %d SendToRecvFrom %d bytes xcrc %lx rcrc %lx\n",_processor,bytes,xcrc,rcrc); fflush
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					// Basic Halo comms primitive
 | 
				
			||||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 | 
					double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 | 
				
			||||||
						     int dest,
 | 
											     int dest,
 | 
				
			||||||
						     void *recv,
 | 
											     void *recv,
 | 
				
			||||||
@@ -411,15 +379,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  return off_node_bytes;
 | 
					  return off_node_bytes;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
 | 
					void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  SendToRecvFromComplete(waitall);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
void CartesianCommunicator::StencilBarrier(void)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  MPI_Barrier  (ShmComm);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int nreq=list.size();
 | 
					  int nreq=list.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -430,6 +390,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
 | 
				
			|||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
  list.resize(0);
 | 
					  list.resize(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::StencilBarrier(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  MPI_Barrier  (ShmComm);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
				
			||||||
 | 
					//{
 | 
				
			||||||
 | 
					//}
 | 
				
			||||||
void CartesianCommunicator::Barrier(void)
 | 
					void CartesianCommunicator::Barrier(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int ierr = MPI_Barrier(communicator);
 | 
					  int ierr = MPI_Barrier(communicator);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,23 +52,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<typename Op, typename T1> 
 | 
					template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> 
 | 
				
			||||||
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
 | 
					auto Cshift(const Expression &expr,int dim,int shift)  -> decltype(closure(expr)) 
 | 
				
			||||||
    -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  return Cshift(closure(expr),dim,shift);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
template <class Op, class T1, class T2>
 | 
					 | 
				
			||||||
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
 | 
					 | 
				
			||||||
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> 
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  return Cshift(closure(expr),dim,shift);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
template <class Op, class T1, class T2, class T3>
 | 
					 | 
				
			||||||
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
 | 
					 | 
				
			||||||
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
 | 
					 | 
				
			||||||
				   eval(0, expr.arg2),
 | 
					 | 
				
			||||||
				   eval(0, expr.arg3)))> 
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return Cshift(closure(expr),dim,shift);
 | 
					  return Cshift(closure(expr),dim,shift);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -76,8 +76,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
 | 
				
			|||||||
    autoView(rhs_v , rhs, AcceleratorRead);
 | 
					    autoView(rhs_v , rhs, AcceleratorRead);
 | 
				
			||||||
    auto buffer_p = & buffer[0];
 | 
					    auto buffer_p = & buffer[0];
 | 
				
			||||||
    auto table = &Cshift_table[0];
 | 
					    auto table = &Cshift_table[0];
 | 
				
			||||||
    accelerator_for(i,ent,1,{
 | 
					    accelerator_for(i,ent,vobj::Nsimd(),{
 | 
				
			||||||
      buffer_p[table[i].first]=rhs_v[table[i].second];
 | 
						coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -185,8 +185,8 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
 | 
				
			|||||||
    autoView( rhs_v, rhs, AcceleratorWrite);
 | 
					    autoView( rhs_v, rhs, AcceleratorWrite);
 | 
				
			||||||
    auto buffer_p = & buffer[0];
 | 
					    auto buffer_p = & buffer[0];
 | 
				
			||||||
    auto table = &Cshift_table[0];
 | 
					    auto table = &Cshift_table[0];
 | 
				
			||||||
    accelerator_for(i,ent,1,{
 | 
					    accelerator_for(i,ent,vobj::Nsimd(),{
 | 
				
			||||||
	rhs_v[table[i].first]=buffer_p[table[i].second];
 | 
						coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -222,6 +222,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
 | 
				
			|||||||
    // Test_cshift_red_black code.
 | 
					    // Test_cshift_red_black code.
 | 
				
			||||||
    //    std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
 | 
					    //    std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
 | 
				
			||||||
    std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
 | 
					    std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
 | 
				
			||||||
 | 
					    assert(0); // This will fail if hit on GPU
 | 
				
			||||||
    autoView( rhs_v, rhs, CpuWrite);
 | 
					    autoView( rhs_v, rhs, CpuWrite);
 | 
				
			||||||
    for(int n=0;n<e1;n++){
 | 
					    for(int n=0;n<e1;n++){
 | 
				
			||||||
      for(int b=0;b<e2;b++){
 | 
					      for(int b=0;b<e2;b++){
 | 
				
			||||||
@@ -282,8 +283,8 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
 | 
				
			|||||||
    autoView(rhs_v , rhs, AcceleratorRead);
 | 
					    autoView(rhs_v , rhs, AcceleratorRead);
 | 
				
			||||||
    autoView(lhs_v , lhs, AcceleratorWrite);
 | 
					    autoView(lhs_v , lhs, AcceleratorWrite);
 | 
				
			||||||
    auto table = &Cshift_table[0];
 | 
					    auto table = &Cshift_table[0];
 | 
				
			||||||
    accelerator_for(i,ent,1,{
 | 
					    accelerator_for(i,ent,vobj::Nsimd(),{
 | 
				
			||||||
      lhs_v[table[i].first]=rhs_v[table[i].second];
 | 
					      coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -37,6 +37,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/lattice/Lattice_reduction.h>
 | 
					#include <Grid/lattice/Lattice_reduction.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_peekpoke.h>
 | 
					#include <Grid/lattice/Lattice_peekpoke.h>
 | 
				
			||||||
//#include <Grid/lattice/Lattice_reality.h>
 | 
					//#include <Grid/lattice/Lattice_reality.h>
 | 
				
			||||||
 | 
					#include <Grid/lattice/Lattice_real_imag.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_comparison_utils.h>
 | 
					#include <Grid/lattice/Lattice_comparison_utils.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_comparison.h>
 | 
					#include <Grid/lattice/Lattice_comparison.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_coordinate.h>
 | 
					#include <Grid/lattice/Lattice_coordinate.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -42,9 +42,24 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////
 | 
				
			||||||
// Predicated where support
 | 
					// Predicated where support
 | 
				
			||||||
////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#ifdef GRID_SIMT
 | 
				
			||||||
 | 
					// drop to scalar in SIMT; cleaner in fact
 | 
				
			||||||
template <class iobj, class vobj, class robj>
 | 
					template <class iobj, class vobj, class robj>
 | 
				
			||||||
accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
 | 
					accelerator_inline vobj predicatedWhere(const iobj &predicate, 
 | 
				
			||||||
                            const robj &iffalse) {
 | 
										const vobj &iftrue, 
 | 
				
			||||||
 | 
										const robj &iffalse) 
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Integer mask = TensorRemove(predicate);
 | 
				
			||||||
 | 
					  typename std::remove_const<vobj>::type ret= iffalse;
 | 
				
			||||||
 | 
					  if (mask) ret=iftrue;
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					template <class iobj, class vobj, class robj>
 | 
				
			||||||
 | 
					accelerator_inline vobj predicatedWhere(const iobj &predicate, 
 | 
				
			||||||
 | 
										const vobj &iftrue, 
 | 
				
			||||||
 | 
										const robj &iffalse) 
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
  typename std::remove_const<vobj>::type ret;
 | 
					  typename std::remove_const<vobj>::type ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef typename vobj::scalar_object scalar_object;
 | 
					  typedef typename vobj::scalar_object scalar_object;
 | 
				
			||||||
@@ -68,6 +83,7 @@ accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftru
 | 
				
			|||||||
  merge(ret, falsevals);
 | 
					  merge(ret, falsevals);
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////
 | 
				
			||||||
//Specialization of getVectorType for lattices
 | 
					//Specialization of getVectorType for lattices
 | 
				
			||||||
@@ -81,33 +97,62 @@ struct getVectorType<Lattice<T> >{
 | 
				
			|||||||
//--  recursive evaluation of expressions; --
 | 
					//--  recursive evaluation of expressions; --
 | 
				
			||||||
// handle leaves of syntax tree
 | 
					// handle leaves of syntax tree
 | 
				
			||||||
///////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
template<class sobj> accelerator_inline 
 | 
					template<class sobj,
 | 
				
			||||||
 | 
					  typename std::enable_if<!is_lattice<sobj>::value&&!is_lattice_expr<sobj>::value,sobj>::type * = nullptr> 
 | 
				
			||||||
 | 
					accelerator_inline 
 | 
				
			||||||
sobj eval(const uint64_t ss, const sobj &arg)
 | 
					sobj eval(const uint64_t ss, const sobj &arg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return arg;
 | 
					  return arg;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
template <class lobj> accelerator_inline 
 | 
					template <class lobj> accelerator_inline 
 | 
				
			||||||
const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg) 
 | 
					auto eval(const uint64_t ss, const LatticeView<lobj> &arg) -> decltype(arg(ss))
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return arg(ss);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////
 | 
				
			||||||
 | 
					//--  recursive evaluation of expressions; --
 | 
				
			||||||
 | 
					// whole vector return, used only for expression return type inference
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template<class sobj> accelerator_inline 
 | 
				
			||||||
 | 
					sobj vecEval(const uint64_t ss, const sobj &arg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return arg;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template <class lobj> accelerator_inline 
 | 
				
			||||||
 | 
					const lobj & vecEval(const uint64_t ss, const LatticeView<lobj> &arg) 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return arg[ss];
 | 
					  return arg[ss];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// What needs this?
 | 
					 | 
				
			||||||
// Cannot be legal on accelerator
 | 
					 | 
				
			||||||
// Comparison must convert
 | 
					 | 
				
			||||||
#if 1
 | 
					 | 
				
			||||||
template <class lobj> accelerator_inline 
 | 
					 | 
				
			||||||
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg) 
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  assert(0);
 | 
					 | 
				
			||||||
  auto view = arg.View(AcceleratorRead);
 | 
					 | 
				
			||||||
  return view[ss];
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
///////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
// handle nodes in syntax tree- eval one operand
 | 
					// handle nodes in syntax tree- eval one operand
 | 
				
			||||||
 | 
					// vecEval needed (but never called as all expressions offloaded) to infer the return type
 | 
				
			||||||
 | 
					// in SIMT contexts of closure.
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <typename Op, typename T1> accelerator_inline 
 | 
				
			||||||
 | 
					auto vecEval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)  
 | 
				
			||||||
 | 
					  -> decltype(expr.op.func( vecEval(ss, expr.arg1)))
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return expr.op.func( vecEval(ss, expr.arg1) );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					// vecEval two operands
 | 
				
			||||||
 | 
					template <typename Op, typename T1, typename T2> accelerator_inline
 | 
				
			||||||
 | 
					auto vecEval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)  
 | 
				
			||||||
 | 
					  -> decltype(expr.op.func( vecEval(ss,expr.arg1),vecEval(ss,expr.arg2)))
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return expr.op.func( vecEval(ss,expr.arg1), vecEval(ss,expr.arg2) );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					// vecEval three operands
 | 
				
			||||||
 | 
					template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
 | 
				
			||||||
 | 
					auto vecEval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)  
 | 
				
			||||||
 | 
					  -> decltype(expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3)))
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  return expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// handle nodes in syntax tree- eval one operand coalesced
 | 
				
			||||||
///////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////
 | 
				
			||||||
template <typename Op, typename T1> accelerator_inline 
 | 
					template <typename Op, typename T1> accelerator_inline 
 | 
				
			||||||
auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)  
 | 
					auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)  
 | 
				
			||||||
@@ -115,23 +160,41 @@ auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  return expr.op.func( eval(ss, expr.arg1) );
 | 
					  return expr.op.func( eval(ss, expr.arg1) );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
///////////////////////
 | 
					 | 
				
			||||||
// eval two operands
 | 
					// eval two operands
 | 
				
			||||||
///////////////////////
 | 
					 | 
				
			||||||
template <typename Op, typename T1, typename T2> accelerator_inline
 | 
					template <typename Op, typename T1, typename T2> accelerator_inline
 | 
				
			||||||
auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)  
 | 
					auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)  
 | 
				
			||||||
  -> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
 | 
					  -> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
 | 
					  return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
///////////////////////
 | 
					 | 
				
			||||||
// eval three operands
 | 
					// eval three operands
 | 
				
			||||||
///////////////////////
 | 
					 | 
				
			||||||
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
 | 
					template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
 | 
				
			||||||
auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)  
 | 
					auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)  
 | 
				
			||||||
  -> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)))
 | 
					  -> decltype(expr.op.func(eval(ss, expr.arg1), 
 | 
				
			||||||
 | 
								   eval(ss, expr.arg2), 
 | 
				
			||||||
 | 
								   eval(ss, expr.arg3)))
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  return expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3));
 | 
					#ifdef GRID_SIMT
 | 
				
			||||||
 | 
					  // Handles Nsimd (vInteger) != Nsimd(ComplexD)
 | 
				
			||||||
 | 
					  typedef decltype(vecEval(ss, expr.arg2)) rvobj;
 | 
				
			||||||
 | 
					  typedef typename std::remove_reference<rvobj>::type vobj;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Nsimd = vobj::vector_type::Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto vpred = vecEval(ss,expr.arg1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ExtractBuffer<Integer> mask(Nsimd);
 | 
				
			||||||
 | 
					  extract<vInteger, Integer>(TensorRemove(vpred), mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int s = acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					  return expr.op.func(mask[s],
 | 
				
			||||||
 | 
							      eval(ss, expr.arg2), 
 | 
				
			||||||
 | 
							      eval(ss, expr.arg3));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					  return expr.op.func(eval(ss, expr.arg1),
 | 
				
			||||||
 | 
							      eval(ss, expr.arg2), 
 | 
				
			||||||
 | 
							      eval(ss, expr.arg3));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -229,7 +292,7 @@ template <typename Op, typename T1, typename T2> inline
 | 
				
			|||||||
void ExpressionViewOpen(LatticeBinaryExpression<Op, T1, T2> &expr) 
 | 
					void ExpressionViewOpen(LatticeBinaryExpression<Op, T1, T2> &expr) 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  ExpressionViewOpen(expr.arg1);  // recurse AST
 | 
					  ExpressionViewOpen(expr.arg1);  // recurse AST
 | 
				
			||||||
  ExpressionViewOpen(expr.arg2);  // recurse AST
 | 
					  ExpressionViewOpen(expr.arg2);  // rrecurse AST
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template <typename Op, typename T1, typename T2, typename T3>
 | 
					template <typename Op, typename T1, typename T2, typename T3>
 | 
				
			||||||
inline void ExpressionViewOpen(LatticeTrinaryExpression<Op, T1, T2, T3> &expr) 
 | 
					inline void ExpressionViewOpen(LatticeTrinaryExpression<Op, T1, T2, T3> &expr) 
 | 
				
			||||||
@@ -273,9 +336,8 @@ inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
 | 
				
			|||||||
// Unary operators and funcs
 | 
					// Unary operators and funcs
 | 
				
			||||||
////////////////////////////////////////////
 | 
					////////////////////////////////////////////
 | 
				
			||||||
#define GridUnopClass(name, ret)					\
 | 
					#define GridUnopClass(name, ret)					\
 | 
				
			||||||
  template <class arg>							\
 | 
					 | 
				
			||||||
  struct name {								\
 | 
					  struct name {								\
 | 
				
			||||||
    static auto accelerator_inline func(const arg a) -> decltype(ret) { return ret; } \
 | 
					    template<class _arg> static auto accelerator_inline func(const _arg a) -> decltype(ret) { return ret; } \
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GridUnopClass(UnarySub, -a);
 | 
					GridUnopClass(UnarySub, -a);
 | 
				
			||||||
@@ -286,8 +348,6 @@ GridUnopClass(UnaryTrace, trace(a));
 | 
				
			|||||||
GridUnopClass(UnaryTranspose, transpose(a));
 | 
					GridUnopClass(UnaryTranspose, transpose(a));
 | 
				
			||||||
GridUnopClass(UnaryTa, Ta(a));
 | 
					GridUnopClass(UnaryTa, Ta(a));
 | 
				
			||||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
 | 
					GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
 | 
				
			||||||
GridUnopClass(UnaryReal, real(a));
 | 
					 | 
				
			||||||
GridUnopClass(UnaryImag, imag(a));
 | 
					 | 
				
			||||||
GridUnopClass(UnaryToReal, toReal(a));
 | 
					GridUnopClass(UnaryToReal, toReal(a));
 | 
				
			||||||
GridUnopClass(UnaryToComplex, toComplex(a));
 | 
					GridUnopClass(UnaryToComplex, toComplex(a));
 | 
				
			||||||
GridUnopClass(UnaryTimesI, timesI(a));
 | 
					GridUnopClass(UnaryTimesI, timesI(a));
 | 
				
			||||||
@@ -306,10 +366,10 @@ GridUnopClass(UnaryExp, exp(a));
 | 
				
			|||||||
// Binary operators
 | 
					// Binary operators
 | 
				
			||||||
////////////////////////////////////////////
 | 
					////////////////////////////////////////////
 | 
				
			||||||
#define GridBinOpClass(name, combination)			\
 | 
					#define GridBinOpClass(name, combination)			\
 | 
				
			||||||
  template <class left, class right>				\
 | 
					 | 
				
			||||||
  struct name {							\
 | 
					  struct name {							\
 | 
				
			||||||
 | 
					    template <class _left, class _right>			\
 | 
				
			||||||
    static auto accelerator_inline				\
 | 
					    static auto accelerator_inline				\
 | 
				
			||||||
    func(const left &lhs, const right &rhs)			\
 | 
					    func(const _left &lhs, const _right &rhs)			\
 | 
				
			||||||
      -> decltype(combination) const				\
 | 
					      -> decltype(combination) const				\
 | 
				
			||||||
    {								\
 | 
					    {								\
 | 
				
			||||||
      return combination;					\
 | 
					      return combination;					\
 | 
				
			||||||
@@ -329,10 +389,10 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
 | 
				
			|||||||
// Trinary conditional op
 | 
					// Trinary conditional op
 | 
				
			||||||
////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////
 | 
				
			||||||
#define GridTrinOpClass(name, combination)				\
 | 
					#define GridTrinOpClass(name, combination)				\
 | 
				
			||||||
  template <class predicate, class left, class right>			\
 | 
					 | 
				
			||||||
  struct name {								\
 | 
					  struct name {								\
 | 
				
			||||||
 | 
					    template <class _predicate,class _left, class _right>		\
 | 
				
			||||||
    static auto accelerator_inline					\
 | 
					    static auto accelerator_inline					\
 | 
				
			||||||
    func(const predicate &pred, const left &lhs, const right &rhs)	\
 | 
					    func(const _predicate &pred, const _left &lhs, const _right &rhs)	\
 | 
				
			||||||
      -> decltype(combination) const					\
 | 
					      -> decltype(combination) const					\
 | 
				
			||||||
    {									\
 | 
					    {									\
 | 
				
			||||||
      return combination;						\
 | 
					      return combination;						\
 | 
				
			||||||
@@ -340,17 +400,17 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
 | 
				
			|||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GridTrinOpClass(TrinaryWhere,
 | 
					GridTrinOpClass(TrinaryWhere,
 | 
				
			||||||
		(predicatedWhere<predicate, 
 | 
							(predicatedWhere<
 | 
				
			||||||
		 typename std::remove_reference<left>::type,
 | 
							 typename std::remove_reference<_predicate>::type, 
 | 
				
			||||||
		 typename std::remove_reference<right>::type>(pred, lhs,rhs)));
 | 
							 typename std::remove_reference<_left>::type,
 | 
				
			||||||
 | 
							 typename std::remove_reference<_right>::type>(pred, lhs,rhs)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
////////////////////////////////////////////
 | 
					////////////////////////////////////////////
 | 
				
			||||||
// Operator syntactical glue
 | 
					// Operator syntactical glue
 | 
				
			||||||
////////////////////////////////////////////
 | 
					////////////////////////////////////////////
 | 
				
			||||||
 | 
					#define GRID_UNOP(name)   name
 | 
				
			||||||
#define GRID_UNOP(name)   name<decltype(eval(0, arg))>
 | 
					#define GRID_BINOP(name)  name
 | 
				
			||||||
#define GRID_BINOP(name)  name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
					#define GRID_TRINOP(name) name
 | 
				
			||||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define GRID_DEF_UNOP(op, name)						\
 | 
					#define GRID_DEF_UNOP(op, name)						\
 | 
				
			||||||
  template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
 | 
					  template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
 | 
				
			||||||
@@ -402,8 +462,6 @@ GRID_DEF_UNOP(trace, UnaryTrace);
 | 
				
			|||||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
 | 
					GRID_DEF_UNOP(transpose, UnaryTranspose);
 | 
				
			||||||
GRID_DEF_UNOP(Ta, UnaryTa);
 | 
					GRID_DEF_UNOP(Ta, UnaryTa);
 | 
				
			||||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
 | 
					GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
 | 
				
			||||||
GRID_DEF_UNOP(real, UnaryReal);
 | 
					 | 
				
			||||||
GRID_DEF_UNOP(imag, UnaryImag);
 | 
					 | 
				
			||||||
GRID_DEF_UNOP(toReal, UnaryToReal);
 | 
					GRID_DEF_UNOP(toReal, UnaryToReal);
 | 
				
			||||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
 | 
					GRID_DEF_UNOP(toComplex, UnaryToComplex);
 | 
				
			||||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
 | 
					GRID_DEF_UNOP(timesI, UnaryTimesI);
 | 
				
			||||||
@@ -436,29 +494,36 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
 | 
				
			|||||||
/////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////
 | 
				
			||||||
template <class Op, class T1>
 | 
					template <class Op, class T1>
 | 
				
			||||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
 | 
					auto closure(const LatticeUnaryExpression<Op, T1> &expr)
 | 
				
			||||||
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> ret(expr);
 | 
					  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> ret(expr);
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template <class Op, class T1, class T2>
 | 
					template <class Op, class T1, class T2>
 | 
				
			||||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
 | 
					auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
 | 
				
			||||||
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> 
 | 
					  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> ret(expr);
 | 
					  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> ret(expr);
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template <class Op, class T1, class T2, class T3>
 | 
					template <class Op, class T1, class T2, class T3>
 | 
				
			||||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
 | 
					auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
 | 
				
			||||||
  -> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
 | 
					  -> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
 | 
				
			||||||
				   eval(0, expr.arg2),
 | 
									   vecEval(0, expr.arg2),
 | 
				
			||||||
				   eval(0, expr.arg3)))> 
 | 
									   vecEval(0, expr.arg3)))> 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  Lattice<decltype(expr.op.func(eval(0, expr.arg1),
 | 
					  Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
 | 
				
			||||||
				eval(0, expr.arg2),
 | 
									vecEval(0, expr.arg2),
 | 
				
			||||||
				eval(0, expr.arg3)))>  ret(expr);
 | 
								        vecEval(0, expr.arg3)))>  ret(expr);
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					#define EXPRESSION_CLOSURE(function)					\
 | 
				
			||||||
 | 
					  template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> \
 | 
				
			||||||
 | 
					    auto function(Expression &expr) -> decltype(function(closure(expr))) \
 | 
				
			||||||
 | 
					  {									\
 | 
				
			||||||
 | 
					    return function(closure(expr));					\
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#undef GRID_UNOP
 | 
					#undef GRID_UNOP
 | 
				
			||||||
#undef GRID_BINOP
 | 
					#undef GRID_BINOP
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -123,9 +123,9 @@ public:
 | 
				
			|||||||
    auto exprCopy = expr;
 | 
					    auto exprCopy = expr;
 | 
				
			||||||
    ExpressionViewOpen(exprCopy);
 | 
					    ExpressionViewOpen(exprCopy);
 | 
				
			||||||
    auto me  = View(AcceleratorWriteDiscard);
 | 
					    auto me  = View(AcceleratorWriteDiscard);
 | 
				
			||||||
    accelerator_for(ss,me.size(),1,{
 | 
					    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
				
			||||||
      auto tmp = eval(ss,exprCopy);
 | 
					      auto tmp = eval(ss,exprCopy);
 | 
				
			||||||
      vstream(me[ss],tmp);
 | 
					      coalescedWrite(me[ss],tmp);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
    me.ViewClose();
 | 
					    me.ViewClose();
 | 
				
			||||||
    ExpressionViewClose(exprCopy);
 | 
					    ExpressionViewClose(exprCopy);
 | 
				
			||||||
@@ -146,9 +146,9 @@ public:
 | 
				
			|||||||
    auto exprCopy = expr;
 | 
					    auto exprCopy = expr;
 | 
				
			||||||
    ExpressionViewOpen(exprCopy);
 | 
					    ExpressionViewOpen(exprCopy);
 | 
				
			||||||
    auto me  = View(AcceleratorWriteDiscard);
 | 
					    auto me  = View(AcceleratorWriteDiscard);
 | 
				
			||||||
    accelerator_for(ss,me.size(),1,{
 | 
					    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
				
			||||||
      auto tmp = eval(ss,exprCopy);
 | 
					      auto tmp = eval(ss,exprCopy);
 | 
				
			||||||
      vstream(me[ss],tmp);
 | 
					      coalescedWrite(me[ss],tmp);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
    me.ViewClose();
 | 
					    me.ViewClose();
 | 
				
			||||||
    ExpressionViewClose(exprCopy);
 | 
					    ExpressionViewClose(exprCopy);
 | 
				
			||||||
@@ -168,9 +168,9 @@ public:
 | 
				
			|||||||
    auto exprCopy = expr;
 | 
					    auto exprCopy = expr;
 | 
				
			||||||
    ExpressionViewOpen(exprCopy);
 | 
					    ExpressionViewOpen(exprCopy);
 | 
				
			||||||
    auto me  = View(AcceleratorWriteDiscard);
 | 
					    auto me  = View(AcceleratorWriteDiscard);
 | 
				
			||||||
    accelerator_for(ss,me.size(),1,{
 | 
					    accelerator_for(ss,me.size(),vobj::Nsimd(),{
 | 
				
			||||||
      auto tmp = eval(ss,exprCopy);
 | 
					      auto tmp = eval(ss,exprCopy);
 | 
				
			||||||
      vstream(me[ss],tmp);
 | 
					      coalescedWrite(me[ss],tmp);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
    me.ViewClose();
 | 
					    me.ViewClose();
 | 
				
			||||||
    ExpressionViewClose(exprCopy);
 | 
					    ExpressionViewClose(exprCopy);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -42,34 +42,6 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
typedef iScalar<vInteger> vPredicate ;
 | 
					typedef iScalar<vInteger> vPredicate ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
template <class iobj, class vobj, class robj> accelerator_inline 
 | 
					 | 
				
			||||||
vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, const robj &iffalse) 
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  typename std::remove_const<vobj>::type ret;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  typedef typename vobj::scalar_object scalar_object;
 | 
					 | 
				
			||||||
  typedef typename vobj::scalar_type scalar_type;
 | 
					 | 
				
			||||||
  typedef typename vobj::vector_type vector_type;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const int Nsimd = vobj::vector_type::Nsimd();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ExtractBuffer<Integer> mask(Nsimd);
 | 
					 | 
				
			||||||
  ExtractBuffer<scalar_object> truevals(Nsimd);
 | 
					 | 
				
			||||||
  ExtractBuffer<scalar_object> falsevals(Nsimd);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  extract(iftrue, truevals);
 | 
					 | 
				
			||||||
  extract(iffalse, falsevals);
 | 
					 | 
				
			||||||
  extract<vInteger, Integer>(TensorRemove(predicate), mask);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for (int s = 0; s < Nsimd; s++) {
 | 
					 | 
				
			||||||
    if (mask[s]) falsevals[s] = truevals[s];
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  merge(ret, falsevals);
 | 
					 | 
				
			||||||
  return ret;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// compare lattice to lattice
 | 
					// compare lattice to lattice
 | 
				
			||||||
//////////////////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -182,6 +182,14 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
  return;
 | 
					  return;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  autoView(lv,l,CpuRead);
 | 
				
			||||||
 | 
					  peekLocalSite(s,lv,site);
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Must be CPU write view
 | 
					// Must be CPU write view
 | 
				
			||||||
template<class vobj,class sobj>
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
 | 
					inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
 | 
				
			||||||
@@ -210,6 +218,14 @@ inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
 | 
				
			|||||||
  return;
 | 
					  return;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					inline void pokeLocalSite(const sobj &s, Lattice<vobj> &l,Coordinate &site)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  autoView(lv,l,CpuWrite);
 | 
				
			||||||
 | 
					  pokeLocalSite(s,lv,site);
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										79
									
								
								Grid/lattice/Lattice_real_imag.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								Grid/lattice/Lattice_real_imag.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,79 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/lattice/Lattice_reality.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: neo <cossu@post.kek.jp>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#ifndef GRID_LATTICE_REAL_IMAG_H
 | 
				
			||||||
 | 
					#define GRID_LATTICE_REAL_IMAG_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// FIXME .. this is the sector of the code 
 | 
				
			||||||
 | 
					// I am most worried about the directions
 | 
				
			||||||
 | 
					// The choice of burying complex in the SIMD
 | 
				
			||||||
 | 
					// is making the use of "real" and "imag" very cumbersome
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj> inline Lattice<vobj> real(const Lattice<vobj> &lhs){
 | 
				
			||||||
 | 
					  Lattice<vobj> ret(lhs.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  autoView( lhs_v, lhs, AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView( ret_v, ret, AcceleratorWrite);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ret.Checkerboard()=lhs.Checkerboard();
 | 
				
			||||||
 | 
					  accelerator_for( ss, lhs_v.size(), 1, {
 | 
				
			||||||
 | 
					    ret_v[ss] =real(lhs_v[ss]);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class vobj> inline Lattice<vobj> imag(const Lattice<vobj> &lhs){
 | 
				
			||||||
 | 
					  Lattice<vobj> ret(lhs.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  autoView( lhs_v, lhs, AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView( ret_v, ret, AcceleratorWrite);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ret.Checkerboard()=lhs.Checkerboard();
 | 
				
			||||||
 | 
					  accelerator_for( ss, lhs_v.size(), 1, {
 | 
				
			||||||
 | 
					    ret_v[ss] =imag(lhs_v[ss]);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> 
 | 
				
			||||||
 | 
					  auto real(const Expression &expr) -> decltype(real(closure(expr)))		
 | 
				
			||||||
 | 
					{									
 | 
				
			||||||
 | 
					  return real(closure(expr));					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> 
 | 
				
			||||||
 | 
					  auto imag(const Expression &expr) -> decltype(imag(closure(expr)))		
 | 
				
			||||||
 | 
					{									
 | 
				
			||||||
 | 
					  return imag(closure(expr));					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
@@ -208,7 +208,7 @@ public:
 | 
				
			|||||||
  LebesgueOrder LebesgueEvenOdd;
 | 
					  LebesgueOrder LebesgueEvenOdd;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  // Comms buffer
 | 
					  // Comms buffer
 | 
				
			||||||
  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
					  //  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Conserved current utilities
 | 
					  // Conserved current utilities
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -215,7 +215,7 @@ public:
 | 
				
			|||||||
  LebesgueOrder LebesgueEvenOdd;
 | 
					  LebesgueOrder LebesgueEvenOdd;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  // Comms buffer
 | 
					  // Comms buffer
 | 
				
			||||||
  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
					  //  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -59,7 +59,7 @@ public:
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  static inline GaugeLinkField
 | 
					  static inline GaugeLinkField
 | 
				
			||||||
  CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
 | 
					  CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
 | 
				
			||||||
    return Cshift(closure(adj(Link)), mu, -1);
 | 
					    return Cshift(adj(Link), mu, -1);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  static inline GaugeLinkField
 | 
					  static inline GaugeLinkField
 | 
				
			||||||
  CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
 | 
					  CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,23 +53,21 @@ namespace PeriodicBC {
 | 
				
			|||||||
    return Cshift(tmp,mu,-1);// moves towards positive mu
 | 
					    return Cshift(tmp,mu,-1);// moves towards positive mu
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class gauge,typename Op, typename T1> auto
 | 
					  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
 | 
				
			||||||
    CovShiftForward(const Lattice<gauge> &Link, 
 | 
					    auto  CovShiftForward(const Lattice<gauge> &Link, 
 | 
				
			||||||
			  int mu,
 | 
								  int mu,
 | 
				
			||||||
		    const LatticeUnaryExpression<Op,T1> &expr)
 | 
								  const Expr &expr) -> decltype(closure(expr))
 | 
				
			||||||
    -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
 | 
					    auto arg = closure(expr);
 | 
				
			||||||
    return CovShiftForward(Link,mu,arg);
 | 
					    return CovShiftForward(Link,mu,arg);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  template<class gauge,typename Op, typename T1> auto
 | 
					  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
 | 
				
			||||||
    CovShiftBackward(const Lattice<gauge> &Link, 
 | 
					    auto  CovShiftBackward(const Lattice<gauge> &Link, 
 | 
				
			||||||
			   int mu,
 | 
								   int mu,
 | 
				
			||||||
		     const LatticeUnaryExpression<Op,T1> &expr)
 | 
								   const Expr &expr) -> decltype(closure(expr))
 | 
				
			||||||
    -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
 | 
					    auto arg = closure(expr);
 | 
				
			||||||
    return CovShiftForward(Link,mu,arg);
 | 
					    return CovShiftBackward(Link,mu,arg);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -142,26 +140,23 @@ namespace ConjugateBC {
 | 
				
			|||||||
    return Cshift(tmp,mu,-1);// moves towards positive mu
 | 
					    return Cshift(tmp,mu,-1);// moves towards positive mu
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<class gauge,typename Op, typename T1> auto
 | 
					  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
 | 
				
			||||||
    CovShiftForward(const Lattice<gauge> &Link, 
 | 
					    auto  CovShiftForward(const Lattice<gauge> &Link, 
 | 
				
			||||||
			  int mu,
 | 
								  int mu,
 | 
				
			||||||
		    const LatticeUnaryExpression<Op,T1> &expr)
 | 
								  const Expr &expr) -> decltype(closure(expr))
 | 
				
			||||||
    -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
 | 
					    auto arg = closure(expr);
 | 
				
			||||||
    return CovShiftForward(Link,mu,arg);
 | 
					    return CovShiftForward(Link,mu,arg);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  template<class gauge,typename Op, typename T1> auto
 | 
					  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
 | 
				
			||||||
    CovShiftBackward(const Lattice<gauge> &Link, 
 | 
					    auto  CovShiftBackward(const Lattice<gauge> &Link, 
 | 
				
			||||||
			   int mu,
 | 
								   int mu,
 | 
				
			||||||
		     const LatticeUnaryExpression<Op,T1> &expr)
 | 
								   const Expr &expr)  -> decltype(closure(expr))
 | 
				
			||||||
    -> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> 
 | 
					 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
 | 
					    auto arg = closure(expr);
 | 
				
			||||||
    return CovShiftForward(Link,mu,arg);
 | 
					    return CovShiftBackward(Link,mu,arg);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,14 +47,9 @@ public:
 | 
				
			|||||||
  typedef Lattice<vAMatrixF> LatticeAdjMatrixF;
 | 
					  typedef Lattice<vAMatrixF> LatticeAdjMatrixF;
 | 
				
			||||||
  typedef Lattice<vAMatrixD> LatticeAdjMatrixD;
 | 
					  typedef Lattice<vAMatrixD> LatticeAdjMatrixD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
 | 
					  typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >  LatticeAdjField;
 | 
				
			||||||
  LatticeAdjField;
 | 
					  typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
 | 
				
			||||||
  typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
 | 
					  typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
 | 
				
			||||||
  LatticeAdjFieldF;
 | 
					 | 
				
			||||||
  typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
 | 
					 | 
				
			||||||
  LatticeAdjFieldD;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template <class cplx>
 | 
					  template <class cplx>
 | 
				
			||||||
@@ -128,7 +123,9 @@ public:
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )
 | 
					  // Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )
 | 
				
			||||||
  static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) {
 | 
					  static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    conformable(h_out, in);
 | 
					    conformable(h_out, in);
 | 
				
			||||||
    h_out = Zero();
 | 
					    h_out = Zero();
 | 
				
			||||||
    AMatrix iTa;
 | 
					    AMatrix iTa;
 | 
				
			||||||
@@ -136,7 +133,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for (int a = 0; a < Dimension; a++) {
 | 
					    for (int a = 0; a < Dimension; a++) {
 | 
				
			||||||
      generator(a, iTa);
 | 
					      generator(a, iTa);
 | 
				
			||||||
      auto tmp = real(trace(iTa * in)) * coefficient;
 | 
					      LatticeComplex tmp = real(trace(iTa * in)) * coefficient;
 | 
				
			||||||
      pokeColour(h_out, tmp, a);
 | 
					      pokeColour(h_out, tmp, a);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -485,7 +485,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // Up staple    ___ ___
 | 
					        // Up staple    ___ ___
 | 
				
			||||||
        //             |       |
 | 
					        //             |       |
 | 
				
			||||||
        tmp = Cshift(closure(adj(U[nu])), nu, -1);
 | 
					        tmp = Cshift(adj(U[nu]), nu, -1);
 | 
				
			||||||
        tmp = adj(U2[mu]) * tmp;
 | 
					        tmp = adj(U2[mu]) * tmp;
 | 
				
			||||||
        tmp = Cshift(tmp, mu, -2);
 | 
					        tmp = Cshift(tmp, mu, -2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -519,7 +519,7 @@ public:
 | 
				
			|||||||
        //
 | 
					        //
 | 
				
			||||||
        //      |  |
 | 
					        //      |  |
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tmp = Cshift(closure(adj(U2[nu])), nu, -2);
 | 
					        tmp = Cshift(adj(U2[nu]), nu, -2);
 | 
				
			||||||
        tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
 | 
					        tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
 | 
				
			||||||
        tmp = U2[nu] * Cshift(tmp, nu, 2);
 | 
					        tmp = U2[nu] * Cshift(tmp, nu, 2);
 | 
				
			||||||
        Stap += Cshift(tmp, mu, 1);
 | 
					        Stap += Cshift(tmp, mu, 1);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -93,6 +93,11 @@ accelerator_inline ComplexF pow(const ComplexF& r,RealF y){ return(std::pow(r,y)
 | 
				
			|||||||
using std::abs;
 | 
					using std::abs;
 | 
				
			||||||
using std::pow;
 | 
					using std::pow;
 | 
				
			||||||
using std::sqrt;
 | 
					using std::sqrt;
 | 
				
			||||||
 | 
					using std::log;
 | 
				
			||||||
 | 
					using std::exp;
 | 
				
			||||||
 | 
					using std::sin;
 | 
				
			||||||
 | 
					using std::cos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
accelerator_inline RealF    conjugate(const RealF  & r){ return r; }
 | 
					accelerator_inline RealF    conjugate(const RealF  & r){ return r; }
 | 
				
			||||||
accelerator_inline RealD    conjugate(const RealD  & r){ return r; }
 | 
					accelerator_inline RealD    conjugate(const RealD  & r){ return r; }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -190,6 +190,36 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
    typedef ComplexD DoublePrecision;
 | 
					    typedef ComplexD DoublePrecision;
 | 
				
			||||||
    typedef ComplexD DoublePrecision2;
 | 
					    typedef ComplexD DoublePrecision2;
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					  template<> struct GridTypeMapper<std::complex<float> > : public GridTypeMapper_Base {
 | 
				
			||||||
 | 
					    typedef std::complex<float> scalar_type;
 | 
				
			||||||
 | 
					    typedef std::complex<double> scalar_typeD;
 | 
				
			||||||
 | 
					    typedef scalar_type vector_type;
 | 
				
			||||||
 | 
					    typedef scalar_typeD vector_typeD;
 | 
				
			||||||
 | 
					    typedef scalar_type tensor_reduced;
 | 
				
			||||||
 | 
					    typedef scalar_type scalar_object;
 | 
				
			||||||
 | 
					    typedef scalar_typeD scalar_objectD;
 | 
				
			||||||
 | 
					    typedef scalar_type Complexified;
 | 
				
			||||||
 | 
					    typedef RealF Realified;
 | 
				
			||||||
 | 
					    typedef scalar_typeD DoublePrecision;
 | 
				
			||||||
 | 
					    typedef scalar_typeD DoublePrecision2;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  template<> struct GridTypeMapper<std::complex<double> > : public GridTypeMapper_Base {
 | 
				
			||||||
 | 
					    typedef std::complex<double> scalar_type;
 | 
				
			||||||
 | 
					    typedef std::complex<double> scalar_typeD;
 | 
				
			||||||
 | 
					    typedef scalar_type vector_type;
 | 
				
			||||||
 | 
					    typedef scalar_typeD vector_typeD;
 | 
				
			||||||
 | 
					    typedef scalar_type tensor_reduced;
 | 
				
			||||||
 | 
					    typedef scalar_type scalar_object;
 | 
				
			||||||
 | 
					    typedef scalar_typeD scalar_objectD;
 | 
				
			||||||
 | 
					    typedef scalar_type Complexified;
 | 
				
			||||||
 | 
					    typedef RealD Realified;
 | 
				
			||||||
 | 
					    typedef scalar_typeD DoublePrecision;
 | 
				
			||||||
 | 
					    typedef scalar_typeD DoublePrecision2;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  template<> struct GridTypeMapper<ComplexD2> : public GridTypeMapper_Base {
 | 
					  template<> struct GridTypeMapper<ComplexD2> : public GridTypeMapper_Base {
 | 
				
			||||||
    typedef ComplexD2 scalar_type;
 | 
					    typedef ComplexD2 scalar_type;
 | 
				
			||||||
    typedef ComplexD2 scalar_typeD;
 | 
					    typedef ComplexD2 scalar_typeD;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
//
 | 
					//
 | 
				
			||||||
// Memory management:
 | 
					// Memory management:
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
 | 
					//    int   acceleratorIsCommunicable(void *pointer);
 | 
				
			||||||
//    void *acceleratorAllocShared(size_t bytes);
 | 
					//    void *acceleratorAllocShared(size_t bytes);
 | 
				
			||||||
//    void acceleratorFreeShared(void *ptr);
 | 
					//    void acceleratorFreeShared(void *ptr);
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
@@ -90,6 +91,7 @@ void     acceleratorInit(void);
 | 
				
			|||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef GRID_CUDA
 | 
					#ifdef GRID_CUDA
 | 
				
			||||||
 | 
					#include <cuda.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef __CUDA_ARCH__
 | 
					#ifdef __CUDA_ARCH__
 | 
				
			||||||
#define GRID_SIMT
 | 
					#define GRID_SIMT
 | 
				
			||||||
@@ -165,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
 | 
				
			|||||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
 | 
					inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
 | 
				
			||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
 | 
					inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
 | 
					inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
 | 
				
			||||||
 | 
					inline int  acceleratorIsCommunicable(void *ptr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int uvm;
 | 
				
			||||||
 | 
					  auto 
 | 
				
			||||||
 | 
					  cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
 | 
				
			||||||
 | 
					  assert(cuerr == cudaSuccess );
 | 
				
			||||||
 | 
					  if(uvm) return 0;
 | 
				
			||||||
 | 
					  else    return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////
 | 
					//////////////////////////////////////////////
 | 
				
			||||||
@@ -219,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
 | 
				
			|||||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
 | 
					inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
 | 
				
			||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
 | 
					inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
 | 
					inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
 | 
				
			||||||
 | 
					inline int  acceleratorIsCommunicable(void *ptr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#if 0
 | 
				
			||||||
 | 
					  auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
 | 
				
			||||||
 | 
					  if ( uvm = cl::sycl::usm::alloc::shared ) return 1;
 | 
				
			||||||
 | 
					  else return 0;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  return 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -298,6 +319,7 @@ inline void *acceleratorAllocShared(size_t bytes)
 | 
				
			|||||||
  return malloc(bytes);
 | 
					  return malloc(bytes);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					inline int  acceleratorIsCommunicable(void *ptr){ return 1; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline void *acceleratorAllocDevice(size_t bytes)
 | 
					inline void *acceleratorAllocDevice(size_t bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -352,6 +374,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
 | 
				
			|||||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { memcpy(to,from,bytes);}
 | 
					inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes)  { memcpy(to,from,bytes);}
 | 
				
			||||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
 | 
					inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline int  acceleratorIsCommunicable(void *ptr){ return 1; }
 | 
				
			||||||
#ifdef HAVE_MM_MALLOC_H
 | 
					#ifdef HAVE_MM_MALLOC_H
 | 
				
			||||||
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
 | 
					inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
 | 
				
			||||||
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
 | 
					inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -99,10 +99,10 @@ inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_nd
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  os << "[";
 | 
					  os << "[";
 | 
				
			||||||
  for(int s=0;s<v.size();s++) {
 | 
					  for(int s=0;s<v.size();s++) {
 | 
				
			||||||
    os << v[s] << " ";
 | 
					    os << v[s];
 | 
				
			||||||
 | 
					    if( s < (v.size()-1) ){
 | 
				
			||||||
 | 
					      os << " ";
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  if (v.size() > 0) {
 | 
					 | 
				
			||||||
    os << "\b";
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  os << "]";
 | 
					  os << "]";
 | 
				
			||||||
  return os;
 | 
					  return os;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -74,90 +74,6 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  std::vector<double> t_time(Nloop);
 | 
					  std::vector<double> t_time(Nloop);
 | 
				
			||||||
  time_statistics timestat;
 | 
					  time_statistics timestat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
 | 
					 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					 | 
				
			||||||
  header();
 | 
					 | 
				
			||||||
  for(int lat=8;lat<=maxlat;lat+=4){
 | 
					 | 
				
			||||||
    for(int Ls=8;Ls<=8;Ls*=2){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      Coordinate latt_size  ({lat*mpi_layout[0],
 | 
					 | 
				
			||||||
	                      lat*mpi_layout[1],
 | 
					 | 
				
			||||||
      			      lat*mpi_layout[2],
 | 
					 | 
				
			||||||
      			      lat*mpi_layout[3]});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
 | 
					 | 
				
			||||||
      RealD Nrank = Grid._Nprocessors;
 | 
					 | 
				
			||||||
      RealD Nnode = Grid.NodeCount();
 | 
					 | 
				
			||||||
      RealD ppn = Nrank/Nnode;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);	
 | 
					 | 
				
			||||||
      std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      int ncomm;
 | 
					 | 
				
			||||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
					 | 
				
			||||||
      for(int mu=0;mu<8;mu++){
 | 
					 | 
				
			||||||
	xbuf[mu].resize(lat*lat*lat*Ls);
 | 
					 | 
				
			||||||
	rbuf[mu].resize(lat*lat*lat*Ls);
 | 
					 | 
				
			||||||
	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      for(int i=0;i<Nloop;i++){
 | 
					 | 
				
			||||||
      double start=usecond();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	std::vector<CommsRequest_t> requests;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ncomm=0;
 | 
					 | 
				
			||||||
	for(int mu=0;mu<4;mu++){
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	  if (mpi_layout[mu]>1 ) {
 | 
					 | 
				
			||||||
	  
 | 
					 | 
				
			||||||
	    ncomm++;
 | 
					 | 
				
			||||||
	    int comm_proc=1;
 | 
					 | 
				
			||||||
	    int xmit_to_rank;
 | 
					 | 
				
			||||||
	    int recv_from_rank;
 | 
					 | 
				
			||||||
	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
					 | 
				
			||||||
	    Grid.SendToRecvFromBegin(requests,
 | 
					 | 
				
			||||||
				   (void *)&xbuf[mu][0],
 | 
					 | 
				
			||||||
				   xmit_to_rank,
 | 
					 | 
				
			||||||
				   (void *)&rbuf[mu][0],
 | 
					 | 
				
			||||||
				   recv_from_rank,
 | 
					 | 
				
			||||||
				   bytes);
 | 
					 | 
				
			||||||
	
 | 
					 | 
				
			||||||
	    comm_proc = mpi_layout[mu]-1;
 | 
					 | 
				
			||||||
	  
 | 
					 | 
				
			||||||
	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
					 | 
				
			||||||
	    Grid.SendToRecvFromBegin(requests,
 | 
					 | 
				
			||||||
				     (void *)&xbuf[mu+4][0],
 | 
					 | 
				
			||||||
				     xmit_to_rank,
 | 
					 | 
				
			||||||
				     (void *)&rbuf[mu+4][0],
 | 
					 | 
				
			||||||
				     recv_from_rank,
 | 
					 | 
				
			||||||
				     bytes);
 | 
					 | 
				
			||||||
	  
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	Grid.SendToRecvFromComplete(requests);
 | 
					 | 
				
			||||||
	Grid.Barrier();
 | 
					 | 
				
			||||||
	double stop=usecond();
 | 
					 | 
				
			||||||
	t_time[i] = stop-start; // microseconds
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      timestat.statistics(t_time);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      double dbytes    = bytes*ppn;
 | 
					 | 
				
			||||||
      double xbytes    = dbytes*2.0*ncomm;
 | 
					 | 
				
			||||||
      double rbytes    = xbytes;
 | 
					 | 
				
			||||||
      double bidibytes = xbytes+rbytes;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
 | 
					 | 
				
			||||||
               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
 | 
					 | 
				
			||||||
               <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
 | 
					 | 
				
			||||||
               <<xbytes/timestat.max <<" "<< xbytes/timestat.min  
 | 
					 | 
				
			||||||
               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
 | 
					 | 
				
			||||||
               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }    
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
@@ -206,26 +122,22 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
	    {
 | 
						    {
 | 
				
			||||||
	      std::vector<CommsRequest_t> requests;
 | 
						      std::vector<CommsRequest_t> requests;
 | 
				
			||||||
	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
						      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
				
			||||||
	      Grid.SendToRecvFromBegin(requests,
 | 
						      Grid.SendToRecvFrom((void *)&xbuf[mu][0],
 | 
				
			||||||
				       (void *)&xbuf[mu][0],
 | 
					 | 
				
			||||||
				  xmit_to_rank,
 | 
									  xmit_to_rank,
 | 
				
			||||||
				  (void *)&rbuf[mu][0],
 | 
									  (void *)&rbuf[mu][0],
 | 
				
			||||||
				  recv_from_rank,
 | 
									  recv_from_rank,
 | 
				
			||||||
				  bytes);
 | 
									  bytes);
 | 
				
			||||||
	      Grid.SendToRecvFromComplete(requests);
 | 
					 | 
				
			||||||
	    }
 | 
						    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	    comm_proc = mpi_layout[mu]-1;
 | 
						    comm_proc = mpi_layout[mu]-1;
 | 
				
			||||||
	    {
 | 
						    {
 | 
				
			||||||
	      std::vector<CommsRequest_t> requests;
 | 
						      std::vector<CommsRequest_t> requests;
 | 
				
			||||||
	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
						      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 | 
				
			||||||
	      Grid.SendToRecvFromBegin(requests,
 | 
						      Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
 | 
				
			||||||
				       (void *)&xbuf[mu+4][0],
 | 
					 | 
				
			||||||
				  xmit_to_rank,
 | 
									  xmit_to_rank,
 | 
				
			||||||
				  (void *)&rbuf[mu+4][0],
 | 
									  (void *)&rbuf[mu+4][0],
 | 
				
			||||||
				  recv_from_rank,
 | 
									  recv_from_rank,
 | 
				
			||||||
				  bytes);
 | 
									  bytes);
 | 
				
			||||||
	      Grid.SendToRecvFromComplete(requests);
 | 
					 | 
				
			||||||
	    }
 | 
						    }
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -89,6 +89,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	  std::cout << GridLogMessage;
 | 
						  std::cout << GridLogMessage;
 | 
				
			||||||
	  std::cout << latt_size;
 | 
						  std::cout << latt_size;
 | 
				
			||||||
 | 
						  std::cout << "\t\t";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
 | 
						  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
	  GridRedBlackCartesian RBGrid(&Grid);
 | 
						  GridRedBlackCartesian RBGrid(&Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
 | 
				
			|||||||
case ${ac_ACCELERATOR} in
 | 
					case ${ac_ACCELERATOR} in
 | 
				
			||||||
    cuda)
 | 
					    cuda)
 | 
				
			||||||
      echo CUDA acceleration
 | 
					      echo CUDA acceleration
 | 
				
			||||||
 | 
					      LIBS="${LIBS} -lcuda"
 | 
				
			||||||
      AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
 | 
					      AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
 | 
				
			||||||
    sycl)
 | 
					    sycl)
 | 
				
			||||||
      echo SYCL acceleration
 | 
					      echo SYCL acceleration
 | 
				
			||||||
@@ -323,7 +324,6 @@ case ${CXXTEST} in
 | 
				
			|||||||
#    CXXLD="nvcc -v -link"
 | 
					#    CXXLD="nvcc -v -link"
 | 
				
			||||||
    CXX="${CXXBASE} -x cu "
 | 
					    CXX="${CXXBASE} -x cu "
 | 
				
			||||||
    CXXLD="${CXXBASE} -link"
 | 
					    CXXLD="${CXXBASE} -link"
 | 
				
			||||||
#    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
 | 
					 | 
				
			||||||
    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
 | 
					    CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
 | 
				
			||||||
    if test $ac_openmp = yes; then
 | 
					    if test $ac_openmp = yes; then
 | 
				
			||||||
       CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
 | 
					       CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
 | 
				
			||||||
@@ -483,8 +483,7 @@ case ${ac_SHM} in
 | 
				
			|||||||
     LDFLAGS_CPY=$LDFLAGS
 | 
					     LDFLAGS_CPY=$LDFLAGS
 | 
				
			||||||
     CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
 | 
					     CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
 | 
				
			||||||
     LDFLAGS="$AM_LDFLAGS $LDFLAGS"
 | 
					     LDFLAGS="$AM_LDFLAGS $LDFLAGS"
 | 
				
			||||||
     AC_SEARCH_LIBS([shm_unlink], [rt], [],
 | 
					     AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
 | 
				
			||||||
                    [AC_MSG_ERROR("no library found for shm_unlink")])
 | 
					 | 
				
			||||||
     CXXFLAGS=$CXXFLAGS_CPY
 | 
					     CXXFLAGS=$CXXFLAGS_CPY
 | 
				
			||||||
     LDFLAGS=$LDFLAGS_CPY
 | 
					     LDFLAGS=$LDFLAGS_CPY
 | 
				
			||||||
     ;;
 | 
					     ;;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,14 +47,14 @@ public:
 | 
				
			|||||||
                          bool , b,
 | 
					                          bool , b,
 | 
				
			||||||
                          std::vector<double>, array,
 | 
					                          std::vector<double>, array,
 | 
				
			||||||
                          std::vector<std::vector<double> >, twodimarray,
 | 
					                          std::vector<std::vector<double> >, twodimarray,
 | 
				
			||||||
			  std::vector<std::vector<std::vector<Complex> > >, cmplx3darray,
 | 
								  std::vector<std::vector<std::vector<std::complex<double>> > >, cmplx3darray,
 | 
				
			||||||
			  SpinColourMatrix, scm
 | 
								  SpinColourMatrix, scm
 | 
				
			||||||
                          );
 | 
					                          );
 | 
				
			||||||
  myclass() {}
 | 
					  myclass() {}
 | 
				
			||||||
  myclass(int i)
 | 
					  myclass(int i)
 | 
				
			||||||
  : array(4,5.1)
 | 
					  : array(4,5.1)
 | 
				
			||||||
  , twodimarray(3,std::vector<double>(5, 1.23456))
 | 
					  , twodimarray(3,std::vector<double>(5, 1.23456))
 | 
				
			||||||
  , cmplx3darray(3,std::vector<std::vector<Complex>>(5, std::vector<Complex>(7, Complex(1.2, 3.4))))
 | 
					  , cmplx3darray(3,std::vector<std::vector<std::complex<double>>>(5, std::vector<std::complex<double>>(7, std::complex<double>(1.2, 3.4))))
 | 
				
			||||||
  , ve(2, myenum::blue)
 | 
					  , ve(2, myenum::blue)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    e=myenum::red;
 | 
					    e=myenum::red;
 | 
				
			||||||
@@ -121,11 +121,11 @@ namespace Eigen {
 | 
				
			|||||||
// Perform I/O tests on a range of tensor types
 | 
					// Perform I/O tests on a range of tensor types
 | 
				
			||||||
// Test coverage: scalars, complex and GridVectors in single, double and default precision
 | 
					// Test coverage: scalars, complex and GridVectors in single, double and default precision
 | 
				
			||||||
class TensorIO : public Serializable {
 | 
					class TensorIO : public Serializable {
 | 
				
			||||||
  using TestScalar = ComplexD;
 | 
					  using TestScalar = std::complex<double>;
 | 
				
			||||||
  using SR3 = Eigen::Sizes<9,4,2>;
 | 
					  using SR3 = Eigen::Sizes<9,4,2>;
 | 
				
			||||||
  using SR5 = Eigen::Sizes<5,4,3,2,1>;
 | 
					  using SR5 = Eigen::Sizes<5,4,3,2,1>;
 | 
				
			||||||
  using ESO = Eigen::StorageOptions;
 | 
					  using ESO = Eigen::StorageOptions;
 | 
				
			||||||
  using TensorRank3  = Eigen::Tensor<ComplexF, 3, ESO::RowMajor>;
 | 
					  using TensorRank3  = Eigen::Tensor<std::complex<float>, 3, ESO::RowMajor>;
 | 
				
			||||||
  using TensorR5     = Eigen::TensorFixedSize<Real, SR5>;
 | 
					  using TensorR5     = Eigen::TensorFixedSize<Real, SR5>;
 | 
				
			||||||
  using TensorR5Alt  = Eigen::TensorFixedSize<Real, SR5, ESO::RowMajor>;
 | 
					  using TensorR5Alt  = Eigen::TensorFixedSize<Real, SR5, ESO::RowMajor>;
 | 
				
			||||||
  using Tensor942    = Eigen::TensorFixedSize<TestScalar, SR3, ESO::RowMajor>;
 | 
					  using Tensor942    = Eigen::TensorFixedSize<TestScalar, SR3, ESO::RowMajor>;
 | 
				
			||||||
@@ -134,8 +134,8 @@ class TensorIO : public Serializable {
 | 
				
			|||||||
  using LSCTensor    = Eigen::TensorFixedSize<SpinColourMatrix, Eigen::Sizes<6,5>>;
 | 
					  using LSCTensor    = Eigen::TensorFixedSize<SpinColourMatrix, Eigen::Sizes<6,5>>;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  static const Real       FlagR;
 | 
					  static const Real       FlagR;
 | 
				
			||||||
  static const Complex    Flag;
 | 
					  static const std::complex<double>   Flag;
 | 
				
			||||||
  static const ComplexF   FlagF;
 | 
					  static const std::complex<float>    FlagF;
 | 
				
			||||||
  static const TestScalar FlagTS;
 | 
					  static const TestScalar FlagTS;
 | 
				
			||||||
  static const char * const pszFilePrefix;
 | 
					  static const char * const pszFilePrefix;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -230,8 +230,8 @@ public:
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const Real                 TensorIO::FlagR {1};
 | 
					const Real                 TensorIO::FlagR {1};
 | 
				
			||||||
const Complex              TensorIO::Flag  {1,-1};
 | 
					const std::complex<double> TensorIO::Flag  {1,-1};
 | 
				
			||||||
const ComplexF             TensorIO::FlagF {1,-1};
 | 
					const std::complex<float>  TensorIO::FlagF {1,-1};
 | 
				
			||||||
const TensorIO::TestScalar TensorIO::FlagTS{1,-1};
 | 
					const TensorIO::TestScalar TensorIO::FlagTS{1,-1};
 | 
				
			||||||
const char * const         TensorIO::pszFilePrefix = "tensor_";
 | 
					const char * const         TensorIO::pszFilePrefix = "tensor_";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -137,7 +137,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
      LatticeReal iscalar(&Fine);
 | 
					      LatticeReal iscalar(&Fine);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      SpinMatrix GammaFive;
 | 
					      SpinMatrix GammaFive;
 | 
				
			||||||
      iSpinMatrix<vComplex> iGammaFive;
 | 
					 | 
				
			||||||
      ColourMatrix cmat;
 | 
					      ColourMatrix cmat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      random(FineRNG, Foo);
 | 
					      random(FineRNG, Foo);
 | 
				
			||||||
@@ -283,7 +282,6 @@ int main(int argc, char **argv) {
 | 
				
			|||||||
      cMat = mydouble * cMat;
 | 
					      cMat = mydouble * cMat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      sMat = adj(sMat);          // LatticeSpinMatrix adjoint
 | 
					      sMat = adj(sMat);          // LatticeSpinMatrix adjoint
 | 
				
			||||||
      sMat = iGammaFive * sMat;  // SpinMatrix * LatticeSpinMatrix
 | 
					 | 
				
			||||||
      sMat = GammaFive * sMat;   // SpinMatrix * LatticeSpinMatrix
 | 
					      sMat = GammaFive * sMat;   // SpinMatrix * LatticeSpinMatrix
 | 
				
			||||||
      scMat = adj(scMat);
 | 
					      scMat = adj(scMat);
 | 
				
			||||||
      cMat = adj(cMat);
 | 
					      cMat = adj(cMat);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -261,11 +261,11 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  pickCheckerboard(Odd ,phi_o,phi);
 | 
					  pickCheckerboard(Odd ,phi_o,phi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
 | 
					  SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
 | 
				
			||||||
  HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
 | 
					  HermOpEO.MpcDagMpc(chi_e,dchi_e);
 | 
				
			||||||
  HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
 | 
					  HermOpEO.MpcDagMpc(chi_o,dchi_o);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
 | 
					  HermOpEO.MpcDagMpc(phi_e,dphi_e);
 | 
				
			||||||
  HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
 | 
					  HermOpEO.MpcDagMpc(phi_o,dphi_o);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pDce = innerProduct(phi_e,dchi_e);
 | 
					  pDce = innerProduct(phi_e,dchi_e);
 | 
				
			||||||
  pDco = innerProduct(phi_o,dchi_o);
 | 
					  pDco = innerProduct(phi_o,dchi_o);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										80
									
								
								tests/core/Test_where.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								tests/core/Test_where.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,80 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_poisson_fft.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					 ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int N=16;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::vector<int> latt_size  ({N,4,4});
 | 
				
			||||||
 | 
					  std::vector<int> simd_layout({vComplexD::Nsimd(),1,1});
 | 
				
			||||||
 | 
					  std::vector<int> mpi_layout ({1,1,1});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int vol = 1;
 | 
				
			||||||
 | 
					  int nd  = latt_size.size();
 | 
				
			||||||
 | 
					  for(int d=0;d<nd;d++){
 | 
				
			||||||
 | 
					    vol = vol * latt_size[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeComplexD      zz(&GRID);
 | 
				
			||||||
 | 
					  LatticeInteger     coor(&GRID);
 | 
				
			||||||
 | 
					  LatticeComplexD  rn(&GRID);
 | 
				
			||||||
 | 
					  LatticeComplexD  sl(&GRID);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  zz  = ComplexD(0.0,0.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridParallelRNG RNG(&GRID);
 | 
				
			||||||
 | 
					  RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));  
 | 
				
			||||||
 | 
					  gaussian(RNG,rn);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RealD nn=norm2(rn);
 | 
				
			||||||
 | 
					  for(int mu=0;mu<nd;mu++){
 | 
				
			||||||
 | 
					    RealD ns=0.0;
 | 
				
			||||||
 | 
					    for(int t=0;t<latt_size[mu];t++){
 | 
				
			||||||
 | 
					      LatticeCoordinate(coor,mu);
 | 
				
			||||||
 | 
					      sl=where(coor==Integer(t),rn,zz);
 | 
				
			||||||
 | 
					      std::cout <<GridLogMessage<< " sl " << sl<<std::endl;
 | 
				
			||||||
 | 
					      std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
 | 
				
			||||||
 | 
					      ns=ns+norm2(sl);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" " << nn-ns<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -122,7 +122,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
 | 
					  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Subspace Aggregates(Coarse5d,FGrid,cb);
 | 
					  Subspace Aggregates(Coarse5d,FGrid,cb);
 | 
				
			||||||
  Aggregates.CreateSubspaceRandom(RNG5);
 | 
					  //  Aggregates.CreateSubspaceRandom(RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  subspace=Aggregates.subspace;
 | 
					  subspace=Aggregates.subspace;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -163,7 +163,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  LittleDiracOp.M(c_src,c_res);
 | 
					  LittleDiracOp.M(c_src,c_res);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout<<GridLogMessage<<"Testing hermiticity explicitly by inspecting matrix elements"<<std::endl;
 | 
					  std::cout<<GridLogMessage<<"Testing hermiticity explicitly by inspecting matrix elements"<<std::endl;
 | 
				
			||||||
  LittleDiracOp.AssertHermitian();
 | 
					  //  LittleDiracOp.AssertHermitian();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl;
 | 
					  std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl;
 | 
				
			||||||
  CoarseVector phi(Coarse5d);
 | 
					  CoarseVector phi(Coarse5d);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -311,7 +311,7 @@ int main (int argc, char ** argv) {
 | 
				
			|||||||
  std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl;
 | 
					  std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl;
 | 
				
			||||||
  assert(Nm2 >= Nm1);
 | 
					  assert(Nm2 >= Nm1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const int nbasis= 70;
 | 
					  const int nbasis= 32;
 | 
				
			||||||
  CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd);
 | 
					  CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl;
 | 
					  std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl;
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										420
									
								
								tests/solver/Test_dwf_hdcr_2level.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										420
									
								
								tests/solver/Test_dwf_hdcr_2level.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,420 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_dwf_hdcr.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Antonin Portelli <antonin.portelli@me.com>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
 | 
				
			||||||
 | 
					#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace std;
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					/* Params
 | 
				
			||||||
 | 
					 * Grid: 
 | 
				
			||||||
 | 
					 * block1(4)
 | 
				
			||||||
 | 
					 * block2(4)
 | 
				
			||||||
 | 
					 * 
 | 
				
			||||||
 | 
					 * Subspace
 | 
				
			||||||
 | 
					 * * Fine  : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
 | 
				
			||||||
 | 
					 * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 * Smoother:
 | 
				
			||||||
 | 
					 * * Fine: Cheby(hi, lo, order)            --  60,0.5,10
 | 
				
			||||||
 | 
					 * * Coarse: Cheby(hi, lo, order)          --  12,0.1,4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 * Lanczos:
 | 
				
			||||||
 | 
					 * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order))   24,36,24,0.002,4.0,61 
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					RealD InverseApproximation(RealD x){
 | 
				
			||||||
 | 
					  return 1.0/x;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  typedef LinearOperatorBase<Field>                            FineOperator;
 | 
				
			||||||
 | 
					  Matrix         & _SmootherMatrix;
 | 
				
			||||||
 | 
					  FineOperator   & _SmootherOperator;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Chebyshev<Field> Cheby;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
 | 
				
			||||||
 | 
					    _SmootherOperator(SmootherOperator),
 | 
				
			||||||
 | 
					    _SmootherMatrix(SmootherMatrix),
 | 
				
			||||||
 | 
					    Cheby(_lo,_hi,_ord,InverseApproximation)
 | 
				
			||||||
 | 
					  {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void operator() (const Field &in, Field &out) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    Field tmp(in.Grid());
 | 
				
			||||||
 | 
					    MdagMLinearOperator<Matrix,Field>   MdagMOp(_SmootherMatrix); 
 | 
				
			||||||
 | 
					    _SmootherOperator.AdjOp(in,tmp);
 | 
				
			||||||
 | 
					    Cheby(MdagMOp,tmp,out);         
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  typedef LinearOperatorBase<Field>                            FineOperator;
 | 
				
			||||||
 | 
					  Matrix         & SmootherMatrix;
 | 
				
			||||||
 | 
					  FineOperator   & SmootherOperator;
 | 
				
			||||||
 | 
					  RealD tol;
 | 
				
			||||||
 | 
					  RealD shift;
 | 
				
			||||||
 | 
					  int   maxit;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
 | 
				
			||||||
 | 
					    shift(_shift),tol(_tol),maxit(_maxit),
 | 
				
			||||||
 | 
					    SmootherOperator(_SmootherOperator),
 | 
				
			||||||
 | 
					    SmootherMatrix(_SmootherMatrix)
 | 
				
			||||||
 | 
					  {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void operator() (const Field &in, Field &out) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    ZeroGuesser<Field> Guess;
 | 
				
			||||||
 | 
					    ConjugateGradient<Field>  CG(tol,maxit,false);
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					    Field src(in.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
 | 
				
			||||||
 | 
					    SmootherOperator.AdjOp(in,src);
 | 
				
			||||||
 | 
					    Guess(src,out);
 | 
				
			||||||
 | 
					    CG(MdagMOp,src,out); 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field>
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  typedef LinearOperatorBase<Field>                            FineOperator;
 | 
				
			||||||
 | 
					  Matrix         & SmootherMatrix;
 | 
				
			||||||
 | 
					  RealD tol;
 | 
				
			||||||
 | 
					  RealD shift;
 | 
				
			||||||
 | 
					  int   maxit;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RedBlackSmoother(RealD _shift,RealD _tol,int _maxit,Matrix &_SmootherMatrix) :
 | 
				
			||||||
 | 
					    shift(_shift),tol(_tol),maxit(_maxit),
 | 
				
			||||||
 | 
					    SmootherMatrix(_SmootherMatrix)
 | 
				
			||||||
 | 
					  {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void operator() (const Field &in, Field &out) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
 | 
				
			||||||
 | 
					    ConjugateGradient<Field>  CG(tol,maxit,false);
 | 
				
			||||||
 | 
					    out =Zero();
 | 
				
			||||||
 | 
					    SchurRedBlackDiagMooeeSolve<Field> RBSolver(CG);
 | 
				
			||||||
 | 
					    RBSolver(SmootherMatrix,in,out); 
 | 
				
			||||||
 | 
					    std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
 | 
				
			||||||
 | 
					class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
 | 
				
			||||||
 | 
					  typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
 | 
				
			||||||
 | 
					  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
 | 
				
			||||||
 | 
					  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
 | 
				
			||||||
 | 
					  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
 | 
				
			||||||
 | 
					  typedef LinearOperatorBase<FineField>                            FineOperator;
 | 
				
			||||||
 | 
					  typedef LinearFunction    <FineField>                            FineSmoother;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Aggregates     & _Aggregates;
 | 
				
			||||||
 | 
					  CoarseOperator & _CoarseOperator;
 | 
				
			||||||
 | 
					  Matrix         & _FineMatrix;
 | 
				
			||||||
 | 
					  FineOperator   & _FineOperator;
 | 
				
			||||||
 | 
					  Guesser        & _Guess;
 | 
				
			||||||
 | 
					  FineSmoother   & _Smoother1;
 | 
				
			||||||
 | 
					  FineSmoother   & _Smoother2;
 | 
				
			||||||
 | 
					  CoarseSolver   & _CoarseSolve;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int    level;  void Level(int lv) {level = lv; };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, 
 | 
				
			||||||
 | 
								  FineOperator &Fine,Matrix &FineMatrix,
 | 
				
			||||||
 | 
								  FineSmoother &Smoother1,
 | 
				
			||||||
 | 
								  FineSmoother &Smoother2,
 | 
				
			||||||
 | 
								  Guesser &Guess_,
 | 
				
			||||||
 | 
								  CoarseSolver &CoarseSolve_)
 | 
				
			||||||
 | 
					    : _Aggregates(Agg),
 | 
				
			||||||
 | 
					      _CoarseOperator(Coarse),
 | 
				
			||||||
 | 
					      _FineOperator(Fine),
 | 
				
			||||||
 | 
					      _FineMatrix(FineMatrix),
 | 
				
			||||||
 | 
					      _Smoother1(Smoother1),
 | 
				
			||||||
 | 
					      _Smoother2(Smoother2),
 | 
				
			||||||
 | 
					      _Guess(Guess_),
 | 
				
			||||||
 | 
					      _CoarseSolve(CoarseSolve_),
 | 
				
			||||||
 | 
					      level(1)  {  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, 
 | 
				
			||||||
 | 
								  FineOperator &Fine,Matrix &FineMatrix,
 | 
				
			||||||
 | 
								  FineSmoother &Smoother,
 | 
				
			||||||
 | 
								  Guesser &Guess_,
 | 
				
			||||||
 | 
								  CoarseSolver &CoarseSolve_)
 | 
				
			||||||
 | 
					    : _Aggregates(Agg),
 | 
				
			||||||
 | 
					      _CoarseOperator(Coarse),
 | 
				
			||||||
 | 
					      _FineOperator(Fine),
 | 
				
			||||||
 | 
					      _FineMatrix(FineMatrix),
 | 
				
			||||||
 | 
					      _Smoother1(Smoother),
 | 
				
			||||||
 | 
					      _Smoother2(Smoother),
 | 
				
			||||||
 | 
					      _Guess(Guess_),
 | 
				
			||||||
 | 
					      _CoarseSolve(CoarseSolve_),
 | 
				
			||||||
 | 
					      level(1)  {  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual void operator()(const FineField &in, FineField & out) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    CoarseVector Csrc(_CoarseOperator.Grid());
 | 
				
			||||||
 | 
					    CoarseVector Csol(_CoarseOperator.Grid()); 
 | 
				
			||||||
 | 
					    FineField vec1(in.Grid());
 | 
				
			||||||
 | 
					    FineField vec2(in.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    double t;
 | 
				
			||||||
 | 
					    // Fine Smoother
 | 
				
			||||||
 | 
					    t=-usecond();
 | 
				
			||||||
 | 
					    _Smoother1(in,out);
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Update the residual
 | 
				
			||||||
 | 
					    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);   
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Fine to Coarse 
 | 
				
			||||||
 | 
					    t=-usecond();
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace  (Csrc,vec1);
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Coarse correction
 | 
				
			||||||
 | 
					    t=-usecond();
 | 
				
			||||||
 | 
					    _CoarseSolve(Csrc,Csol);
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Coarse to Fine
 | 
				
			||||||
 | 
					    t=-usecond();
 | 
				
			||||||
 | 
					    _Aggregates.PromoteFromSubspace(Csol,vec1); 
 | 
				
			||||||
 | 
					    add(out,out,vec1);
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Residual
 | 
				
			||||||
 | 
					    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Fine Smoother
 | 
				
			||||||
 | 
					    t=-usecond();
 | 
				
			||||||
 | 
					    _Smoother2(vec1,vec2);
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    add( out,out,vec2);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Ls=16;
 | 
				
			||||||
 | 
					  const int rLs=8;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Construct a coarsened grid; utility for this?
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  std::vector<int> block ({2,2,2,2});
 | 
				
			||||||
 | 
					  const int nbasis= 32;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto clatt = GridDefaultLatt();
 | 
				
			||||||
 | 
					  for(int d=0;d<clatt.size();d++){
 | 
				
			||||||
 | 
					    clatt[d] = clatt[d]/block[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
 | 
				
			||||||
 | 
					  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 | 
					  std::vector<int> seeds5({5,6,7,8});
 | 
				
			||||||
 | 
					  std::vector<int> cseeds({5,6,7,8});
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
 | 
				
			||||||
 | 
					  LatticeFermion    src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
 | 
				
			||||||
 | 
					  LatticeFermion result(FGrid); 
 | 
				
			||||||
 | 
					  LatticeGaugeField Umu(UGrid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FieldMetaData header;
 | 
				
			||||||
 | 
					  std::string file("./ckpoint_lat");
 | 
				
			||||||
 | 
					  NerscIO::readConfiguration(Umu,header,file);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  RealD mass=0.001;
 | 
				
			||||||
 | 
					  RealD M5=1.8;
 | 
				
			||||||
 | 
					  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef Aggregation<vSpinColourVector,vTComplex,nbasis>              Subspace;
 | 
				
			||||||
 | 
					  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>          CoarseOperator;
 | 
				
			||||||
 | 
					  typedef CoarseOperator::CoarseVector                                 CoarseVector;
 | 
				
			||||||
 | 
					  typedef CoarseOperator::siteVector siteVector;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Subspace Aggregates(Coarse5d,FGrid,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert ( (nbasis & 0x1)==0);
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int nb=nbasis/2;
 | 
				
			||||||
 | 
					    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,100,0.0);// 18s
 | 
				
			||||||
 | 
					    //   rAggregates.CreateSubspaceChebyshev(RNG5,rHermDefOp,nb,60.0,0.05,500,200,150,0.0);// 15.7 23iter
 | 
				
			||||||
 | 
					    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,150,0.0);//
 | 
				
			||||||
 | 
					    // pad out the rAggregates.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,150,0.0);// 19s 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,200,0.0); 15.2s
 | 
				
			||||||
 | 
					    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,200,0.0); 16.3s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for(int n=0;n<nb;n++){
 | 
				
			||||||
 | 
					      G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    LatticeFermion A(FGrid);
 | 
				
			||||||
 | 
					    LatticeFermion B(FGrid);
 | 
				
			||||||
 | 
					    for(int n=0;n<nb;n++){
 | 
				
			||||||
 | 
					      A = Aggregates.subspace[n];
 | 
				
			||||||
 | 
					      B = Aggregates.subspace[n+nb];
 | 
				
			||||||
 | 
					      Aggregates.subspace[n]   = A+B; // 1+G5 // eigen value of G5R5 is +1
 | 
				
			||||||
 | 
					      Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis>    Level1Op;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MdagMLinearOperator<Level1Op,CoarseVector> IRLHermOp(LDOp);
 | 
				
			||||||
 | 
					  Chebyshev<CoarseVector> IRLCheby(0.002,12.,151);
 | 
				
			||||||
 | 
					  FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,IRLHermOp);
 | 
				
			||||||
 | 
					  PlainHermOp<CoarseVector> IRLOp    (IRLHermOp);
 | 
				
			||||||
 | 
					  int Nk=48;
 | 
				
			||||||
 | 
					  int Nm=64;
 | 
				
			||||||
 | 
					  int Nstop=48;
 | 
				
			||||||
 | 
					  int Nconv;
 | 
				
			||||||
 | 
					  ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<RealD>          eval(Nm);
 | 
				
			||||||
 | 
					  std::vector<CoarseVector>   evec(Nm,Coarse5d);
 | 
				
			||||||
 | 
					  CoarseVector c_src(Coarse5d);
 | 
				
			||||||
 | 
					  gaussian(CRNG,c_src);
 | 
				
			||||||
 | 
					  IRL.calc(eval,evec,c_src,Nconv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  ConjugateGradient<CoarseVector>  CoarseCG(0.01,1000);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ConjugateGradient<CoarseVector>  CoarseCG(0.02,1000);// 14.7s
 | 
				
			||||||
 | 
					  DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
 | 
				
			||||||
 | 
					  NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  c_src=1.0;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Fine        PowerMethod           "<< std::endl;
 | 
				
			||||||
 | 
					  PowerMethod<LatticeFermion>       PM;   PM(HermDefOp,src);
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << " Coarse       PowerMethod           "<< std::endl;
 | 
				
			||||||
 | 
					  MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
 | 
				
			||||||
 | 
					  PowerMethod<CoarseVector>        cPM;  cPM(PosdefLdop,c_src);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Building 2 level Multigrid            "<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  typedef MultiGridPreconditioner<vSpinColourVector,  vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> >   TwoLevelMG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 72 iter 63s
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.1,60.0,20,HermIndefOp,Ddwf); // 66 iter 69s
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,20,HermIndefOp,Ddwf); // 63 iter 65  s
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,20,HermIndefOp,Ddwf); // 69, 70
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,14,HermIndefOp,Ddwf); // 77
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // 23 iter 15.9s
 | 
				
			||||||
 | 
					  //  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 20, 16.9s
 | 
				
			||||||
 | 
					  ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); // 21, 15.6s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.05,0.01,20,HermIndefOp,Ddwf);
 | 
				
			||||||
 | 
					  //  RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
 | 
				
			||||||
 | 
					  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
 | 
				
			||||||
 | 
					  TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
 | 
				
			||||||
 | 
								    HermIndefOp,Ddwf,
 | 
				
			||||||
 | 
								    FineSmoother,
 | 
				
			||||||
 | 
								    DeflCoarseGuesser,
 | 
				
			||||||
 | 
								    DeflCoarseCGNE);
 | 
				
			||||||
 | 
					  TwoLevelPrecon.Level(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
 | 
				
			||||||
 | 
					  PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,TwoLevelPrecon,16,16);
 | 
				
			||||||
 | 
					  l1PGCR.Level(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Calling 2 level Multigrid            "<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  result=Zero();
 | 
				
			||||||
 | 
					  l1PGCR(src,result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Fine CG prec DiagMooee "<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ConjugateGradient<LatticeFermion>          FineCG(1.0e-8,10000);
 | 
				
			||||||
 | 
					  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> FineDiagMooee(Ddwf); //  M_ee - Meo Moo^-1 Moe 
 | 
				
			||||||
 | 
					  LatticeFermion f_src_e(FrbGrid); f_src_e=1.0;
 | 
				
			||||||
 | 
					  LatticeFermion f_res_e(FrbGrid); f_res_e=Zero();
 | 
				
			||||||
 | 
					  FineCG(FineDiagMooee,f_src_e,f_res_e);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Done "<< std::endl;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user