mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 11:15:55 +01:00
Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
commit
ecd3f890f5
@ -48,7 +48,7 @@
|
||||
#ifdef __NVCC__REDEFINE__
|
||||
#pragma pop_macro("__CUDACC__")
|
||||
#pragma pop_macro("__NVCC__")
|
||||
#pragma pop_macro("GRID_SIMT")
|
||||
#pragma pop_macro("__CUDA_ARCH__")
|
||||
#pragma pop
|
||||
#endif
|
||||
|
||||
|
@ -65,8 +65,7 @@ public:
|
||||
MemoryManager::CpuFree((void *)__p,bytes);
|
||||
}
|
||||
|
||||
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||
//void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||
// FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
|
||||
void construct(pointer __p, const _Tp& __val) { assert(0);};
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
@ -74,6 +73,9 @@ public:
|
||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Unified virtual memory
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename _Tp>
|
||||
class uvmAllocator {
|
||||
public:
|
||||
@ -109,22 +111,63 @@ public:
|
||||
MemoryManager::SharedFree((void *)__p,bytes);
|
||||
}
|
||||
|
||||
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||
//void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Device memory
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename _Tp>
|
||||
class devAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef devAllocator<_Tp1> other; };
|
||||
devAllocator() throw() { }
|
||||
devAllocator(const devAllocator&) throw() { }
|
||||
template<typename _Tp1> devAllocator(const devAllocator<_Tp1>&) throw() { }
|
||||
~devAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
_Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
|
||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n)
|
||||
{
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
profilerFree(bytes);
|
||||
MemoryManager::AcceleratorFree((void *)__p,bytes);
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template typedefs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T> using commAllocator = uvmAllocator<T>;
|
||||
//template<class T> using commAllocator = devAllocator<T>;
|
||||
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
||||
template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
|
||||
//template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||
template<class T> using commVector = std::vector<T,devAllocator<T> >;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -93,12 +93,12 @@ private:
|
||||
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
|
||||
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
|
||||
|
||||
static void *AcceleratorAllocate(size_t bytes);
|
||||
static void AcceleratorFree (void *ptr,size_t bytes);
|
||||
static void PrintBytes(void);
|
||||
public:
|
||||
static void Init(void);
|
||||
static void InitMessage(void);
|
||||
static void *AcceleratorAllocate(size_t bytes);
|
||||
static void AcceleratorFree (void *ptr,size_t bytes);
|
||||
static void *SharedAllocate(size_t bytes);
|
||||
static void SharedFree (void *ptr,size_t bytes);
|
||||
static void *CpuAllocate(size_t bytes);
|
||||
|
@ -302,60 +302,28 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int bytes)
|
||||
{
|
||||
std::vector<CommsRequest_t> reqs(0);
|
||||
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
|
||||
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
|
||||
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
|
||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||
SendToRecvFromComplete(reqs);
|
||||
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
|
||||
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
|
||||
}
|
||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int sender,
|
||||
int receiver,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Status stat;
|
||||
assert(sender != receiver);
|
||||
int tag = sender;
|
||||
if ( _processor == sender ) {
|
||||
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||
}
|
||||
if ( _processor == receiver ) {
|
||||
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||
}
|
||||
}
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
unsigned long xcrc = crc32(0L, Z_NULL, 0);
|
||||
unsigned long rcrc = crc32(0L, Z_NULL, 0);
|
||||
|
||||
int myrank = _processor;
|
||||
int ierr;
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
// Enforce no UVM in comms, device or host OK
|
||||
assert(acceleratorIsCommunicable(xmit));
|
||||
assert(acceleratorIsCommunicable(recv));
|
||||
|
||||
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
|
||||
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
||||
recv,bytes,MPI_CHAR,from, from,
|
||||
communicator,MPI_STATUS_IGNORE);
|
||||
assert(ierr==0);
|
||||
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
} else {
|
||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
||||
recv,bytes,MPI_CHAR,from, from,
|
||||
communicator,MPI_STATUS_IGNORE);
|
||||
assert(ierr==0);
|
||||
}
|
||||
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
|
||||
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
|
||||
// printf("proc %d SendToRecvFrom %d bytes xcrc %lx rcrc %lx\n",_processor,bytes,xcrc,rcrc); fflush
|
||||
}
|
||||
|
||||
// Basic Halo comms primitive
|
||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
@ -411,15 +379,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
||||
{
|
||||
int nreq=list.size();
|
||||
|
||||
@ -430,6 +390,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
||||
assert(ierr==0);
|
||||
list.resize(0);
|
||||
}
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
//{
|
||||
//}
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
|
@ -52,23 +52,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<typename Op, typename T1>
|
||||
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))>
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto Cshift(const Expression &expr,int dim,int shift) -> decltype(closure(expr))
|
||||
{
|
||||
return Cshift(closure(expr),dim,shift);
|
||||
}
|
||||
|
@ -76,8 +76,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
auto buffer_p = & buffer[0];
|
||||
auto table = &Cshift_table[0];
|
||||
accelerator_for(i,ent,1,{
|
||||
buffer_p[table[i].first]=rhs_v[table[i].second];
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -185,8 +185,8 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
||||
autoView( rhs_v, rhs, AcceleratorWrite);
|
||||
auto buffer_p = & buffer[0];
|
||||
auto table = &Cshift_table[0];
|
||||
accelerator_for(i,ent,1,{
|
||||
rhs_v[table[i].first]=buffer_p[table[i].second];
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -222,6 +222,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
||||
// Test_cshift_red_black code.
|
||||
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
|
||||
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
||||
assert(0); // This will fail if hit on GPU
|
||||
autoView( rhs_v, rhs, CpuWrite);
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
@ -282,8 +283,8 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
||||
autoView(rhs_v , rhs, AcceleratorRead);
|
||||
autoView(lhs_v , lhs, AcceleratorWrite);
|
||||
auto table = &Cshift_table[0];
|
||||
accelerator_for(i,ent,1,{
|
||||
lhs_v[table[i].first]=rhs_v[table[i].second];
|
||||
accelerator_for(i,ent,vobj::Nsimd(),{
|
||||
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/lattice/Lattice_reduction.h>
|
||||
#include <Grid/lattice/Lattice_peekpoke.h>
|
||||
//#include <Grid/lattice/Lattice_reality.h>
|
||||
#include <Grid/lattice/Lattice_real_imag.h>
|
||||
#include <Grid/lattice/Lattice_comparison_utils.h>
|
||||
#include <Grid/lattice/Lattice_comparison.h>
|
||||
#include <Grid/lattice/Lattice_coordinate.h>
|
||||
|
@ -42,9 +42,24 @@ NAMESPACE_BEGIN(Grid);
|
||||
////////////////////////////////////////////////////
|
||||
// Predicated where support
|
||||
////////////////////////////////////////////////////
|
||||
#ifdef GRID_SIMT
|
||||
// drop to scalar in SIMT; cleaner in fact
|
||||
template <class iobj, class vobj, class robj>
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||
const robj &iffalse) {
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate,
|
||||
const vobj &iftrue,
|
||||
const robj &iffalse)
|
||||
{
|
||||
Integer mask = TensorRemove(predicate);
|
||||
typename std::remove_const<vobj>::type ret= iffalse;
|
||||
if (mask) ret=iftrue;
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
template <class iobj, class vobj, class robj>
|
||||
accelerator_inline vobj predicatedWhere(const iobj &predicate,
|
||||
const vobj &iftrue,
|
||||
const robj &iffalse)
|
||||
{
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
@ -68,6 +83,7 @@ accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftru
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
//Specialization of getVectorType for lattices
|
||||
@ -81,33 +97,62 @@ struct getVectorType<Lattice<T> >{
|
||||
//-- recursive evaluation of expressions; --
|
||||
// handle leaves of syntax tree
|
||||
///////////////////////////////////////////////////
|
||||
template<class sobj> accelerator_inline
|
||||
template<class sobj,
|
||||
typename std::enable_if<!is_lattice<sobj>::value&&!is_lattice_expr<sobj>::value,sobj>::type * = nullptr>
|
||||
accelerator_inline
|
||||
sobj eval(const uint64_t ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
|
||||
template <class lobj> accelerator_inline
|
||||
const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
|
||||
auto eval(const uint64_t ss, const LatticeView<lobj> &arg) -> decltype(arg(ss))
|
||||
{
|
||||
return arg(ss);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
//-- recursive evaluation of expressions; --
|
||||
// whole vector return, used only for expression return type inference
|
||||
///////////////////////////////////////////////////
|
||||
template<class sobj> accelerator_inline
|
||||
sobj vecEval(const uint64_t ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
template <class lobj> accelerator_inline
|
||||
const lobj & vecEval(const uint64_t ss, const LatticeView<lobj> &arg)
|
||||
{
|
||||
return arg[ss];
|
||||
}
|
||||
|
||||
// What needs this?
|
||||
// Cannot be legal on accelerator
|
||||
// Comparison must convert
|
||||
#if 1
|
||||
template <class lobj> accelerator_inline
|
||||
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
|
||||
{
|
||||
assert(0);
|
||||
auto view = arg.View(AcceleratorRead);
|
||||
return view[ss];
|
||||
}
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// handle nodes in syntax tree- eval one operand
|
||||
// vecEval needed (but never called as all expressions offloaded) to infer the return type
|
||||
// in SIMT contexts of closure.
|
||||
///////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> decltype(expr.op.func( vecEval(ss, expr.arg1)))
|
||||
{
|
||||
return expr.op.func( vecEval(ss, expr.arg1) );
|
||||
}
|
||||
// vecEval two operands
|
||||
template <typename Op, typename T1, typename T2> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> decltype(expr.op.func( vecEval(ss,expr.arg1),vecEval(ss,expr.arg2)))
|
||||
{
|
||||
return expr.op.func( vecEval(ss,expr.arg1), vecEval(ss,expr.arg2) );
|
||||
}
|
||||
// vecEval three operands
|
||||
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
|
||||
auto vecEval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> decltype(expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3)))
|
||||
{
|
||||
return expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3));
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// handle nodes in syntax tree- eval one operand coalesced
|
||||
///////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
|
||||
@ -115,23 +160,41 @@ auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
|
||||
{
|
||||
return expr.op.func( eval(ss, expr.arg1) );
|
||||
}
|
||||
///////////////////////
|
||||
// eval two operands
|
||||
///////////////////////
|
||||
template <typename Op, typename T1, typename T2> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
|
||||
{
|
||||
return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
|
||||
}
|
||||
///////////////////////
|
||||
// eval three operands
|
||||
///////////////////////
|
||||
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
|
||||
auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)))
|
||||
-> decltype(expr.op.func(eval(ss, expr.arg1),
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3)))
|
||||
{
|
||||
return expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3));
|
||||
#ifdef GRID_SIMT
|
||||
// Handles Nsimd (vInteger) != Nsimd(ComplexD)
|
||||
typedef decltype(vecEval(ss, expr.arg2)) rvobj;
|
||||
typedef typename std::remove_reference<rvobj>::type vobj;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
|
||||
auto vpred = vecEval(ss,expr.arg1);
|
||||
|
||||
ExtractBuffer<Integer> mask(Nsimd);
|
||||
extract<vInteger, Integer>(TensorRemove(vpred), mask);
|
||||
|
||||
int s = acceleratorSIMTlane(Nsimd);
|
||||
return expr.op.func(mask[s],
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3));
|
||||
#else
|
||||
return expr.op.func(eval(ss, expr.arg1),
|
||||
eval(ss, expr.arg2),
|
||||
eval(ss, expr.arg3));
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@ -229,7 +292,7 @@ template <typename Op, typename T1, typename T2> inline
|
||||
void ExpressionViewOpen(LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
{
|
||||
ExpressionViewOpen(expr.arg1); // recurse AST
|
||||
ExpressionViewOpen(expr.arg2); // recurse AST
|
||||
ExpressionViewOpen(expr.arg2); // rrecurse AST
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void ExpressionViewOpen(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
@ -273,9 +336,8 @@ inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
#define GridUnopClass(name, ret) \
|
||||
template <class arg> \
|
||||
struct name { \
|
||||
static auto accelerator_inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||
template<class _arg> static auto accelerator_inline func(const _arg a) -> decltype(ret) { return ret; } \
|
||||
};
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
@ -286,8 +348,6 @@ GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryReal, real(a));
|
||||
GridUnopClass(UnaryImag, imag(a));
|
||||
GridUnopClass(UnaryToReal, toReal(a));
|
||||
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
@ -306,10 +366,10 @@ GridUnopClass(UnaryExp, exp(a));
|
||||
// Binary operators
|
||||
////////////////////////////////////////////
|
||||
#define GridBinOpClass(name, combination) \
|
||||
template <class left, class right> \
|
||||
struct name { \
|
||||
template <class _left, class _right> \
|
||||
static auto accelerator_inline \
|
||||
func(const left &lhs, const right &rhs) \
|
||||
func(const _left &lhs, const _right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
@ -329,10 +389,10 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
// Trinary conditional op
|
||||
////////////////////////////////////////////////////
|
||||
#define GridTrinOpClass(name, combination) \
|
||||
template <class predicate, class left, class right> \
|
||||
struct name { \
|
||||
template <class _predicate,class _left, class _right> \
|
||||
static auto accelerator_inline \
|
||||
func(const predicate &pred, const left &lhs, const right &rhs) \
|
||||
func(const _predicate &pred, const _left &lhs, const _right &rhs) \
|
||||
-> decltype(combination) const \
|
||||
{ \
|
||||
return combination; \
|
||||
@ -340,17 +400,17 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
};
|
||||
|
||||
GridTrinOpClass(TrinaryWhere,
|
||||
(predicatedWhere<predicate,
|
||||
typename std::remove_reference<left>::type,
|
||||
typename std::remove_reference<right>::type>(pred, lhs,rhs)));
|
||||
(predicatedWhere<
|
||||
typename std::remove_reference<_predicate>::type,
|
||||
typename std::remove_reference<_left>::type,
|
||||
typename std::remove_reference<_right>::type>(pred, lhs,rhs)));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Operator syntactical glue
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
#define GRID_UNOP(name) name
|
||||
#define GRID_BINOP(name) name
|
||||
#define GRID_TRINOP(name) name
|
||||
|
||||
#define GRID_DEF_UNOP(op, name) \
|
||||
template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
|
||||
@ -402,8 +462,6 @@ GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(real, UnaryReal);
|
||||
GRID_DEF_UNOP(imag, UnaryImag);
|
||||
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
@ -436,29 +494,36 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> ret(expr);
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> ret(expr);
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))>
|
||||
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))>
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1),
|
||||
eval(0, expr.arg2),
|
||||
eval(0, expr.arg3)))> ret(expr);
|
||||
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
|
||||
vecEval(0, expr.arg2),
|
||||
vecEval(0, expr.arg3)))> ret(expr);
|
||||
return ret;
|
||||
}
|
||||
#define EXPRESSION_CLOSURE(function) \
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> \
|
||||
auto function(Expression &expr) -> decltype(function(closure(expr))) \
|
||||
{ \
|
||||
return function(closure(expr)); \
|
||||
}
|
||||
|
||||
|
||||
#undef GRID_UNOP
|
||||
#undef GRID_BINOP
|
||||
|
@ -123,9 +123,9 @@ public:
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
@ -146,9 +146,9 @@ public:
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
@ -168,9 +168,9 @@ public:
|
||||
auto exprCopy = expr;
|
||||
ExpressionViewOpen(exprCopy);
|
||||
auto me = View(AcceleratorWriteDiscard);
|
||||
accelerator_for(ss,me.size(),1,{
|
||||
accelerator_for(ss,me.size(),vobj::Nsimd(),{
|
||||
auto tmp = eval(ss,exprCopy);
|
||||
vstream(me[ss],tmp);
|
||||
coalescedWrite(me[ss],tmp);
|
||||
});
|
||||
me.ViewClose();
|
||||
ExpressionViewClose(exprCopy);
|
||||
|
@ -42,34 +42,6 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
typedef iScalar<vInteger> vPredicate ;
|
||||
|
||||
/*
|
||||
template <class iobj, class vobj, class robj> accelerator_inline
|
||||
vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, const robj &iffalse)
|
||||
{
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
|
||||
ExtractBuffer<Integer> mask(Nsimd);
|
||||
ExtractBuffer<scalar_object> truevals(Nsimd);
|
||||
ExtractBuffer<scalar_object> falsevals(Nsimd);
|
||||
|
||||
extract(iftrue, truevals);
|
||||
extract(iffalse, falsevals);
|
||||
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||
|
||||
for (int s = 0; s < Nsimd; s++) {
|
||||
if (mask[s]) falsevals[s] = truevals[s];
|
||||
}
|
||||
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -182,6 +182,14 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
|
||||
|
||||
return;
|
||||
};
|
||||
template<class vobj,class sobj>
|
||||
inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
|
||||
{
|
||||
autoView(lv,l,CpuRead);
|
||||
peekLocalSite(s,lv,site);
|
||||
return;
|
||||
};
|
||||
|
||||
// Must be CPU write view
|
||||
template<class vobj,class sobj>
|
||||
inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
|
||||
@ -210,6 +218,14 @@ inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
|
||||
return;
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
inline void pokeLocalSite(const sobj &s, Lattice<vobj> &l,Coordinate &site)
|
||||
{
|
||||
autoView(lv,l,CpuWrite);
|
||||
pokeLocalSite(s,lv,site);
|
||||
return;
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
||||
|
||||
|
79
Grid/lattice/Lattice_real_imag.h
Normal file
79
Grid/lattice/Lattice_real_imag.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REAL_IMAG_H
|
||||
#define GRID_LATTICE_REAL_IMAG_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class vobj> inline Lattice<vobj> real(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] =real(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
template<class vobj> inline Lattice<vobj> imag(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs.Grid());
|
||||
|
||||
autoView( lhs_v, lhs, AcceleratorRead);
|
||||
autoView( ret_v, ret, AcceleratorWrite);
|
||||
|
||||
ret.Checkerboard()=lhs.Checkerboard();
|
||||
accelerator_for( ss, lhs_v.size(), 1, {
|
||||
ret_v[ss] =imag(lhs_v[ss]);
|
||||
});
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto real(const Expression &expr) -> decltype(real(closure(expr)))
|
||||
{
|
||||
return real(closure(expr));
|
||||
}
|
||||
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
|
||||
auto imag(const Expression &expr) -> decltype(imag(closure(expr)))
|
||||
{
|
||||
return imag(closure(expr));
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -208,7 +208,7 @@ public:
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
|
||||
// Comms buffer
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Conserved current utilities
|
||||
|
@ -215,7 +215,7 @@ public:
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
|
||||
// Comms buffer
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
|
||||
|
||||
};
|
||||
|
@ -59,7 +59,7 @@ public:
|
||||
}
|
||||
static inline GaugeLinkField
|
||||
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
|
||||
return Cshift(closure(adj(Link)), mu, -1);
|
||||
return Cshift(adj(Link), mu, -1);
|
||||
}
|
||||
static inline GaugeLinkField
|
||||
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
|
||||
|
@ -53,23 +53,21 @@ namespace PeriodicBC {
|
||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||
}
|
||||
|
||||
template<class gauge,typename Op, typename T1> auto
|
||||
CovShiftForward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const LatticeUnaryExpression<Op,T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
|
||||
auto CovShiftForward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Expr &expr) -> decltype(closure(expr))
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||
auto arg = closure(expr);
|
||||
return CovShiftForward(Link,mu,arg);
|
||||
}
|
||||
template<class gauge,typename Op, typename T1> auto
|
||||
CovShiftBackward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const LatticeUnaryExpression<Op,T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
|
||||
auto CovShiftBackward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Expr &expr) -> decltype(closure(expr))
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||
return CovShiftForward(Link,mu,arg);
|
||||
auto arg = closure(expr);
|
||||
return CovShiftBackward(Link,mu,arg);
|
||||
}
|
||||
|
||||
}
|
||||
@ -142,26 +140,23 @@ namespace ConjugateBC {
|
||||
return Cshift(tmp,mu,-1);// moves towards positive mu
|
||||
}
|
||||
|
||||
template<class gauge,typename Op, typename T1> auto
|
||||
CovShiftForward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const LatticeUnaryExpression<Op,T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
|
||||
auto CovShiftForward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Expr &expr) -> decltype(closure(expr))
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||
auto arg = closure(expr);
|
||||
return CovShiftForward(Link,mu,arg);
|
||||
}
|
||||
template<class gauge,typename Op, typename T1> auto
|
||||
CovShiftBackward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const LatticeUnaryExpression<Op,T1> &expr)
|
||||
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
|
||||
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
|
||||
auto CovShiftBackward(const Lattice<gauge> &Link,
|
||||
int mu,
|
||||
const Expr &expr) -> decltype(closure(expr))
|
||||
{
|
||||
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
|
||||
return CovShiftForward(Link,mu,arg);
|
||||
auto arg = closure(expr);
|
||||
return CovShiftBackward(Link,mu,arg);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -39,7 +39,7 @@ public:
|
||||
typedef iSUnAdjointMatrix<ComplexF> AMatrixF;
|
||||
typedef iSUnAdjointMatrix<ComplexD> AMatrixD;
|
||||
|
||||
typedef iSUnAdjointMatrix<vComplex> vAMatrix;
|
||||
typedef iSUnAdjointMatrix<vComplex> vAMatrix;
|
||||
typedef iSUnAdjointMatrix<vComplexF> vAMatrixF;
|
||||
typedef iSUnAdjointMatrix<vComplexD> vAMatrixD;
|
||||
|
||||
@ -47,14 +47,9 @@ public:
|
||||
typedef Lattice<vAMatrixF> LatticeAdjMatrixF;
|
||||
typedef Lattice<vAMatrixD> LatticeAdjMatrixD;
|
||||
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
|
||||
LatticeAdjField;
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
|
||||
LatticeAdjFieldF;
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
|
||||
LatticeAdjFieldD;
|
||||
|
||||
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> > LatticeAdjField;
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
|
||||
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
|
||||
|
||||
|
||||
template <class cplx>
|
||||
@ -128,7 +123,9 @@ public:
|
||||
}
|
||||
|
||||
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )
|
||||
static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) {
|
||||
static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0)
|
||||
{
|
||||
|
||||
conformable(h_out, in);
|
||||
h_out = Zero();
|
||||
AMatrix iTa;
|
||||
@ -136,7 +133,7 @@ public:
|
||||
|
||||
for (int a = 0; a < Dimension; a++) {
|
||||
generator(a, iTa);
|
||||
auto tmp = real(trace(iTa * in)) * coefficient;
|
||||
LatticeComplex tmp = real(trace(iTa * in)) * coefficient;
|
||||
pokeColour(h_out, tmp, a);
|
||||
}
|
||||
}
|
||||
|
@ -485,7 +485,7 @@ public:
|
||||
|
||||
// Up staple ___ ___
|
||||
// | |
|
||||
tmp = Cshift(closure(adj(U[nu])), nu, -1);
|
||||
tmp = Cshift(adj(U[nu]), nu, -1);
|
||||
tmp = adj(U2[mu]) * tmp;
|
||||
tmp = Cshift(tmp, mu, -2);
|
||||
|
||||
@ -519,7 +519,7 @@ public:
|
||||
//
|
||||
// | |
|
||||
|
||||
tmp = Cshift(closure(adj(U2[nu])), nu, -2);
|
||||
tmp = Cshift(adj(U2[nu]), nu, -2);
|
||||
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
|
||||
tmp = U2[nu] * Cshift(tmp, nu, 2);
|
||||
Stap += Cshift(tmp, mu, 1);
|
||||
|
@ -93,6 +93,11 @@ accelerator_inline ComplexF pow(const ComplexF& r,RealF y){ return(std::pow(r,y)
|
||||
using std::abs;
|
||||
using std::pow;
|
||||
using std::sqrt;
|
||||
using std::log;
|
||||
using std::exp;
|
||||
using std::sin;
|
||||
using std::cos;
|
||||
|
||||
|
||||
accelerator_inline RealF conjugate(const RealF & r){ return r; }
|
||||
accelerator_inline RealD conjugate(const RealD & r){ return r; }
|
||||
|
@ -190,6 +190,36 @@ NAMESPACE_BEGIN(Grid);
|
||||
typedef ComplexD DoublePrecision;
|
||||
typedef ComplexD DoublePrecision2;
|
||||
};
|
||||
|
||||
#ifdef GRID_CUDA
|
||||
template<> struct GridTypeMapper<std::complex<float> > : public GridTypeMapper_Base {
|
||||
typedef std::complex<float> scalar_type;
|
||||
typedef std::complex<double> scalar_typeD;
|
||||
typedef scalar_type vector_type;
|
||||
typedef scalar_typeD vector_typeD;
|
||||
typedef scalar_type tensor_reduced;
|
||||
typedef scalar_type scalar_object;
|
||||
typedef scalar_typeD scalar_objectD;
|
||||
typedef scalar_type Complexified;
|
||||
typedef RealF Realified;
|
||||
typedef scalar_typeD DoublePrecision;
|
||||
typedef scalar_typeD DoublePrecision2;
|
||||
};
|
||||
template<> struct GridTypeMapper<std::complex<double> > : public GridTypeMapper_Base {
|
||||
typedef std::complex<double> scalar_type;
|
||||
typedef std::complex<double> scalar_typeD;
|
||||
typedef scalar_type vector_type;
|
||||
typedef scalar_typeD vector_typeD;
|
||||
typedef scalar_type tensor_reduced;
|
||||
typedef scalar_type scalar_object;
|
||||
typedef scalar_typeD scalar_objectD;
|
||||
typedef scalar_type Complexified;
|
||||
typedef RealD Realified;
|
||||
typedef scalar_typeD DoublePrecision;
|
||||
typedef scalar_typeD DoublePrecision2;
|
||||
};
|
||||
#endif
|
||||
|
||||
template<> struct GridTypeMapper<ComplexD2> : public GridTypeMapper_Base {
|
||||
typedef ComplexD2 scalar_type;
|
||||
typedef ComplexD2 scalar_typeD;
|
||||
|
@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
//
|
||||
// Memory management:
|
||||
//
|
||||
// int acceleratorIsCommunicable(void *pointer);
|
||||
// void *acceleratorAllocShared(size_t bytes);
|
||||
// void acceleratorFreeShared(void *ptr);
|
||||
//
|
||||
@ -90,6 +91,7 @@ void acceleratorInit(void);
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_CUDA
|
||||
#include <cuda.h>
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
#define GRID_SIMT
|
||||
@ -165,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
|
||||
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
|
||||
inline int acceleratorIsCommunicable(void *ptr)
|
||||
{
|
||||
int uvm;
|
||||
auto
|
||||
cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
|
||||
assert(cuerr == cudaSuccess );
|
||||
if(uvm) return 0;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////
|
||||
@ -219,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
|
||||
inline int acceleratorIsCommunicable(void *ptr)
|
||||
{
|
||||
#if 0
|
||||
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
|
||||
if ( uvm = cl::sycl::usm::alloc::shared ) return 1;
|
||||
else return 0;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -294,6 +315,7 @@ inline void *acceleratorAllocShared(size_t bytes)
|
||||
}
|
||||
return ptr;
|
||||
};
|
||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||
|
||||
inline void *acceleratorAllocDevice(size_t bytes)
|
||||
{
|
||||
@ -348,6 +370,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
|
||||
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
|
||||
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
|
||||
|
||||
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||
|
@ -99,10 +99,10 @@ inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_nd
|
||||
{
|
||||
os << "[";
|
||||
for(int s=0;s<v.size();s++) {
|
||||
os << v[s] << " ";
|
||||
}
|
||||
if (v.size() > 0) {
|
||||
os << "\b";
|
||||
os << v[s];
|
||||
if( s < (v.size()-1) ){
|
||||
os << " ";
|
||||
}
|
||||
}
|
||||
os << "]";
|
||||
return os;
|
||||
|
@ -74,90 +74,6 @@ int main (int argc, char ** argv)
|
||||
std::vector<double> t_time(Nloop);
|
||||
time_statistics timestat;
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
header();
|
||||
for(int lat=8;lat<=maxlat;lat+=4){
|
||||
for(int Ls=8;Ls<=8;Ls*=2){
|
||||
|
||||
Coordinate latt_size ({lat*mpi_layout[0],
|
||||
lat*mpi_layout[1],
|
||||
lat*mpi_layout[2],
|
||||
lat*mpi_layout[3]});
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
RealD Nrank = Grid._Nprocessors;
|
||||
RealD Nnode = Grid.NodeCount();
|
||||
RealD ppn = Nrank/Nnode;
|
||||
|
||||
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
|
||||
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
|
||||
|
||||
int ncomm;
|
||||
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||
for(int mu=0;mu<8;mu++){
|
||||
xbuf[mu].resize(lat*lat*lat*Ls);
|
||||
rbuf[mu].resize(lat*lat*lat*Ls);
|
||||
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
|
||||
}
|
||||
|
||||
for(int i=0;i<Nloop;i++){
|
||||
double start=usecond();
|
||||
|
||||
std::vector<CommsRequest_t> requests;
|
||||
|
||||
ncomm=0;
|
||||
for(int mu=0;mu<4;mu++){
|
||||
|
||||
if (mpi_layout[mu]>1 ) {
|
||||
|
||||
ncomm++;
|
||||
int comm_proc=1;
|
||||
int xmit_to_rank;
|
||||
int recv_from_rank;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.SendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.SendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
|
||||
}
|
||||
}
|
||||
Grid.SendToRecvFromComplete(requests);
|
||||
Grid.Barrier();
|
||||
double stop=usecond();
|
||||
t_time[i] = stop-start; // microseconds
|
||||
}
|
||||
|
||||
timestat.statistics(t_time);
|
||||
|
||||
double dbytes = bytes*ppn;
|
||||
double xbytes = dbytes*2.0*ncomm;
|
||||
double rbytes = xbytes;
|
||||
double bidibytes = xbytes+rbytes;
|
||||
|
||||
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
|
||||
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
|
||||
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
|
||||
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
|
||||
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
|
||||
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||
@ -206,26 +122,22 @@ int main (int argc, char ** argv)
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.SendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFromComplete(requests);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
}
|
||||
|
||||
comm_proc = mpi_layout[mu]-1;
|
||||
{
|
||||
std::vector<CommsRequest_t> requests;
|
||||
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
Grid.SendToRecvFromBegin(requests,
|
||||
(void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
Grid.SendToRecvFromComplete(requests);
|
||||
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
|
||||
xmit_to_rank,
|
||||
(void *)&rbuf[mu+4][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -89,6 +89,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout << GridLogMessage;
|
||||
std::cout << latt_size;
|
||||
std::cout << "\t\t";
|
||||
|
||||
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
|
@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
|
||||
case ${ac_ACCELERATOR} in
|
||||
cuda)
|
||||
echo CUDA acceleration
|
||||
LIBS="${LIBS} -lcuda"
|
||||
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
|
||||
sycl)
|
||||
echo SYCL acceleration
|
||||
@ -323,7 +324,6 @@ case ${CXXTEST} in
|
||||
# CXXLD="nvcc -v -link"
|
||||
CXX="${CXXBASE} -x cu "
|
||||
CXXLD="${CXXBASE} -link"
|
||||
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
|
||||
if test $ac_openmp = yes; then
|
||||
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
|
||||
@ -484,8 +484,7 @@ case ${ac_SHM} in
|
||||
LDFLAGS_CPY=$LDFLAGS
|
||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||
AC_SEARCH_LIBS([shm_unlink], [rt], [],
|
||||
[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||
AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
|
||||
CXXFLAGS=$CXXFLAGS_CPY
|
||||
LDFLAGS=$LDFLAGS_CPY
|
||||
;;
|
||||
|
@ -47,14 +47,14 @@ public:
|
||||
bool , b,
|
||||
std::vector<double>, array,
|
||||
std::vector<std::vector<double> >, twodimarray,
|
||||
std::vector<std::vector<std::vector<Complex> > >, cmplx3darray,
|
||||
std::vector<std::vector<std::vector<std::complex<double>> > >, cmplx3darray,
|
||||
SpinColourMatrix, scm
|
||||
);
|
||||
myclass() {}
|
||||
myclass(int i)
|
||||
: array(4,5.1)
|
||||
, twodimarray(3,std::vector<double>(5, 1.23456))
|
||||
, cmplx3darray(3,std::vector<std::vector<Complex>>(5, std::vector<Complex>(7, Complex(1.2, 3.4))))
|
||||
, cmplx3darray(3,std::vector<std::vector<std::complex<double>>>(5, std::vector<std::complex<double>>(7, std::complex<double>(1.2, 3.4))))
|
||||
, ve(2, myenum::blue)
|
||||
{
|
||||
e=myenum::red;
|
||||
@ -121,11 +121,11 @@ namespace Eigen {
|
||||
// Perform I/O tests on a range of tensor types
|
||||
// Test coverage: scalars, complex and GridVectors in single, double and default precision
|
||||
class TensorIO : public Serializable {
|
||||
using TestScalar = ComplexD;
|
||||
using TestScalar = std::complex<double>;
|
||||
using SR3 = Eigen::Sizes<9,4,2>;
|
||||
using SR5 = Eigen::Sizes<5,4,3,2,1>;
|
||||
using ESO = Eigen::StorageOptions;
|
||||
using TensorRank3 = Eigen::Tensor<ComplexF, 3, ESO::RowMajor>;
|
||||
using TensorRank3 = Eigen::Tensor<std::complex<float>, 3, ESO::RowMajor>;
|
||||
using TensorR5 = Eigen::TensorFixedSize<Real, SR5>;
|
||||
using TensorR5Alt = Eigen::TensorFixedSize<Real, SR5, ESO::RowMajor>;
|
||||
using Tensor942 = Eigen::TensorFixedSize<TestScalar, SR3, ESO::RowMajor>;
|
||||
@ -134,8 +134,8 @@ class TensorIO : public Serializable {
|
||||
using LSCTensor = Eigen::TensorFixedSize<SpinColourMatrix, Eigen::Sizes<6,5>>;
|
||||
|
||||
static const Real FlagR;
|
||||
static const Complex Flag;
|
||||
static const ComplexF FlagF;
|
||||
static const std::complex<double> Flag;
|
||||
static const std::complex<float> FlagF;
|
||||
static const TestScalar FlagTS;
|
||||
static const char * const pszFilePrefix;
|
||||
|
||||
@ -230,8 +230,8 @@ public:
|
||||
};
|
||||
|
||||
const Real TensorIO::FlagR {1};
|
||||
const Complex TensorIO::Flag {1,-1};
|
||||
const ComplexF TensorIO::FlagF {1,-1};
|
||||
const std::complex<double> TensorIO::Flag {1,-1};
|
||||
const std::complex<float> TensorIO::FlagF {1,-1};
|
||||
const TensorIO::TestScalar TensorIO::FlagTS{1,-1};
|
||||
const char * const TensorIO::pszFilePrefix = "tensor_";
|
||||
|
||||
|
@ -137,7 +137,6 @@ int main(int argc, char **argv) {
|
||||
LatticeReal iscalar(&Fine);
|
||||
|
||||
SpinMatrix GammaFive;
|
||||
iSpinMatrix<vComplex> iGammaFive;
|
||||
ColourMatrix cmat;
|
||||
|
||||
random(FineRNG, Foo);
|
||||
@ -283,7 +282,6 @@ int main(int argc, char **argv) {
|
||||
cMat = mydouble * cMat;
|
||||
|
||||
sMat = adj(sMat); // LatticeSpinMatrix adjoint
|
||||
sMat = iGammaFive * sMat; // SpinMatrix * LatticeSpinMatrix
|
||||
sMat = GammaFive * sMat; // SpinMatrix * LatticeSpinMatrix
|
||||
scMat = adj(scMat);
|
||||
cMat = adj(cMat);
|
||||
|
@ -261,11 +261,11 @@ int main (int argc, char ** argv)
|
||||
pickCheckerboard(Odd ,phi_o,phi);
|
||||
|
||||
SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
|
||||
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
|
||||
HermOpEO.MpcDagMpc(chi_e,dchi_e);
|
||||
HermOpEO.MpcDagMpc(chi_o,dchi_o);
|
||||
|
||||
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
|
||||
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
|
||||
HermOpEO.MpcDagMpc(phi_e,dphi_e);
|
||||
HermOpEO.MpcDagMpc(phi_o,dphi_o);
|
||||
|
||||
pDce = innerProduct(phi_e,dchi_e);
|
||||
pDco = innerProduct(phi_o,dchi_o);
|
||||
|
80
tests/core/Test_where.cc
Normal file
80
tests/core/Test_where.cc
Normal file
@ -0,0 +1,80 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_poisson_fft.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
int N=16;
|
||||
|
||||
std::vector<int> latt_size ({N,4,4});
|
||||
std::vector<int> simd_layout({vComplexD::Nsimd(),1,1});
|
||||
std::vector<int> mpi_layout ({1,1,1});
|
||||
|
||||
int vol = 1;
|
||||
int nd = latt_size.size();
|
||||
for(int d=0;d<nd;d++){
|
||||
vol = vol * latt_size[d];
|
||||
}
|
||||
|
||||
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
LatticeComplexD zz(&GRID);
|
||||
LatticeInteger coor(&GRID);
|
||||
LatticeComplexD rn(&GRID);
|
||||
LatticeComplexD sl(&GRID);
|
||||
|
||||
zz = ComplexD(0.0,0.0);
|
||||
|
||||
GridParallelRNG RNG(&GRID);
|
||||
RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
gaussian(RNG,rn);
|
||||
|
||||
RealD nn=norm2(rn);
|
||||
for(int mu=0;mu<nd;mu++){
|
||||
RealD ns=0.0;
|
||||
for(int t=0;t<latt_size[mu];t++){
|
||||
LatticeCoordinate(coor,mu);
|
||||
sl=where(coor==Integer(t),rn,zz);
|
||||
std::cout <<GridLogMessage<< " sl " << sl<<std::endl;
|
||||
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
|
||||
ns=ns+norm2(sl);
|
||||
}
|
||||
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" " << nn-ns<<std::endl;
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -122,7 +122,7 @@ int main (int argc, char ** argv)
|
||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||
|
||||
Subspace Aggregates(Coarse5d,FGrid,cb);
|
||||
Aggregates.CreateSubspaceRandom(RNG5);
|
||||
// Aggregates.CreateSubspaceRandom(RNG5);
|
||||
|
||||
subspace=Aggregates.subspace;
|
||||
|
||||
@ -163,7 +163,7 @@ int main (int argc, char ** argv)
|
||||
LittleDiracOp.M(c_src,c_res);
|
||||
|
||||
std::cout<<GridLogMessage<<"Testing hermiticity explicitly by inspecting matrix elements"<<std::endl;
|
||||
LittleDiracOp.AssertHermitian();
|
||||
// LittleDiracOp.AssertHermitian();
|
||||
|
||||
std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl;
|
||||
CoarseVector phi(Coarse5d);
|
||||
|
@ -311,7 +311,7 @@ int main (int argc, char ** argv) {
|
||||
std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl;
|
||||
assert(Nm2 >= Nm1);
|
||||
|
||||
const int nbasis= 70;
|
||||
const int nbasis= 32;
|
||||
CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd);
|
||||
|
||||
std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl;
|
||||
|
420
tests/solver/Test_dwf_hdcr_2level.cc
Normal file
420
tests/solver/Test_dwf_hdcr_2level.cc
Normal file
@ -0,0 +1,420 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_dwf_hdcr.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
/* Params
|
||||
* Grid:
|
||||
* block1(4)
|
||||
* block2(4)
|
||||
*
|
||||
* Subspace
|
||||
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||
|
||||
* Smoother:
|
||||
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||
|
||||
* Lanczos:
|
||||
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||
*/
|
||||
RealD InverseApproximation(RealD x){
|
||||
return 1.0/x;
|
||||
}
|
||||
|
||||
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
typedef LinearOperatorBase<Field> FineOperator;
|
||||
Matrix & _SmootherMatrix;
|
||||
FineOperator & _SmootherOperator;
|
||||
|
||||
Chebyshev<Field> Cheby;
|
||||
|
||||
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||
_SmootherOperator(SmootherOperator),
|
||||
_SmootherMatrix(SmootherMatrix),
|
||||
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||
{};
|
||||
|
||||
void operator() (const Field &in, Field &out)
|
||||
{
|
||||
Field tmp(in.Grid());
|
||||
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||
_SmootherOperator.AdjOp(in,tmp);
|
||||
Cheby(MdagMOp,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
typedef LinearOperatorBase<Field> FineOperator;
|
||||
Matrix & SmootherMatrix;
|
||||
FineOperator & SmootherOperator;
|
||||
RealD tol;
|
||||
RealD shift;
|
||||
int maxit;
|
||||
|
||||
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||
shift(_shift),tol(_tol),maxit(_maxit),
|
||||
SmootherOperator(_SmootherOperator),
|
||||
SmootherMatrix(_SmootherMatrix)
|
||||
{};
|
||||
|
||||
void operator() (const Field &in, Field &out)
|
||||
{
|
||||
ZeroGuesser<Field> Guess;
|
||||
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||
|
||||
Field src(in.Grid());
|
||||
|
||||
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||
SmootherOperator.AdjOp(in,src);
|
||||
Guess(src,out);
|
||||
CG(MdagMOp,src,out);
|
||||
}
|
||||
};
|
||||
template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
typedef LinearOperatorBase<Field> FineOperator;
|
||||
Matrix & SmootherMatrix;
|
||||
RealD tol;
|
||||
RealD shift;
|
||||
int maxit;
|
||||
|
||||
RedBlackSmoother(RealD _shift,RealD _tol,int _maxit,Matrix &_SmootherMatrix) :
|
||||
shift(_shift),tol(_tol),maxit(_maxit),
|
||||
SmootherMatrix(_SmootherMatrix)
|
||||
{};
|
||||
|
||||
void operator() (const Field &in, Field &out)
|
||||
{
|
||||
std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
|
||||
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||
out =Zero();
|
||||
SchurRedBlackDiagMooeeSolve<Field> RBSolver(CG);
|
||||
RBSolver(SmootherMatrix,in,out);
|
||||
std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||
public:
|
||||
|
||||
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||
typedef LinearOperatorBase<FineField> FineOperator;
|
||||
typedef LinearFunction <FineField> FineSmoother;
|
||||
|
||||
Aggregates & _Aggregates;
|
||||
CoarseOperator & _CoarseOperator;
|
||||
Matrix & _FineMatrix;
|
||||
FineOperator & _FineOperator;
|
||||
Guesser & _Guess;
|
||||
FineSmoother & _Smoother1;
|
||||
FineSmoother & _Smoother2;
|
||||
CoarseSolver & _CoarseSolve;
|
||||
|
||||
int level; void Level(int lv) {level = lv; };
|
||||
|
||||
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||
|
||||
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||
FineOperator &Fine,Matrix &FineMatrix,
|
||||
FineSmoother &Smoother1,
|
||||
FineSmoother &Smoother2,
|
||||
Guesser &Guess_,
|
||||
CoarseSolver &CoarseSolve_)
|
||||
: _Aggregates(Agg),
|
||||
_CoarseOperator(Coarse),
|
||||
_FineOperator(Fine),
|
||||
_FineMatrix(FineMatrix),
|
||||
_Smoother1(Smoother1),
|
||||
_Smoother2(Smoother2),
|
||||
_Guess(Guess_),
|
||||
_CoarseSolve(CoarseSolve_),
|
||||
level(1) { }
|
||||
|
||||
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||
FineOperator &Fine,Matrix &FineMatrix,
|
||||
FineSmoother &Smoother,
|
||||
Guesser &Guess_,
|
||||
CoarseSolver &CoarseSolve_)
|
||||
: _Aggregates(Agg),
|
||||
_CoarseOperator(Coarse),
|
||||
_FineOperator(Fine),
|
||||
_FineMatrix(FineMatrix),
|
||||
_Smoother1(Smoother),
|
||||
_Smoother2(Smoother),
|
||||
_Guess(Guess_),
|
||||
_CoarseSolve(CoarseSolve_),
|
||||
level(1) { }
|
||||
|
||||
virtual void operator()(const FineField &in, FineField & out)
|
||||
{
|
||||
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||
CoarseVector Csol(_CoarseOperator.Grid());
|
||||
FineField vec1(in.Grid());
|
||||
FineField vec2(in.Grid());
|
||||
|
||||
double t;
|
||||
// Fine Smoother
|
||||
t=-usecond();
|
||||
_Smoother1(in,out);
|
||||
t+=usecond();
|
||||
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||
|
||||
// Update the residual
|
||||
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||
|
||||
// Fine to Coarse
|
||||
t=-usecond();
|
||||
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||
t+=usecond();
|
||||
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||
|
||||
// Coarse correction
|
||||
t=-usecond();
|
||||
_CoarseSolve(Csrc,Csol);
|
||||
t+=usecond();
|
||||
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||
|
||||
// Coarse to Fine
|
||||
t=-usecond();
|
||||
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||
add(out,out,vec1);
|
||||
t+=usecond();
|
||||
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||
|
||||
// Residual
|
||||
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||
|
||||
// Fine Smoother
|
||||
t=-usecond();
|
||||
_Smoother2(vec1,vec2);
|
||||
t+=usecond();
|
||||
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||
|
||||
add( out,out,vec2);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
const int Ls=16;
|
||||
const int rLs=8;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Construct a coarsened grid; utility for this?
|
||||
///////////////////////////////////////////////////
|
||||
std::vector<int> block ({2,2,2,2});
|
||||
const int nbasis= 32;
|
||||
|
||||
auto clatt = GridDefaultLatt();
|
||||
for(int d=0;d<clatt.size();d++){
|
||||
clatt[d] = clatt[d]/block[d];
|
||||
}
|
||||
|
||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
std::vector<int> cseeds({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||
LatticeFermion result(FGrid);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
|
||||
FieldMetaData header;
|
||||
std::string file("./ckpoint_lat");
|
||||
NerscIO::readConfiguration(Umu,header,file);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
RealD mass=0.001;
|
||||
RealD M5=1.8;
|
||||
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||
typedef CoarseOperator::siteVector siteVector;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||
|
||||
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||
|
||||
assert ( (nbasis & 0x1)==0);
|
||||
{
|
||||
int nb=nbasis/2;
|
||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,100,0.0);// 18s
|
||||
// rAggregates.CreateSubspaceChebyshev(RNG5,rHermDefOp,nb,60.0,0.05,500,200,150,0.0);// 15.7 23iter
|
||||
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,150,0.0);//
|
||||
// pad out the rAggregates.
|
||||
|
||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,150,0.0);// 19s
|
||||
|
||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,200,0.0); 15.2s
|
||||
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,200,0.0); 16.3s
|
||||
|
||||
for(int n=0;n<nb;n++){
|
||||
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||
}
|
||||
LatticeFermion A(FGrid);
|
||||
LatticeFermion B(FGrid);
|
||||
for(int n=0;n<nb;n++){
|
||||
A = Aggregates.subspace[n];
|
||||
B = Aggregates.subspace[n+nb];
|
||||
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||
|
||||
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||
|
||||
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
|
||||
MdagMLinearOperator<Level1Op,CoarseVector> IRLHermOp(LDOp);
|
||||
Chebyshev<CoarseVector> IRLCheby(0.002,12.,151);
|
||||
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,IRLHermOp);
|
||||
PlainHermOp<CoarseVector> IRLOp (IRLHermOp);
|
||||
int Nk=48;
|
||||
int Nm=64;
|
||||
int Nstop=48;
|
||||
int Nconv;
|
||||
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
|
||||
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||
CoarseVector c_src(Coarse5d);
|
||||
gaussian(CRNG,c_src);
|
||||
IRL.calc(eval,evec,c_src,Nconv);
|
||||
|
||||
// ConjugateGradient<CoarseVector> CoarseCG(0.01,1000);
|
||||
|
||||
ConjugateGradient<CoarseVector> CoarseCG(0.02,1000);// 14.7s
|
||||
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||
NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
|
||||
|
||||
c_src=1.0;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
|
||||
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
|
||||
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
|
||||
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Building 2 level Multigrid "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||
|
||||
// MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 72 iter 63s
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.1,60.0,20,HermIndefOp,Ddwf); // 66 iter 69s
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,20,HermIndefOp,Ddwf); // 63 iter 65 s
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,20,HermIndefOp,Ddwf); // 69, 70
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,14,HermIndefOp,Ddwf); // 77
|
||||
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // 23 iter 15.9s
|
||||
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 20, 16.9s
|
||||
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); // 21, 15.6s
|
||||
|
||||
// MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.05,0.01,20,HermIndefOp,Ddwf);
|
||||
// RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf);
|
||||
|
||||
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
|
||||
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
|
||||
HermIndefOp,Ddwf,
|
||||
FineSmoother,
|
||||
DeflCoarseGuesser,
|
||||
DeflCoarseCGNE);
|
||||
TwoLevelPrecon.Level(1);
|
||||
|
||||
// Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
|
||||
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,TwoLevelPrecon,16,16);
|
||||
l1PGCR.Level(1);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
result=Zero();
|
||||
l1PGCR(src,result);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Fine CG prec DiagMooee "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
|
||||
|
||||
ConjugateGradient<LatticeFermion> FineCG(1.0e-8,10000);
|
||||
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe
|
||||
LatticeFermion f_src_e(FrbGrid); f_src_e=1.0;
|
||||
LatticeFermion f_res_e(FrbGrid); f_res_e=Zero();
|
||||
FineCG(FineDiagMooee,f_src_e,f_res_e);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
Grid_finalize();
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user