
Merge branch 'develop' of https://github.com/paboyle/Grid into develop

Peter Boyle, 2020-09-16 02:30:14 +01:00
commit ecd3f890f5
32 changed files with 930 additions and 341 deletions

View File

@ -48,7 +48,7 @@
#ifdef __NVCC__REDEFINE__
#pragma pop_macro("__CUDACC__")
#pragma pop_macro("__NVCC__")
#pragma pop_macro("GRID_SIMT")
#pragma pop_macro("__CUDA_ARCH__")
#pragma pop
#endif

View File

@ -65,8 +65,7 @@ public:
MemoryManager::CpuFree((void *)__p,bytes);
}
// FIXME: hack for the copy constructor, eventually it must be avoided
//void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
// FIXME: hack for the copy constructor: it must be avoided to prevent a single-threaded copy loop
void construct(pointer __p, const _Tp& __val) { assert(0);};
void construct(pointer __p) { };
void destroy(pointer __p) { };
@ -74,6 +73,9 @@ public:
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
//////////////////////////////////////////////////////////////////////////////////////
// Unified virtual memory
//////////////////////////////////////////////////////////////////////////////////////
template<typename _Tp>
class uvmAllocator {
public:
@ -109,22 +111,63 @@ public:
MemoryManager::SharedFree((void *)__p,bytes);
}
// FIXME: hack for the copy constructor, eventually it must be avoided
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
//void construct(pointer __p, const _Tp& __val) { };
void construct(pointer __p) { };
void destroy(pointer __p) { };
};
template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
////////////////////////////////////////////////////////////////////////////////
// Device memory
////////////////////////////////////////////////////////////////////////////////
template<typename _Tp>
class devAllocator {
public:
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef _Tp* pointer;
typedef const _Tp* const_pointer;
typedef _Tp& reference;
typedef const _Tp& const_reference;
typedef _Tp value_type;
template<typename _Tp1> struct rebind { typedef devAllocator<_Tp1> other; };
devAllocator() throw() { }
devAllocator(const devAllocator&) throw() { }
template<typename _Tp1> devAllocator(const devAllocator<_Tp1>&) throw() { }
~devAllocator() throw() { }
pointer address(reference __x) const { return &__x; }
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
pointer allocate(size_type __n, const void* _p= 0)
{
size_type bytes = __n*sizeof(_Tp);
profilerAllocate(bytes);
_Tp *ptr = (_Tp*) MemoryManager::AcceleratorAllocate(bytes);
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
return ptr;
}
void deallocate(pointer __p, size_type __n)
{
size_type bytes = __n * sizeof(_Tp);
profilerFree(bytes);
MemoryManager::AcceleratorFree((void *)__p,bytes);
}
void construct(pointer __p, const _Tp& __val) { };
void construct(pointer __p) { };
void destroy(pointer __p) { };
};
template<typename _Tp> inline bool operator==(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const devAllocator<_Tp>&, const devAllocator<_Tp>&){ return false; }
////////////////////////////////////////////////////////////////////////////////
// Template typedefs
////////////////////////////////////////////////////////////////////////////////
template<class T> using commAllocator = uvmAllocator<T>;
//template<class T> using commAllocator = devAllocator<T>;
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
//template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
template<class T> using commVector = std::vector<T,devAllocator<T> >;
NAMESPACE_END(Grid);
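For orientation: alignedAllocator, uvmAllocator, and the new devAllocator all expose the same std::allocator interface, which is what lets std::vector sit on top of them through the template typedefs above. Below is a minimal standalone sketch of that pattern, with plain malloc/free standing in for the MemoryManager calls; mallocAllocator is an illustrative name, not Grid API.

#include <cassert>
#include <cstdlib>
#include <vector>

template<typename _Tp>
class mallocAllocator {
public:
  typedef _Tp value_type;
  template<typename _Tp1> struct rebind { typedef mallocAllocator<_Tp1> other; };
  mallocAllocator() throw() { }
  template<typename _Tp1> mallocAllocator(const mallocAllocator<_Tp1>&) throw() { }
  _Tp* allocate(std::size_t __n)
  {
    _Tp *ptr = (_Tp*) std::malloc(__n*sizeof(_Tp));
    assert( ptr != (_Tp *)NULL );  // same failure check as the Grid allocators
    return ptr;
  }
  void deallocate(_Tp* __p, std::size_t) { std::free((void *)__p); }
  // No-op construct/destroy, as in devAllocator: the memory may be device resident,
  // so a host-side single-threaded placement-new loop over elements is skipped.
  void construct(_Tp* __p) { }
  void construct(_Tp* __p, const _Tp& __val) { }
  void destroy(_Tp* __p) { }
};
template<typename _Tp> inline bool operator==(const mallocAllocator<_Tp>&, const mallocAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const mallocAllocator<_Tp>&, const mallocAllocator<_Tp>&){ return false; }

int main()
{
  std::vector<double, mallocAllocator<double> > v(16); // elements deliberately left uninitialised
  v[0] = 3.14;
  return 0;
}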

View File

@ -93,12 +93,12 @@ private:
static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
static void *AcceleratorAllocate(size_t bytes);
static void AcceleratorFree (void *ptr,size_t bytes);
static void PrintBytes(void);
public:
static void Init(void);
static void InitMessage(void);
static void *AcceleratorAllocate(size_t bytes);
static void AcceleratorFree (void *ptr,size_t bytes);
static void *SharedAllocate(size_t bytes);
static void SharedFree (void *ptr,size_t bytes);
static void *CpuAllocate(size_t bytes);

View File

@ -302,60 +302,28 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
unsigned long xcrc = crc32(0L, Z_NULL, 0);
unsigned long rcrc = crc32(0L, Z_NULL, 0);
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
// Enforce no UVM in comms, device or host OK
assert(acceleratorIsCommunicable(xmit));
assert(acceleratorIsCommunicable(recv));
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
// Give the CPU to MPI immediately; can use threads to overlap optionally
// printf("proc %d SendToRecvFrom %d bytes Sendrecv \n",_processor,bytes);
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
// printf("proc %d SendToRecvFrom %d bytes xcrc %lx rcrc %lx\n",_processor,bytes,xcrc,rcrc); fflush
}
// Basic Halo comms primitive
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int dest,
void *recv,
@ -411,15 +379,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilBarrier(void)
{
MPI_Barrier (ShmComm);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
{
int nreq=list.size();
@ -430,6 +390,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
assert(ierr==0);
list.resize(0);
}
void CartesianCommunicator::StencilBarrier(void)
{
MPI_Barrier (ShmComm);
}
//void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
//{
//}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
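The refactor above funnels both communicator policies through MPI_Sendrecv. A standalone sketch of that basic exchange pattern, pure MPI with no Grid types: each rank sends to its right neighbour and receives from its left one on a ring, tagging the send with the sender's rank as SendToRecvFromBegin does.

#include <mpi.h>
#include <vector>
#include <cassert>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int bytes = 1024;
  std::vector<char> xmit(bytes, (char)rank), recv(bytes);
  int dest = (rank + 1) % size;          // send to the right ...
  int from = (rank - 1 + size) % size;   // ... receive from the left

  // Give the CPU to MPI immediately: one call pairs the send and the receive.
  int ierr = MPI_Sendrecv(xmit.data(), bytes, MPI_CHAR, dest, rank,
                          recv.data(), bytes, MPI_CHAR, from, from,
                          MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  assert(ierr == 0);
  MPI_Finalize();
  return 0;
}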

View File

@ -52,23 +52,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<typename Op, typename T1>
auto Cshift(const LatticeUnaryExpression<Op,T1> &expr,int dim,int shift)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
{
return Cshift(closure(expr),dim,shift);
}
template <class Op, class T1, class T2>
auto Cshift(const LatticeBinaryExpression<Op,T1,T2> &expr,int dim,int shift)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
{
return Cshift(closure(expr),dim,shift);
}
template <class Op, class T1, class T2, class T3>
auto Cshift(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr,int dim,int shift)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
eval(0, expr.arg2),
eval(0, expr.arg3)))>
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
auto Cshift(const Expression &expr,int dim,int shift) -> decltype(closure(expr))
{
return Cshift(closure(expr),dim,shift);
}
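The hunk above collapses three per-arity Cshift overloads into one template gated on is_lattice_expr, each of which only forwarded to closure() anyway. The same consolidation in miniature, with a hypothetical trait and expression types standing in for Grid's:

#include <type_traits>
#include <iostream>

struct UnaryExpr  { };   // stand-ins for LatticeUnaryExpression etc.
struct BinaryExpr { };

template<class T> struct is_expr : std::false_type { };
template<> struct is_expr<UnaryExpr>  : std::true_type { };
template<> struct is_expr<BinaryExpr> : std::true_type { };

struct Value { int v; };
Value closure(const UnaryExpr &)  { return {1}; }  // "evaluate" the expression
Value closure(const BinaryExpr &) { return {2}; }

Value shift(const Value &v, int dim, int s) { return {v.v + dim + s}; }

// One overload covers every expression arity:
template<class Expression,
         typename std::enable_if<is_expr<Expression>::value, void>::type * = nullptr>
auto shift(const Expression &expr, int dim, int s) -> decltype(closure(expr))
{
  return shift(closure(expr), dim, s);
}

int main()
{
  std::cout << shift(BinaryExpr{}, 0, 1).v << std::endl; // routes through closure(): prints 3
  return 0;
}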

View File

@ -76,8 +76,8 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
autoView(rhs_v , rhs, AcceleratorRead);
auto buffer_p = & buffer[0];
auto table = &Cshift_table[0];
accelerator_for(i,ent,1,{
buffer_p[table[i].first]=rhs_v[table[i].second];
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
});
}
}
@ -185,8 +185,8 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
autoView( rhs_v, rhs, AcceleratorWrite);
auto buffer_p = & buffer[0];
auto table = &Cshift_table[0];
accelerator_for(i,ent,1,{
rhs_v[table[i].first]=buffer_p[table[i].second];
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
});
}
}
@ -222,6 +222,7 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
// Test_cshift_red_black code.
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
assert(0); // This will fail if hit on GPU
autoView( rhs_v, rhs, CpuWrite);
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
@ -282,8 +283,8 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
autoView(rhs_v , rhs, AcceleratorRead);
autoView(lhs_v , lhs, AcceleratorWrite);
auto table = &Cshift_table[0];
accelerator_for(i,ent,1,{
lhs_v[table[i].first]=rhs_v[table[i].second];
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
});
}
}
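These kernels move from one thread per table entry, copying a whole vobj, to vobj::Nsimd() threads per entry using coalescedRead/coalescedWrite. A host-side C++ emulation of that lane-per-thread gather, where the inner loop plays the role of the extra accelerator_for dimension (illustrative only, not Grid's macros):

#include <array>
#include <vector>
#include <utility>

constexpr int Nsimd = 4;
typedef std::array<float, Nsimd> vobj;   // stand-in for a SIMD site object

// Thread (i, lane) moves exactly one scalar lane, so on a GPU consecutive
// threads touch consecutive scalars: the accesses coalesce.
void gather(std::vector<vobj> &buffer, const std::vector<vobj> &rhs,
            const std::vector<std::pair<int,int> > &table)
{
  for (std::size_t i = 0; i < table.size(); i++)   // accelerator_for(i, ent, Nsimd, ...)
    for (int lane = 0; lane < Nsimd; lane++)       // one GPU thread per lane
      buffer[table[i].first][lane] = rhs[table[i].second][lane];
}

int main()
{
  std::vector<vobj> rhs(8, vobj{{1,2,3,4}}), buf(8);
  std::vector<std::pair<int,int> > table = { {0,7}, {1,6} };
  gather(buf, rhs, table);
  return 0;
}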

View File

@ -37,6 +37,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_reduction.h>
#include <Grid/lattice/Lattice_peekpoke.h>
//#include <Grid/lattice/Lattice_reality.h>
#include <Grid/lattice/Lattice_real_imag.h>
#include <Grid/lattice/Lattice_comparison_utils.h>
#include <Grid/lattice/Lattice_comparison.h>
#include <Grid/lattice/Lattice_coordinate.h>

View File

@ -42,9 +42,24 @@ NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////
// Predicated where support
////////////////////////////////////////////////////
#ifdef GRID_SIMT
// drop to scalar in SIMT; cleaner in fact
template <class iobj, class vobj, class robj>
accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
const robj &iffalse) {
accelerator_inline vobj predicatedWhere(const iobj &predicate,
const vobj &iftrue,
const robj &iffalse)
{
Integer mask = TensorRemove(predicate);
typename std::remove_const<vobj>::type ret= iffalse;
if (mask) ret=iftrue;
return ret;
}
#else
template <class iobj, class vobj, class robj>
accelerator_inline vobj predicatedWhere(const iobj &predicate,
const vobj &iftrue,
const robj &iffalse)
{
typename std::remove_const<vobj>::type ret;
typedef typename vobj::scalar_object scalar_object;
@ -68,6 +83,7 @@ accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftru
merge(ret, falsevals);
return ret;
}
#endif
/////////////////////////////////////////////////////
//Specialization of getVectorType for lattices
@ -81,33 +97,62 @@ struct getVectorType<Lattice<T> >{
//-- recursive evaluation of expressions; --
// handle leaves of syntax tree
///////////////////////////////////////////////////
template<class sobj> accelerator_inline
template<class sobj,
typename std::enable_if<!is_lattice<sobj>::value&&!is_lattice_expr<sobj>::value,sobj>::type * = nullptr>
accelerator_inline
sobj eval(const uint64_t ss, const sobj &arg)
{
return arg;
}
template <class lobj> accelerator_inline
const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
auto eval(const uint64_t ss, const LatticeView<lobj> &arg) -> decltype(arg(ss))
{
return arg(ss);
}
////////////////////////////////////////////
//-- recursive evaluation of expressions; --
// whole vector return, used only for expression return type inference
///////////////////////////////////////////////////
template<class sobj> accelerator_inline
sobj vecEval(const uint64_t ss, const sobj &arg)
{
return arg;
}
template <class lobj> accelerator_inline
const lobj & vecEval(const uint64_t ss, const LatticeView<lobj> &arg)
{
return arg[ss];
}
// What needs this?
// Cannot be legal on accelerator
// Comparison must convert
#if 1
template <class lobj> accelerator_inline
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
{
assert(0);
auto view = arg.View(AcceleratorRead);
return view[ss];
}
#endif
///////////////////////////////////////////////////
// handle nodes in syntax tree- eval one operand
// vecEval is needed (though never called, since all expressions are offloaded) to infer
// the return type of closure in SIMT contexts.
///////////////////////////////////////////////////
template <typename Op, typename T1> accelerator_inline
auto vecEval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
-> decltype(expr.op.func( vecEval(ss, expr.arg1)))
{
return expr.op.func( vecEval(ss, expr.arg1) );
}
// vecEval two operands
template <typename Op, typename T1, typename T2> accelerator_inline
auto vecEval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
-> decltype(expr.op.func( vecEval(ss,expr.arg1),vecEval(ss,expr.arg2)))
{
return expr.op.func( vecEval(ss,expr.arg1), vecEval(ss,expr.arg2) );
}
// vecEval three operands
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
auto vecEval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
-> decltype(expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3)))
{
return expr.op.func(vecEval(ss, expr.arg1), vecEval(ss, expr.arg2), vecEval(ss, expr.arg3));
}
///////////////////////////////////////////////////
// handle nodes in syntax tree- eval one operand coalesced
///////////////////////////////////////////////////
template <typename Op, typename T1> accelerator_inline
auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
@ -115,23 +160,41 @@ auto eval(const uint64_t ss, const LatticeUnaryExpression<Op, T1> &expr)
{
return expr.op.func( eval(ss, expr.arg1) );
}
///////////////////////
// eval two operands
///////////////////////
template <typename Op, typename T1, typename T2> accelerator_inline
auto eval(const uint64_t ss, const LatticeBinaryExpression<Op, T1, T2> &expr)
-> decltype(expr.op.func( eval(ss,expr.arg1),eval(ss,expr.arg2)))
{
return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) );
}
///////////////////////
// eval three operands
///////////////////////
template <typename Op, typename T1, typename T2, typename T3> accelerator_inline
auto eval(const uint64_t ss, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
-> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)))
-> decltype(expr.op.func(eval(ss, expr.arg1),
eval(ss, expr.arg2),
eval(ss, expr.arg3)))
{
return expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3));
#ifdef GRID_SIMT
// Handles Nsimd (vInteger) != Nsimd(ComplexD)
typedef decltype(vecEval(ss, expr.arg2)) rvobj;
typedef typename std::remove_reference<rvobj>::type vobj;
const int Nsimd = vobj::vector_type::Nsimd();
auto vpred = vecEval(ss,expr.arg1);
ExtractBuffer<Integer> mask(Nsimd);
extract<vInteger, Integer>(TensorRemove(vpred), mask);
int s = acceleratorSIMTlane(Nsimd);
return expr.op.func(mask[s],
eval(ss, expr.arg2),
eval(ss, expr.arg3));
#else
return expr.op.func(eval(ss, expr.arg1),
eval(ss, expr.arg2),
eval(ss, expr.arg3));
#endif
}
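In the GRID_SIMT branch above, the predicate is evaluated as a whole vector, a per-lane Integer mask is extracted, and each thread then selects scalar operands for its own lane; this is what lets the predicate's Nsimd differ from the data's. A plain C++ emulation of that per-lane selection (illustrative types, not Grid's):

#include <array>
#include <iostream>

constexpr int Nsimd = 4;
typedef std::array<long, Nsimd> vInteger;   // whole-vector predicate (the vecEval result)

// What one SIMT thread does for lane s: read its lane of the mask, then choose.
double whereLane(const vInteger &mask, double iftrue, double iffalse, int s)
{
  return mask[s] ? iftrue : iffalse;        // expr.op.func(mask[s], ...)
}

int main()
{
  vInteger mask = {{1, 0, 1, 0}};
  for (int s = 0; s < Nsimd; s++)           // s = acceleratorSIMTlane(Nsimd) on a GPU
    std::cout << whereLane(mask, 10.0, 20.0, s) << " ";  // 10 20 10 20
  std::cout << std::endl;
  return 0;
}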
//////////////////////////////////////////////////////////////////////////
@ -229,7 +292,7 @@ template <typename Op, typename T1, typename T2> inline
void ExpressionViewOpen(LatticeBinaryExpression<Op, T1, T2> &expr)
{
ExpressionViewOpen(expr.arg1); // recurse AST
ExpressionViewOpen(expr.arg2); // recurse AST
ExpressionViewOpen(expr.arg2); // recurse AST
}
template <typename Op, typename T1, typename T2, typename T3>
inline void ExpressionViewOpen(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
@ -273,9 +336,8 @@ inline void ExpressionViewClose(LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
// Unary operators and funcs
////////////////////////////////////////////
#define GridUnopClass(name, ret) \
template <class arg> \
struct name { \
static auto accelerator_inline func(const arg a) -> decltype(ret) { return ret; } \
template<class _arg> static auto accelerator_inline func(const _arg a) -> decltype(ret) { return ret; } \
};
GridUnopClass(UnarySub, -a);
@ -286,8 +348,6 @@ GridUnopClass(UnaryTrace, trace(a));
GridUnopClass(UnaryTranspose, transpose(a));
GridUnopClass(UnaryTa, Ta(a));
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
GridUnopClass(UnaryReal, real(a));
GridUnopClass(UnaryImag, imag(a));
GridUnopClass(UnaryToReal, toReal(a));
GridUnopClass(UnaryToComplex, toComplex(a));
GridUnopClass(UnaryTimesI, timesI(a));
@ -306,10 +366,10 @@ GridUnopClass(UnaryExp, exp(a));
// Binary operators
////////////////////////////////////////////
#define GridBinOpClass(name, combination) \
template <class left, class right> \
struct name { \
template <class _left, class _right> \
static auto accelerator_inline \
func(const left &lhs, const right &rhs) \
func(const _left &lhs, const _right &rhs) \
-> decltype(combination) const \
{ \
return combination; \
@ -329,10 +389,10 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
// Trinary conditional op
////////////////////////////////////////////////////
#define GridTrinOpClass(name, combination) \
template <class predicate, class left, class right> \
struct name { \
template <class _predicate,class _left, class _right> \
static auto accelerator_inline \
func(const predicate &pred, const left &lhs, const right &rhs) \
func(const _predicate &pred, const _left &lhs, const _right &rhs) \
-> decltype(combination) const \
{ \
return combination; \
@ -340,17 +400,17 @@ GridBinOpClass(BinaryOrOr, lhs || rhs);
};
GridTrinOpClass(TrinaryWhere,
(predicatedWhere<predicate,
typename std::remove_reference<left>::type,
typename std::remove_reference<right>::type>(pred, lhs,rhs)));
(predicatedWhere<
typename std::remove_reference<_predicate>::type,
typename std::remove_reference<_left>::type,
typename std::remove_reference<_right>::type>(pred, lhs,rhs)));
////////////////////////////////////////////
// Operator syntactical glue
////////////////////////////////////////////
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_UNOP(name) name
#define GRID_BINOP(name) name
#define GRID_TRINOP(name) name
#define GRID_DEF_UNOP(op, name) \
template <typename T1, typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value,T1>::type * = nullptr> \
@ -402,8 +462,6 @@ GRID_DEF_UNOP(trace, UnaryTrace);
GRID_DEF_UNOP(transpose, UnaryTranspose);
GRID_DEF_UNOP(Ta, UnaryTa);
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
GRID_DEF_UNOP(real, UnaryReal);
GRID_DEF_UNOP(imag, UnaryImag);
GRID_DEF_UNOP(toReal, UnaryToReal);
GRID_DEF_UNOP(toComplex, UnaryToComplex);
GRID_DEF_UNOP(timesI, UnaryTimesI);
@ -436,29 +494,36 @@ GRID_DEF_TRINOP(where, TrinaryWhere);
/////////////////////////////////////////////////////////////
template <class Op, class T1>
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))>
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> ret(expr);
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1)))> ret(expr);
return ret;
}
template <class Op, class T1, class T2>
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))>
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))>
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1),eval(0, expr.arg2)))> ret(expr);
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),vecEval(0, expr.arg2)))> ret(expr);
return ret;
}
template <class Op, class T1, class T2, class T3>
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1),
eval(0, expr.arg2),
eval(0, expr.arg3)))>
-> Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
vecEval(0, expr.arg2),
vecEval(0, expr.arg3)))>
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1),
eval(0, expr.arg2),
eval(0, expr.arg3)))> ret(expr);
Lattice<decltype(expr.op.func(vecEval(0, expr.arg1),
vecEval(0, expr.arg2),
vecEval(0, expr.arg3)))> ret(expr);
return ret;
}
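A note on why the closures switch from eval to vecEval: vecEval appears only inside decltype, so it shapes the Lattice return type without ever executing, whereas eval now returns per-lane (coalesced) types under SIMT. The decltype-only trick in isolation (illustrative, standalone):

#include <vector>

// vecEval-style helper: only its *type* is used, via decltype; it is never called,
// so a declaration with no definition suffices.
template<class T> T vecEval(const std::vector<T> &v);

template<class T>
auto closure(const std::vector<T> &v) -> std::vector<decltype(vecEval(v))>
{
  return std::vector<decltype(vecEval(v))>(v);  // element type inferred, not computed
}

int main()
{
  std::vector<double> a(2, 1.0);
  auto b = closure(a);                          // std::vector<double>
  return (int)b.size() - 2;                     // 0 on success
}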
#define EXPRESSION_CLOSURE(function) \
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr> \
auto function(Expression &expr) -> decltype(function(closure(expr))) \
{ \
return function(closure(expr)); \
}
#undef GRID_UNOP
#undef GRID_BINOP

View File

@ -123,9 +123,9 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
accelerator_for(ss,me.size(),1,{
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
vstream(me[ss],tmp);
coalescedWrite(me[ss],tmp);
});
me.ViewClose();
ExpressionViewClose(exprCopy);
@ -146,9 +146,9 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
accelerator_for(ss,me.size(),1,{
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
vstream(me[ss],tmp);
coalescedWrite(me[ss],tmp);
});
me.ViewClose();
ExpressionViewClose(exprCopy);
@ -168,9 +168,9 @@ public:
auto exprCopy = expr;
ExpressionViewOpen(exprCopy);
auto me = View(AcceleratorWriteDiscard);
accelerator_for(ss,me.size(),1,{
accelerator_for(ss,me.size(),vobj::Nsimd(),{
auto tmp = eval(ss,exprCopy);
vstream(me[ss],tmp);
coalescedWrite(me[ss],tmp);
});
me.ViewClose();
ExpressionViewClose(exprCopy);

View File

@ -42,34 +42,6 @@ NAMESPACE_BEGIN(Grid);
typedef iScalar<vInteger> vPredicate ;
/*
template <class iobj, class vobj, class robj> accelerator_inline
vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, const robj &iffalse)
{
typename std::remove_const<vobj>::type ret;
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
const int Nsimd = vobj::vector_type::Nsimd();
ExtractBuffer<Integer> mask(Nsimd);
ExtractBuffer<scalar_object> truevals(Nsimd);
ExtractBuffer<scalar_object> falsevals(Nsimd);
extract(iftrue, truevals);
extract(iffalse, falsevals);
extract<vInteger, Integer>(TensorRemove(predicate), mask);
for (int s = 0; s < Nsimd; s++) {
if (mask[s]) falsevals[s] = truevals[s];
}
merge(ret, falsevals);
return ret;
}
*/
//////////////////////////////////////////////////////////////////////////
// compare lattice to lattice
//////////////////////////////////////////////////////////////////////////

View File

@ -182,6 +182,14 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
return;
};
template<class vobj,class sobj>
inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site)
{
autoView(lv,l,CpuRead);
peekLocalSite(s,lv,site);
return;
};
// Must be CPU write view
template<class vobj,class sobj>
inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
@ -210,6 +218,14 @@ inline void pokeLocalSite(const sobj &s,LatticeView<vobj> &l,Coordinate &site)
return;
};
template<class vobj,class sobj>
inline void pokeLocalSite(const sobj &s, Lattice<vobj> &l,Coordinate &site)
{
autoView(lv,l,CpuWrite);
pokeLocalSite(s,lv,site);
return;
};
NAMESPACE_END(Grid);
#endif
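The two new overloads above let callers pass the Lattice itself, with the CPU view opened and closed internally. The convenience-overload pattern in isolation (illustrative stand-ins for Lattice and LatticeView, not Grid API):

#include <vector>
#include <iostream>

struct View { const std::vector<double> &data; };   // stand-in for LatticeView

double peek(const View &v, int site) { return v.data[site]; }

// Convenience overload in the same spirit as the diff: accept the container
// and open the view internally, so call sites need no view management.
double peek(const std::vector<double> &l, int site)
{
  View lv{l};                                       // autoView(lv, l, CpuRead);
  return peek(lv, site);
}

int main()
{
  std::vector<double> lattice = {0.5, 1.5, 2.5};
  std::cout << peek(lattice, 2) << std::endl;       // 2.5
  return 0;
}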

View File

@ -0,0 +1,79 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_reality.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LATTICE_REAL_IMAG_H
#define GRID_LATTICE_REAL_IMAG_H
// FIXME: this is the sector of the code I am most worried about.
// The choice of burying complex in the SIMD directions
// is making the use of "real" and "imag" very cumbersome.
NAMESPACE_BEGIN(Grid);
template<class vobj> inline Lattice<vobj> real(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs.Grid());
autoView( lhs_v, lhs, AcceleratorRead);
autoView( ret_v, ret, AcceleratorWrite);
ret.Checkerboard()=lhs.Checkerboard();
accelerator_for( ss, lhs_v.size(), 1, {
ret_v[ss] =real(lhs_v[ss]);
});
return ret;
};
template<class vobj> inline Lattice<vobj> imag(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs.Grid());
autoView( lhs_v, lhs, AcceleratorRead);
autoView( ret_v, ret, AcceleratorWrite);
ret.Checkerboard()=lhs.Checkerboard();
accelerator_for( ss, lhs_v.size(), 1, {
ret_v[ss] =imag(lhs_v[ss]);
});
return ret;
};
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
auto real(const Expression &expr) -> decltype(real(closure(expr)))
{
return real(closure(expr));
}
template<class Expression,typename std::enable_if<is_lattice_expr<Expression>::value,void>::type * = nullptr>
auto imag(const Expression &expr) -> decltype(imag(closure(expr)))
{
return imag(closure(expr));
}
NAMESPACE_END(Grid);
#endif

View File

@ -208,7 +208,7 @@ public:
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities

View File

@ -215,7 +215,7 @@ public:
LebesgueOrder LebesgueEvenOdd;
// Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
};

View File

@ -59,7 +59,7 @@ public:
}
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
return Cshift(closure(adj(Link)), mu, -1);
return Cshift(adj(Link), mu, -1);
}
static inline GaugeLinkField
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {

View File

@ -53,23 +53,21 @@ namespace PeriodicBC {
return Cshift(tmp,mu,-1);// moves towards positive mu
}
template<class gauge,typename Op, typename T1> auto
CovShiftForward(const Lattice<gauge> &Link,
int mu,
const LatticeUnaryExpression<Op,T1> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftForward(const Lattice<gauge> &Link,
int mu,
const Expr &expr) -> decltype(closure(expr))
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
auto arg = closure(expr);
return CovShiftForward(Link,mu,arg);
}
template<class gauge,typename Op, typename T1> auto
CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const LatticeUnaryExpression<Op,T1> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const Expr &expr) -> decltype(closure(expr))
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
return CovShiftForward(Link,mu,arg);
auto arg = closure(expr);
return CovShiftBackward(Link,mu,arg);
}
}
@ -142,26 +140,23 @@ namespace ConjugateBC {
return Cshift(tmp,mu,-1);// moves towards positive mu
}
template<class gauge,typename Op, typename T1> auto
CovShiftForward(const Lattice<gauge> &Link,
int mu,
const LatticeUnaryExpression<Op,T1> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftForward(const Lattice<gauge> &Link,
int mu,
const Expr &expr) -> decltype(closure(expr))
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
auto arg = closure(expr);
return CovShiftForward(Link,mu,arg);
}
template<class gauge,typename Op, typename T1> auto
CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const LatticeUnaryExpression<Op,T1> &expr)
-> Lattice<decltype(expr.op.func(eval(0, expr.arg1)))>
template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
auto CovShiftBackward(const Lattice<gauge> &Link,
int mu,
const Expr &expr) -> decltype(closure(expr))
{
Lattice<decltype(expr.op.func(eval(0, expr.arg1)))> arg(expr);
return CovShiftForward(Link,mu,arg);
auto arg = closure(expr);
return CovShiftBackward(Link,mu,arg);
}
}

View File

@ -39,7 +39,7 @@ public:
typedef iSUnAdjointMatrix<ComplexF> AMatrixF;
typedef iSUnAdjointMatrix<ComplexD> AMatrixD;
typedef iSUnAdjointMatrix<vComplex> vAMatrix;
typedef iSUnAdjointMatrix<vComplexF> vAMatrixF;
typedef iSUnAdjointMatrix<vComplexD> vAMatrixD;
@ -47,14 +47,9 @@ public:
typedef Lattice<vAMatrixF> LatticeAdjMatrixF;
typedef Lattice<vAMatrixD> LatticeAdjMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeAdjField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeAdjFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeAdjFieldD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> > LatticeAdjField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> > LatticeAdjFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> > LatticeAdjFieldD;
template <class cplx>
@ -128,7 +123,9 @@ public:
}
// Projects out the algebra components of a lattice matrix (of dimension ncol*ncol - 1)
static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) {
static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0)
{
conformable(h_out, in);
h_out = Zero();
AMatrix iTa;
@ -136,7 +133,7 @@ public:
for (int a = 0; a < Dimension; a++) {
generator(a, iTa);
auto tmp = real(trace(iTa * in)) * coefficient;
LatticeComplex tmp = real(trace(iTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}

View File

@ -485,7 +485,7 @@ public:
// Up staple ___ ___
// | |
tmp = Cshift(closure(adj(U[nu])), nu, -1);
tmp = Cshift(adj(U[nu]), nu, -1);
tmp = adj(U2[mu]) * tmp;
tmp = Cshift(tmp, mu, -2);
@ -519,7 +519,7 @@ public:
//
// | |
tmp = Cshift(closure(adj(U2[nu])), nu, -2);
tmp = Cshift(adj(U2[nu]), nu, -2);
tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp);
tmp = U2[nu] * Cshift(tmp, nu, 2);
Stap += Cshift(tmp, mu, 1);

View File

@ -93,6 +93,11 @@ accelerator_inline ComplexF pow(const ComplexF& r,RealF y){ return(std::pow(r,y)
using std::abs;
using std::pow;
using std::sqrt;
using std::log;
using std::exp;
using std::sin;
using std::cos;
accelerator_inline RealF conjugate(const RealF & r){ return r; }
accelerator_inline RealD conjugate(const RealD & r){ return r; }

View File

@ -190,6 +190,36 @@ NAMESPACE_BEGIN(Grid);
typedef ComplexD DoublePrecision;
typedef ComplexD DoublePrecision2;
};
#ifdef GRID_CUDA
template<> struct GridTypeMapper<std::complex<float> > : public GridTypeMapper_Base {
typedef std::complex<float> scalar_type;
typedef std::complex<double> scalar_typeD;
typedef scalar_type vector_type;
typedef scalar_typeD vector_typeD;
typedef scalar_type tensor_reduced;
typedef scalar_type scalar_object;
typedef scalar_typeD scalar_objectD;
typedef scalar_type Complexified;
typedef RealF Realified;
typedef scalar_typeD DoublePrecision;
typedef scalar_typeD DoublePrecision2;
};
template<> struct GridTypeMapper<std::complex<double> > : public GridTypeMapper_Base {
typedef std::complex<double> scalar_type;
typedef std::complex<double> scalar_typeD;
typedef scalar_type vector_type;
typedef scalar_typeD vector_typeD;
typedef scalar_type tensor_reduced;
typedef scalar_type scalar_object;
typedef scalar_typeD scalar_objectD;
typedef scalar_type Complexified;
typedef RealD Realified;
typedef scalar_typeD DoublePrecision;
typedef scalar_typeD DoublePrecision2;
};
#endif
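These specializations teach the type mapper that, on the CUDA path, std::complex is simultaneously its own scalar and vector type, with ordinary double promotion. The trait-specialization pattern in miniature (illustrative, not the full GridTypeMapper):

#include <complex>

struct TypeMapperBase { };                     // shared defaults live here in Grid

template<class T> struct TypeMapper;           // primary template left undefined

template<> struct TypeMapper<std::complex<float> > : public TypeMapperBase {
  typedef std::complex<float>  scalar_type;
  typedef std::complex<double> scalar_typeD;   // promoted precision
  typedef float Realified;
};
template<> struct TypeMapper<std::complex<double> > : public TypeMapperBase {
  typedef std::complex<double> scalar_type;
  typedef std::complex<double> scalar_typeD;
  typedef double Realified;
};

int main()
{
  TypeMapper<std::complex<float> >::scalar_typeD z(1.0, -1.0);  // double precision
  return (z.real() == 1.0) ? 0 : 1;
}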
template<> struct GridTypeMapper<ComplexD2> : public GridTypeMapper_Base {
typedef ComplexD2 scalar_type;
typedef ComplexD2 scalar_typeD;

View File

@ -70,6 +70,7 @@ NAMESPACE_BEGIN(Grid);
//
// Memory management:
//
// int acceleratorIsCommunicable(void *pointer);
// void *acceleratorAllocShared(size_t bytes);
// void acceleratorFreeShared(void *ptr);
//
@ -90,6 +91,7 @@ void acceleratorInit(void);
//////////////////////////////////////////////
#ifdef GRID_CUDA
#include <cuda.h>
#ifdef __CUDA_ARCH__
#define GRID_SIMT
@ -165,6 +167,16 @@ inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline int acceleratorIsCommunicable(void *ptr)
{
int uvm;
auto cuerr = cuPointerGetAttribute( &uvm, CU_POINTER_ATTRIBUTE_IS_MANAGED, (CUdeviceptr) ptr);
assert(cuerr == cudaSuccess );
if(uvm) return 0;
else return 1;
}
#endif
//////////////////////////////////////////////
@ -219,6 +231,15 @@ inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();}
inline int acceleratorIsCommunicable(void *ptr)
{
#if 0
auto uvm = cl::sycl::usm::get_pointer_type(ptr, theGridAccelerator->get_context());
if ( uvm == cl::sycl::usm::alloc::shared ) return 1;
else return 0;
#endif
return 1;
}
#endif
@ -294,6 +315,7 @@ inline void *acceleratorAllocShared(size_t bytes)
}
return ptr;
};
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
inline void *acceleratorAllocDevice(size_t bytes)
{
@ -348,6 +370,7 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { memcpy(to,from,bytes);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ memcpy(to,from,bytes);}
inline int acceleratorIsCommunicable(void *ptr){ return 1; }
#ifdef HAVE_MM_MALLOC_H
inline void *acceleratorAllocShared(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
inline void *acceleratorAllocDevice(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};

View File

@ -99,10 +99,10 @@ inline std::ostream & operator<<(std::ostream &os, const AcceleratorVector<T,_nd
{
os << "[";
for(int s=0;s<v.size();s++) {
os << v[s] << " ";
}
if (v.size() > 0) {
os << "\b";
os << v[s];
if( s < (v.size()-1) ){
os << " ";
}
}
os << "]";
return os;

View File

@ -74,90 +74,6 @@ int main (int argc, char ** argv)
std::vector<double> t_time(Nloop);
time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header();
for(int lat=8;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=8;Ls*=2){
Coordinate latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<Vector<HalfSpinColourVectorD> > xbuf(8);
std::vector<Vector<HalfSpinColourVectorD> > rbuf(8);
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int mu=0;mu<8;mu++){
xbuf[mu].resize(lat*lat*lat*Ls);
rbuf[mu].resize(lat*lat*lat*Ls);
// std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
}
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CommsRequest_t> requests;
ncomm=0;
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
Grid.SendToRecvFromComplete(requests);
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
timestat.statistics(t_time);
double dbytes = bytes*ppn;
double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@ -206,26 +122,22 @@ int main (int argc, char ** argv)
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
Grid.SendToRecvFromComplete(requests);
Grid.SendToRecvFrom((void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
}
comm_proc = mpi_layout[mu]-1;
{
std::vector<CommsRequest_t> requests;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
Grid.SendToRecvFromComplete(requests);
Grid.SendToRecvFrom((void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
}

View File

@ -89,6 +89,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage;
std::cout << latt_size;
std::cout << "\t\t";
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);

View File

@ -154,6 +154,7 @@ AC_ARG_ENABLE([accelerator],
case ${ac_ACCELERATOR} in
cuda)
echo CUDA acceleration
LIBS="${LIBS} -lcuda"
AC_DEFINE([GRID_CUDA],[1],[Use CUDA offload]);;
sycl)
echo SYCL acceleration
@ -323,7 +324,6 @@ case ${CXXTEST} in
# CXXLD="nvcc -v -link"
CXX="${CXXBASE} -x cu "
CXXLD="${CXXBASE} -link"
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
@ -484,8 +484,7 @@ case ${ac_SHM} in
LDFLAGS_CPY=$LDFLAGS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_SEARCH_LIBS([shm_unlink], [rt], [],
[AC_MSG_ERROR("no library found for shm_unlink")])
AC_SEARCH_LIBS([shm_unlink], [rt], [],[AC_MSG_ERROR("no library found for shm_unlink")])
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
;;

View File

@ -47,14 +47,14 @@ public:
bool , b,
std::vector<double>, array,
std::vector<std::vector<double> >, twodimarray,
std::vector<std::vector<std::vector<Complex> > >, cmplx3darray,
std::vector<std::vector<std::vector<std::complex<double>> > >, cmplx3darray,
SpinColourMatrix, scm
);
myclass() {}
myclass(int i)
: array(4,5.1)
, twodimarray(3,std::vector<double>(5, 1.23456))
, cmplx3darray(3,std::vector<std::vector<Complex>>(5, std::vector<Complex>(7, Complex(1.2, 3.4))))
, cmplx3darray(3,std::vector<std::vector<std::complex<double>>>(5, std::vector<std::complex<double>>(7, std::complex<double>(1.2, 3.4))))
, ve(2, myenum::blue)
{
e=myenum::red;
@ -121,11 +121,11 @@ namespace Eigen {
// Perform I/O tests on a range of tensor types
// Test coverage: scalars, complex and GridVectors in single, double and default precision
class TensorIO : public Serializable {
using TestScalar = ComplexD;
using TestScalar = std::complex<double>;
using SR3 = Eigen::Sizes<9,4,2>;
using SR5 = Eigen::Sizes<5,4,3,2,1>;
using ESO = Eigen::StorageOptions;
using TensorRank3 = Eigen::Tensor<ComplexF, 3, ESO::RowMajor>;
using TensorRank3 = Eigen::Tensor<std::complex<float>, 3, ESO::RowMajor>;
using TensorR5 = Eigen::TensorFixedSize<Real, SR5>;
using TensorR5Alt = Eigen::TensorFixedSize<Real, SR5, ESO::RowMajor>;
using Tensor942 = Eigen::TensorFixedSize<TestScalar, SR3, ESO::RowMajor>;
@ -134,8 +134,8 @@ class TensorIO : public Serializable {
using LSCTensor = Eigen::TensorFixedSize<SpinColourMatrix, Eigen::Sizes<6,5>>;
static const Real FlagR;
static const Complex Flag;
static const ComplexF FlagF;
static const std::complex<double> Flag;
static const std::complex<float> FlagF;
static const TestScalar FlagTS;
static const char * const pszFilePrefix;
@ -230,8 +230,8 @@ public:
};
const Real TensorIO::FlagR {1};
const Complex TensorIO::Flag {1,-1};
const ComplexF TensorIO::FlagF {1,-1};
const std::complex<double> TensorIO::Flag {1,-1};
const std::complex<float> TensorIO::FlagF {1,-1};
const TensorIO::TestScalar TensorIO::FlagTS{1,-1};
const char * const TensorIO::pszFilePrefix = "tensor_";

View File

@ -137,7 +137,6 @@ int main(int argc, char **argv) {
LatticeReal iscalar(&Fine);
SpinMatrix GammaFive;
iSpinMatrix<vComplex> iGammaFive;
ColourMatrix cmat;
random(FineRNG, Foo);
@ -283,7 +282,6 @@ int main(int argc, char **argv) {
cMat = mydouble * cMat;
sMat = adj(sMat); // LatticeSpinMatrix adjoint
sMat = iGammaFive * sMat; // SpinMatrix * LatticeSpinMatrix
sMat = GammaFive * sMat; // SpinMatrix * LatticeSpinMatrix
scMat = adj(scMat);
cMat = adj(cMat);

View File

@ -261,11 +261,11 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,phi_o,phi);
SchurDiagMooeeOperator<NaiveStaggeredFermionR,FermionField> HermOpEO(Ds);
HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
HermOpEO.MpcDagMpc(chi_e,dchi_e);
HermOpEO.MpcDagMpc(chi_o,dchi_o);
HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2);
HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2);
HermOpEO.MpcDagMpc(phi_e,dphi_e);
HermOpEO.MpcDagMpc(phi_o,dphi_o);
pDce = innerProduct(phi_e,dchi_e);
pDco = innerProduct(phi_o,dchi_o);

tests/core/Test_where.cc (new file, 80 lines)
View File

@ -0,0 +1,80 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_poisson_fft.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
int N=16;
std::vector<int> latt_size ({N,4,4});
std::vector<int> simd_layout({vComplexD::Nsimd(),1,1});
std::vector<int> mpi_layout ({1,1,1});
int vol = 1;
int nd = latt_size.size();
for(int d=0;d<nd;d++){
vol = vol * latt_size[d];
}
GridCartesian GRID(latt_size,simd_layout,mpi_layout);
LatticeComplexD zz(&GRID);
LatticeInteger coor(&GRID);
LatticeComplexD rn(&GRID);
LatticeComplexD sl(&GRID);
zz = ComplexD(0.0,0.0);
GridParallelRNG RNG(&GRID);
RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
gaussian(RNG,rn);
RealD nn=norm2(rn);
for(int mu=0;mu<nd;mu++){
RealD ns=0.0;
for(int t=0;t<latt_size[mu];t++){
LatticeCoordinate(coor,mu);
sl=where(coor==Integer(t),rn,zz);
std::cout <<GridLogMessage<< " sl " << sl<<std::endl;
std::cout <<GridLogMessage<<" slice "<<t<<" " << norm2(sl)<<std::endl;
ns=ns+norm2(sl);
}
std::cout <<GridLogMessage <<" sliceNorm" <<mu<<" "<< nn <<" "<<ns<<" " << nn-ns<<std::endl;
}
Grid_finalize();
}

View File

@ -122,7 +122,7 @@ int main (int argc, char ** argv)
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
Subspace Aggregates(Coarse5d,FGrid,cb);
Aggregates.CreateSubspaceRandom(RNG5);
// Aggregates.CreateSubspaceRandom(RNG5);
subspace=Aggregates.subspace;
@ -163,7 +163,7 @@ int main (int argc, char ** argv)
LittleDiracOp.M(c_src,c_res);
std::cout<<GridLogMessage<<"Testing hermiticity explicitly by inspecting matrix elements"<<std::endl;
LittleDiracOp.AssertHermitian();
// LittleDiracOp.AssertHermitian();
std::cout<<GridLogMessage << "Testing Hermiticity stochastically "<< std::endl;
CoarseVector phi(Coarse5d);

View File

@ -311,7 +311,7 @@ int main (int argc, char ** argv) {
std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl;
assert(Nm2 >= Nm1);
const int nbasis= 70;
const int nbasis= 32;
CoarseFineIRL<vSpinColourVector,vTComplex,nbasis> IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd);
std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl;

View File

@ -0,0 +1,420 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_hdcr.cc
Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
using namespace std;
using namespace Grid;
/* Params
* Grid:
* block1(4)
* block2(4)
*
* Subspace
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
* Smoother:
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
* Lanczos:
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
*/
RealD InverseApproximation(RealD x){
return 1.0/x;
}
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
Chebyshev<Field> Cheby;
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
_SmootherOperator(SmootherOperator),
_SmootherMatrix(SmootherMatrix),
Cheby(_lo,_hi,_ord,InverseApproximation)
{};
void operator() (const Field &in, Field &out)
{
Field tmp(in.Grid());
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
_SmootherOperator.AdjOp(in,tmp);
Cheby(MdagMOp,tmp,out);
}
};
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
RealD tol;
RealD shift;
int maxit;
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
shift(_shift),tol(_tol),maxit(_maxit),
SmootherOperator(_SmootherOperator),
SmootherMatrix(_SmootherMatrix)
{};
void operator() (const Field &in, Field &out)
{
ZeroGuesser<Field> Guess;
ConjugateGradient<Field> CG(tol,maxit,false);
Field src(in.Grid());
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
SmootherOperator.AdjOp(in,src);
Guess(src,out);
CG(MdagMOp,src,out);
}
};
template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field>
{
public:
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
RealD tol;
RealD shift;
int maxit;
RedBlackSmoother(RealD _shift,RealD _tol,int _maxit,Matrix &_SmootherMatrix) :
shift(_shift),tol(_tol),maxit(_maxit),
SmootherMatrix(_SmootherMatrix)
{};
void operator() (const Field &in, Field &out)
{
std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
ConjugateGradient<Field> CG(tol,maxit,false);
out =Zero();
SchurRedBlackDiagMooeeSolve<Field> RBSolver(CG);
RBSolver(SmootherMatrix,in,out);
std::cout << " Red Black Smootheer "<<norm2(in)<<" " <<norm2(out)<<std::endl;
}
};
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
typedef LinearOperatorBase<FineField> FineOperator;
typedef LinearFunction <FineField> FineSmoother;
Aggregates & _Aggregates;
CoarseOperator & _CoarseOperator;
Matrix & _FineMatrix;
FineOperator & _FineOperator;
Guesser & _Guess;
FineSmoother & _Smoother1;
FineSmoother & _Smoother2;
CoarseSolver & _CoarseSolve;
int level; void Level(int lv) {level = lv; };
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
FineOperator &Fine,Matrix &FineMatrix,
FineSmoother &Smoother1,
FineSmoother &Smoother2,
Guesser &Guess_,
CoarseSolver &CoarseSolve_)
: _Aggregates(Agg),
_CoarseOperator(Coarse),
_FineOperator(Fine),
_FineMatrix(FineMatrix),
_Smoother1(Smoother1),
_Smoother2(Smoother2),
_Guess(Guess_),
_CoarseSolve(CoarseSolve_),
level(1) { }
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
FineOperator &Fine,Matrix &FineMatrix,
FineSmoother &Smoother,
Guesser &Guess_,
CoarseSolver &CoarseSolve_)
: _Aggregates(Agg),
_CoarseOperator(Coarse),
_FineOperator(Fine),
_FineMatrix(FineMatrix),
_Smoother1(Smoother),
_Smoother2(Smoother),
_Guess(Guess_),
_CoarseSolve(CoarseSolve_),
level(1) { }
virtual void operator()(const FineField &in, FineField & out)
{
CoarseVector Csrc(_CoarseOperator.Grid());
CoarseVector Csol(_CoarseOperator.Grid());
FineField vec1(in.Grid());
FineField vec2(in.Grid());
double t;
// Fine Smoother
t=-usecond();
_Smoother1(in,out);
t+=usecond();
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
// Update the residual
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
// Fine to Coarse
t=-usecond();
_Aggregates.ProjectToSubspace (Csrc,vec1);
t+=usecond();
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
// Coarse correction
t=-usecond();
_CoarseSolve(Csrc,Csol);
t+=usecond();
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
// Coarse to Fine
t=-usecond();
_Aggregates.PromoteFromSubspace(Csol,vec1);
add(out,out,vec1);
t+=usecond();
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
// Residual
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
// Fine Smoother
t=-usecond();
_Smoother2(vec1,vec2);
t+=usecond();
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
add( out,out,vec2);
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
const int Ls=16;
const int rLs=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
///////////////////////////////////////////////////
// Construct a coarsened grid; utility for this?
///////////////////////////////////////////////////
std::vector<int> block ({2,2,2,2});
const int nbasis= 32;
auto clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/block[d];
}
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::vector<int> cseeds({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
LatticeFermion result(FGrid);
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
std::string file("./ckpoint_lat");
NerscIO::readConfiguration(Umu,header,file);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
RealD mass=0.001;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
typedef CoarseOperator::CoarseVector CoarseVector;
typedef CoarseOperator::siteVector siteVector;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
Subspace Aggregates(Coarse5d,FGrid,0);
assert ( (nbasis & 0x1)==0); // nbasis must be even: the basis is doubled by chirality below
{
int nb=nbasis/2;
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,100,0.0);// 18s
// rAggregates.CreateSubspaceChebyshev(RNG5,rHermDefOp,nb,60.0,0.05,500,200,150,0.0);// 15.7 23iter
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,150,0.0);//
// pad out the rAggregates (reduced-Ls experiment, disabled above).
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,150,0.0);// 19s
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,200,0.0); 15.2s
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,500,200,0.0); 16.3s
for(int n=0;n<nb;n++){
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
}
LatticeFermion A(FGrid);
LatticeFermion B(FGrid);
for(int n=0;n<nb;n++){
A = Aggregates.subspace[n];
B = Aggregates.subspace[n+nb];
Aggregates.subspace[n]   = A+B; // 1+G5 ; eigenvalue of G5R5 is +1
Aggregates.subspace[n+nb]= A-B; // 1-G5 ; eigenvalue of G5R5 is -1
}
}
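// Chirality doubling: the nb = nbasis/2 near-null vectors v_n are split into
// (1 +/- G5R5) v_n, giving a basis of definite G5R5 chirality. The intent is
// that the coarsened operator then inherits a coarse analogue of the fine
// G5R5 hermiticity.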
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
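// CoarsenOperator builds the Galerkin coarse matrix Ac = R M P from the
// aggregates (R = ProjectToSubspace, P = PromoteFromSubspace), stored as a
// nearest-neighbour stencil of nbasis x nbasis link matrices. The second
// constructor argument is read here as a hermiticity hint -- an assumption;
// check the CoarsenedMatrix constructor.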
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<Level1Op,CoarseVector> IRLHermOp(LDOp);
Chebyshev<CoarseVector> IRLCheby(0.002,12.,151);
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,IRLHermOp);
PlainHermOp<CoarseVector> IRLOp (IRLHermOp);
int Nk=48;
int Nm=64;
int Nstop=48;
int Nconv;
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
std::vector<RealD> eval(Nm);
std::vector<CoarseVector> evec(Nm,Coarse5d);
CoarseVector c_src(Coarse5d);
gaussian(CRNG,c_src);
IRL.calc(eval,evec,c_src,Nconv);
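// Chebyshev-accelerated IRL: IRLCheby(0.002,12.,151) applies an order-151
// polynomial that amplifies the spectrum below 0.002 relative to [0.002,12],
// so the restarted Lanczos converges to the Nstop=48 lowest modes of the
// coarse MdagM (Krylov dimension Nm=64, residual 1.0e-3, up to 20 restarts).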
// ConjugateGradient<CoarseVector> CoarseCG(0.01,1000);
ConjugateGradient<CoarseVector> CoarseCG(0.02,1000);// 14.7s
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
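// Deflated CG on the normal equations: the DeflatedGuesser seeds the solve
// with x0 = sum_i v_i (v_i,b)/lambda_i over the Lanczos eigenpairs, leaving
// CG (tol 0.02) to handle only the non-deflated part of the spectrum.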
c_src=1.0;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
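// The power method estimates the largest eigenvalue of MdagM on each level,
// providing a check on the hard-coded upper Chebyshev bounds used in this
// test (60.0 on the fine grid, 12 on the coarse grid).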
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building 2 level Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
// MultiGrid preconditioner acting on the fine space <-> coarse space
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 72 iter 63s
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.1,60.0,20,HermIndefOp,Ddwf); // 66 iter 69s
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,20,HermIndefOp,Ddwf); // 63 iter 65 s
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,20,HermIndefOp,Ddwf); // 69, 70
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(1.0,60.0,14,HermIndefOp,Ddwf); // 77
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); // 23 iter 15.9s
// ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,14,HermIndefOp,Ddwf); // 20, 16.9s
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf); // 21, 15.6s
// MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.05,0.01,20,HermIndefOp,Ddwf);
// RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf);
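// The Chebyshev smoother applies a fixed polynomial in HermIndefOp targeting
// the interval [0.5,60.0]; the commented lines above record a parameter scan
// (lo, hi, order -> outer iterations, wall time). The MIRS and red-black CG
// smoothers are disabled alternatives.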
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
ZeroGuesser<CoarseVector> CoarseZeroGuesser; // unused alternative to the deflated guesser
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
HermIndefOp,Ddwf,
FineSmoother,
DeflCoarseGuesser,
DeflCoarseCGNE);
TwoLevelPrecon.Level(1);
// Apply the fine-coarse 2-level MG preconditioner in an outer PGCR on the fine grid
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,TwoLevelPrecon,16,16);
l1PGCR.Level(1);
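// PGCR is a flexible Krylov method, so the nonstationary MG cycle is
// admissible as a preconditioner. The final two arguments (both 16 here) are
// read as the search-direction memory and restart length -- an assumption
// based on the PGCR constructor signature.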
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
result=Zero();
l1PGCR(src,result);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Fine CG prec DiagMooee "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
ConjugateGradient<LatticeFermion> FineCG(1.0e-8,10000);
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> FineDiagMooee(Ddwf); // Schur complement: Mee - Meo Moo^{-1} Moe
LatticeFermion f_src_e(FrbGrid); f_src_e=1.0;
LatticeFermion f_res_e(FrbGrid); f_res_e=Zero();
FineCG(FineDiagMooee,f_src_e,f_res_e);
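// Baseline for comparison: unpreconditioned red-black (Schur) CG at the same
// 1.0e-8 tolerance, against which the two-level MG-preconditioned PGCR above
// can be timed.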
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Done "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Grid_finalize();
}