mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 21:25:56 +01:00
commit
97703b181b
@ -0,0 +1,241 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_PREC_GCR_NON_HERM_H
|
||||||
|
#define GRID_PREC_GCR_NON_HERM_H
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//VPGCR Abe and Zhang, 2005.
|
||||||
|
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||||
|
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||||
|
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||||
|
// but VPGCR was nicely written up in their paper
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
|
||||||
|
|
||||||
|
template<class Field>
|
||||||
|
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterations;
|
||||||
|
int verbose;
|
||||||
|
int mmax;
|
||||||
|
int nstep;
|
||||||
|
int steps;
|
||||||
|
int level;
|
||||||
|
GridStopWatch PrecTimer;
|
||||||
|
GridStopWatch MatTimer;
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
|
||||||
|
LinearFunction<Field> &Preconditioner;
|
||||||
|
LinearOperatorBase<Field> &Linop;
|
||||||
|
|
||||||
|
void Level(int lv) { level=lv; };
|
||||||
|
|
||||||
|
PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||||
|
Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
Linop(_Linop),
|
||||||
|
Preconditioner(Prec),
|
||||||
|
mmax(_mmax),
|
||||||
|
nstep(_nstep)
|
||||||
|
{
|
||||||
|
level=1;
|
||||||
|
verbose=1;
|
||||||
|
};
|
||||||
|
|
||||||
|
void operator() (const Field &src, Field &psi){
|
||||||
|
|
||||||
|
psi=Zero();
|
||||||
|
RealD cp, ssq,rsq;
|
||||||
|
ssq=norm2(src);
|
||||||
|
rsq=Tolerance*Tolerance*ssq;
|
||||||
|
|
||||||
|
Field r(src.Grid());
|
||||||
|
|
||||||
|
PrecTimer.Reset();
|
||||||
|
MatTimer.Reset();
|
||||||
|
LinalgTimer.Reset();
|
||||||
|
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
SolverTimer.Start();
|
||||||
|
|
||||||
|
steps=0;
|
||||||
|
for(int k=0;k<MaxIterations;k++){
|
||||||
|
|
||||||
|
cp=GCRnStep(src,psi,rsq);
|
||||||
|
|
||||||
|
GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
|
||||||
|
|
||||||
|
if(cp<rsq) {
|
||||||
|
|
||||||
|
SolverTimer.Stop();
|
||||||
|
|
||||||
|
Linop.Op(psi,r);
|
||||||
|
axpy(r,-1.0,src,r);
|
||||||
|
RealD tr = norm2(r);
|
||||||
|
GCRLogLevel<<"PGCR: Converged on iteration " <<steps
|
||||||
|
<< " computed residual "<<sqrt(cp/ssq)
|
||||||
|
<< " true residual " <<sqrt(tr/ssq)
|
||||||
|
<< " target " <<Tolerance <<std::endl;
|
||||||
|
|
||||||
|
GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||||
|
// assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
|
||||||
|
|
||||||
|
RealD cp;
|
||||||
|
ComplexD a, b, zAz;
|
||||||
|
RealD zAAz;
|
||||||
|
ComplexD rq;
|
||||||
|
|
||||||
|
GridBase *grid = src.Grid();
|
||||||
|
|
||||||
|
Field r(grid);
|
||||||
|
Field z(grid);
|
||||||
|
Field tmp(grid);
|
||||||
|
Field ttmp(grid);
|
||||||
|
Field Az(grid);
|
||||||
|
|
||||||
|
////////////////////////////////
|
||||||
|
// history for flexible orthog
|
||||||
|
////////////////////////////////
|
||||||
|
std::vector<Field> q(mmax,grid);
|
||||||
|
std::vector<Field> p(mmax,grid);
|
||||||
|
std::vector<RealD> qq(mmax);
|
||||||
|
|
||||||
|
GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
|
||||||
|
|
||||||
|
//////////////////////////////////
|
||||||
|
// initial guess x0 is taken as nonzero.
|
||||||
|
// r0=src-A x0 = src
|
||||||
|
//////////////////////////////////
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(psi,Az);
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
r=src-Az;
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
|
||||||
|
|
||||||
|
/////////////////////
|
||||||
|
// p = Prec(r)
|
||||||
|
/////////////////////
|
||||||
|
|
||||||
|
PrecTimer.Start();
|
||||||
|
Preconditioner(r,z);
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(z,Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
|
||||||
|
//p[0],q[0],qq[0]
|
||||||
|
p[0]= z;
|
||||||
|
q[0]= Az;
|
||||||
|
qq[0]= zAAz;
|
||||||
|
|
||||||
|
cp =norm2(r);
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
|
||||||
|
for(int k=0;k<nstep;k++){
|
||||||
|
|
||||||
|
steps++;
|
||||||
|
|
||||||
|
int kp = k+1;
|
||||||
|
int peri_k = k %mmax;
|
||||||
|
int peri_kp= kp%mmax;
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
rq= innerProduct(q[peri_k],r); // what if rAr not real?
|
||||||
|
a = rq/qq[peri_k];
|
||||||
|
|
||||||
|
axpy(psi,a,p[peri_k],psi);
|
||||||
|
|
||||||
|
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
|
||||||
|
GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
|
||||||
|
|
||||||
|
if((k==nstep-1)||(cp<rsq)){
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PrecTimer.Start();
|
||||||
|
Preconditioner(r,z);// solve Az = r
|
||||||
|
PrecTimer.Stop();
|
||||||
|
|
||||||
|
MatTimer.Start();
|
||||||
|
Linop.Op(z,Az);
|
||||||
|
MatTimer.Stop();
|
||||||
|
zAz = innerProduct(Az,psi);
|
||||||
|
zAAz= norm2(Az);
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
|
||||||
|
q[peri_kp]=Az;
|
||||||
|
p[peri_kp]=z;
|
||||||
|
|
||||||
|
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||||
|
for(int back=0;back<northog;back++){
|
||||||
|
|
||||||
|
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||||
|
|
||||||
|
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||||
|
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||||
|
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||||
|
|
||||||
|
}
|
||||||
|
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
}
|
||||||
|
assert(0); // never reached
|
||||||
|
return cp;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
#endif
|
@ -52,41 +52,79 @@ public:
|
|||||||
pointer allocate(size_type __n, const void* _p= 0)
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
{
|
{
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
size_type bytes = __n*sizeof(_Tp);
|
||||||
|
|
||||||
profilerAllocate(bytes);
|
profilerAllocate(bytes);
|
||||||
|
|
||||||
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
_Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
|
||||||
|
|
||||||
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deallocate(pointer __p, size_type __n)
|
void deallocate(pointer __p, size_type __n)
|
||||||
{
|
{
|
||||||
size_type bytes = __n * sizeof(_Tp);
|
size_type bytes = __n * sizeof(_Tp);
|
||||||
|
|
||||||
profilerFree(bytes);
|
profilerFree(bytes);
|
||||||
|
|
||||||
MemoryManager::CpuFree((void *)__p,bytes);
|
MemoryManager::CpuFree((void *)__p,bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||||
|
//void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||||
|
void construct(pointer __p, const _Tp& __val) { assert(0);};
|
||||||
|
void construct(pointer __p) { };
|
||||||
|
void destroy(pointer __p) { };
|
||||||
|
};
|
||||||
|
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
|
template<typename _Tp>
|
||||||
|
class uvmAllocator {
|
||||||
|
public:
|
||||||
|
typedef std::size_t size_type;
|
||||||
|
typedef std::ptrdiff_t difference_type;
|
||||||
|
typedef _Tp* pointer;
|
||||||
|
typedef const _Tp* const_pointer;
|
||||||
|
typedef _Tp& reference;
|
||||||
|
typedef const _Tp& const_reference;
|
||||||
|
typedef _Tp value_type;
|
||||||
|
|
||||||
|
template<typename _Tp1> struct rebind { typedef uvmAllocator<_Tp1> other; };
|
||||||
|
uvmAllocator() throw() { }
|
||||||
|
uvmAllocator(const uvmAllocator&) throw() { }
|
||||||
|
template<typename _Tp1> uvmAllocator(const uvmAllocator<_Tp1>&) throw() { }
|
||||||
|
~uvmAllocator() throw() { }
|
||||||
|
pointer address(reference __x) const { return &__x; }
|
||||||
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
|
// Allocate storage for __n objects of type _Tp through
// MemoryManager::SharedAllocate. The hint argument _p is accepted but not used.
// A null result trips the assertion.
pointer allocate(size_type __n, const void* _p= 0)
{
  const size_type nbytes = __n*sizeof(_Tp);
  profilerAllocate(nbytes);
  _Tp * const mem = (_Tp*) MemoryManager::SharedAllocate(nbytes);
  assert( ( mem != (_Tp *)NULL ) );
  return mem;
}
|
||||||
|
|
||||||
|
// Hand the storage for __n objects of type _Tp at __p back through
// MemoryManager::SharedFree; the byte count is recomputed from __n.
void deallocate(pointer __p, size_type __n)
{
  const size_type nbytes = __n * sizeof(_Tp);
  profilerFree(nbytes);
  MemoryManager::SharedFree((void *)__p,nbytes);
}
|
||||||
|
|
||||||
// FIXME: hack for the copy constructor, eventually it must be avoided
|
// FIXME: hack for the copy constructor, eventually it must be avoided
|
||||||
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); };
|
||||||
//void construct(pointer __p, const _Tp& __val) { };
|
//void construct(pointer __p, const _Tp& __val) { };
|
||||||
void construct(pointer __p) { };
|
void construct(pointer __p) { };
|
||||||
void destroy(pointer __p) { };
|
void destroy(pointer __p) { };
|
||||||
};
|
};
|
||||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
template<typename _Tp> inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; }
|
||||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
template<typename _Tp> inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Template typedefs
|
// Template typedefs
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class T> using commAllocator = alignedAllocator<T>;
|
template<class T> using commAllocator = uvmAllocator<T>;
|
||||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
template<class T> using Vector = std::vector<T,uvmAllocator<T> >;
|
||||||
template<class T> using commVector = std::vector<T,alignedAllocator<T> >;
|
template<class T> using commVector = std::vector<T,uvmAllocator<T> >;
|
||||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
//template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -7,13 +7,24 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define CpuSmall (1)
|
#define CpuSmall (1)
|
||||||
#define Acc (2)
|
#define Acc (2)
|
||||||
#define AccSmall (3)
|
#define AccSmall (3)
|
||||||
|
#define Shared (4)
|
||||||
|
#define SharedSmall (5)
|
||||||
|
uint64_t total_shared;
|
||||||
|
uint64_t total_device;
|
||||||
|
uint64_t total_host;;
|
||||||
|
void MemoryManager::PrintBytes(void)
|
||||||
|
{
|
||||||
|
std::cout << " MemoryManager : "<<total_shared<<" shared bytes "<<std::endl;
|
||||||
|
std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
|
||||||
|
std::cout << " MemoryManager : "<<total_host <<" cpu bytes "<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Data tables for recently freed pointer caches
|
// Data tables for recently freed pointer caches
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
||||||
int MemoryManager::Victim[MemoryManager::NallocType];
|
int MemoryManager::Victim[MemoryManager::NallocType];
|
||||||
int MemoryManager::Ncache[MemoryManager::NallocType];
|
int MemoryManager::Ncache[MemoryManager::NallocType] = { 8, 32, 8, 32, 8, 32 };
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Actual allocation and deallocation utils
|
// Actual allocation and deallocation utils
|
||||||
@ -21,49 +32,86 @@ int MemoryManager::Ncache[MemoryManager::NallocType];
|
|||||||
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
||||||
{
|
{
|
||||||
void *ptr = (void *) Lookup(bytes,Acc);
|
void *ptr = (void *) Lookup(bytes,Acc);
|
||||||
|
|
||||||
if ( ptr == (void *) NULL ) {
|
if ( ptr == (void *) NULL ) {
|
||||||
ptr = (void *) acceleratorAllocDevice(bytes);
|
ptr = (void *) acceleratorAllocDevice(bytes);
|
||||||
// std::cout <<"AcceleratorAllocate: allocated Accelerator pointer "<<std::hex<<ptr<<std::endl;
|
total_device+=bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
||||||
{
|
{
|
||||||
void *__freeme = Insert(ptr,bytes,Acc);
|
void *__freeme = Insert(ptr,bytes,Acc);
|
||||||
|
if ( __freeme ) {
|
||||||
if ( __freeme ) acceleratorFreeDevice(__freeme);
|
acceleratorFreeDevice(__freeme);
|
||||||
|
total_device-=bytes;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
void *MemoryManager::SharedAllocate(size_t bytes)
|
||||||
|
{
|
||||||
|
void *ptr = (void *) Lookup(bytes,Shared);
|
||||||
|
if ( ptr == (void *) NULL ) {
|
||||||
|
ptr = (void *) acceleratorAllocShared(bytes);
|
||||||
|
total_shared+=bytes;
|
||||||
|
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void MemoryManager::SharedFree (void *ptr,size_t bytes)
|
||||||
|
{
|
||||||
|
void *__freeme = Insert(ptr,bytes,Shared);
|
||||||
|
if ( __freeme ) {
|
||||||
|
acceleratorFreeShared(__freeme);
|
||||||
|
total_shared-=bytes;
|
||||||
|
// PrintBytes();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef GRID_UVM
|
||||||
void *MemoryManager::CpuAllocate(size_t bytes)
|
void *MemoryManager::CpuAllocate(size_t bytes)
|
||||||
{
|
{
|
||||||
void *ptr = (void *) Lookup(bytes,Cpu);
|
void *ptr = (void *) Lookup(bytes,Cpu);
|
||||||
|
|
||||||
if ( ptr == (void *) NULL ) {
|
if ( ptr == (void *) NULL ) {
|
||||||
ptr = (void *) acceleratorAllocShared(bytes);
|
ptr = (void *) acceleratorAllocShared(bytes);
|
||||||
// std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::endl;
|
total_host+=bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
||||||
{
|
{
|
||||||
NotifyDeletion(_ptr);
|
NotifyDeletion(_ptr);
|
||||||
|
|
||||||
// If present remove entry and free accelerator too.
|
|
||||||
// Can we ever hit a free event with a view still in scope?
|
|
||||||
void *__freeme = Insert(_ptr,bytes,Cpu);
|
void *__freeme = Insert(_ptr,bytes,Cpu);
|
||||||
if ( __freeme ) acceleratorFreeShared(__freeme);
|
if ( __freeme ) {
|
||||||
|
acceleratorFreeShared(__freeme);
|
||||||
|
total_host-=bytes;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
void *MemoryManager::CpuAllocate(size_t bytes)
|
||||||
|
{
|
||||||
|
void *ptr = (void *) Lookup(bytes,Cpu);
|
||||||
|
if ( ptr == (void *) NULL ) {
|
||||||
|
ptr = (void *) acceleratorAllocCpu(bytes);
|
||||||
|
total_host+=bytes;
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
||||||
|
{
|
||||||
|
NotifyDeletion(_ptr);
|
||||||
|
void *__freeme = Insert(_ptr,bytes,Cpu);
|
||||||
|
if ( __freeme ) {
|
||||||
|
acceleratorFreeCpu(__freeme);
|
||||||
|
total_host-=bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
// call only once
|
// call only once
|
||||||
//////////////////////////////////////////
|
//////////////////////////////////////////
|
||||||
void MemoryManager::Init(void)
|
void MemoryManager::Init(void)
|
||||||
{
|
{
|
||||||
Ncache[Cpu] = 8;
|
|
||||||
Ncache[Acc] = 8;
|
|
||||||
Ncache[CpuSmall] = 32;
|
|
||||||
Ncache[AccSmall] = 32;
|
|
||||||
|
|
||||||
char * str;
|
char * str;
|
||||||
int Nc;
|
int Nc;
|
||||||
@ -75,6 +123,7 @@ void MemoryManager::Init(void)
|
|||||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||||
Ncache[Cpu]=Nc;
|
Ncache[Cpu]=Nc;
|
||||||
Ncache[Acc]=Nc;
|
Ncache[Acc]=Nc;
|
||||||
|
Ncache[Shared]=Nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,6 +133,7 @@ void MemoryManager::Init(void)
|
|||||||
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
||||||
Ncache[CpuSmall]=Nc;
|
Ncache[CpuSmall]=Nc;
|
||||||
Ncache[AccSmall]=Nc;
|
Ncache[AccSmall]=Nc;
|
||||||
|
Ncache[SharedSmall]=Nc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<<std::endl;
|
||||||
|
@ -44,14 +44,14 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
enum ViewAdvise {
|
enum ViewAdvise {
|
||||||
AdviseDefault = 0x0, // Regular data
|
AdviseDefault = 0x0, // Regular data
|
||||||
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
|
AdviseInfrequentUse = 0x1 // Advise that the data is used infrequently. This can
|
||||||
// significantly influence performance of bulk storage.
|
// significantly influence performance of bulk storage.
|
||||||
|
|
||||||
AdviseTransient = 0x2, // Data will mostly be read. On some architectures
|
// AdviseTransient = 0x2, // Data will mostly be read. On some architectures
|
||||||
// enables read-only copies of memory to be kept on
|
// enables read-only copies of memory to be kept on
|
||||||
// host and device.
|
// host and device.
|
||||||
|
|
||||||
AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device
|
// AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ private:
|
|||||||
} AllocationCacheEntry;
|
} AllocationCacheEntry;
|
||||||
|
|
||||||
static const int NallocCacheMax=128;
|
static const int NallocCacheMax=128;
|
||||||
static const int NallocType=4;
|
static const int NallocType=6;
|
||||||
static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
|
static AllocationCacheEntry Entries[NallocType][NallocCacheMax];
|
||||||
static int Victim[NallocType];
|
static int Victim[NallocType];
|
||||||
static int Ncache[NallocType];
|
static int Ncache[NallocType];
|
||||||
@ -95,9 +95,11 @@ private:
|
|||||||
|
|
||||||
static void *AcceleratorAllocate(size_t bytes);
|
static void *AcceleratorAllocate(size_t bytes);
|
||||||
static void AcceleratorFree (void *ptr,size_t bytes);
|
static void AcceleratorFree (void *ptr,size_t bytes);
|
||||||
|
static void PrintBytes(void);
|
||||||
public:
|
public:
|
||||||
static void Init(void);
|
static void Init(void);
|
||||||
|
static void *SharedAllocate(size_t bytes);
|
||||||
|
static void SharedFree (void *ptr,size_t bytes);
|
||||||
static void *CpuAllocate(size_t bytes);
|
static void *CpuAllocate(size_t bytes);
|
||||||
static void CpuFree (void *ptr,size_t bytes);
|
static void CpuFree (void *ptr,size_t bytes);
|
||||||
|
|
||||||
|
@ -62,7 +62,6 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites)
|
|||||||
for(int i=0;i<nthread;i++){
|
for(int i=0;i<nthread;i++){
|
||||||
ssum = ssum+sumarray[i];
|
ssum = ssum+sumarray[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
return ssum;
|
return ssum;
|
||||||
}
|
}
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
@ -93,7 +92,9 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
|
|||||||
ssum = ssum+sumarray[i];
|
ssum = ssum+sumarray[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
return ssum;
|
typedef typename vobj::scalar_object ssobj;
|
||||||
|
ssobj ret = ssum;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -154,7 +155,7 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
|||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
|
|
||||||
// Might make all code paths go this way.
|
// Might make all code paths go this way.
|
||||||
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
|
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
|
||||||
Vector<inner_t> inner_tmp(sites);
|
Vector<inner_t> inner_tmp(sites);
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
|
||||||
@ -163,16 +164,16 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
|
|||||||
autoView( right_v,right, AcceleratorRead);
|
autoView( right_v,right, AcceleratorRead);
|
||||||
|
|
||||||
// GPU - SIMT lane compliance...
|
// GPU - SIMT lane compliance...
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, 1,{
|
||||||
auto x_l = left_v(ss);
|
auto x_l = left_v[ss];
|
||||||
auto y_l = right_v(ss);
|
auto y_l = right_v[ss];
|
||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
|
inner_tmp_v[ss]=innerProductD(x_l,y_l);
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is in single precision and fails some tests
|
// This is in single precision and fails some tests
|
||||||
// Need a sumD that sums in double
|
auto anrm = sum(inner_tmp_v,sites);
|
||||||
nrm = TensorRemove(sumD(inner_tmp_v,sites));
|
nrm = anrm;
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,16 +219,16 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
autoView( y_v, y, AcceleratorRead);
|
autoView( y_v, y, AcceleratorRead);
|
||||||
autoView( z_v, z, AcceleratorWrite);
|
autoView( z_v, z, AcceleratorWrite);
|
||||||
|
|
||||||
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
|
typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t;
|
||||||
Vector<inner_t> inner_tmp(sites);
|
Vector<inner_t> inner_tmp(sites);
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
|
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, 1,{
|
||||||
auto tmp = a*x_v(ss)+b*y_v(ss);
|
auto tmp = a*x_v[ss]+b*y_v[ss];
|
||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
|
inner_tmp_v[ss]=innerProductD(tmp,tmp);
|
||||||
coalescedWrite(z_v[ss],tmp);
|
z_v[ss]=tmp;
|
||||||
});
|
});
|
||||||
nrm = real(TensorRemove(sumD(inner_tmp_v,sites)));
|
nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
|
||||||
grid->GlobalSum(nrm);
|
grid->GlobalSum(nrm);
|
||||||
return nrm;
|
return nrm;
|
||||||
}
|
}
|
||||||
@ -243,29 +244,28 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Latti
|
|||||||
|
|
||||||
GridBase *grid = left.Grid();
|
GridBase *grid = left.Grid();
|
||||||
|
|
||||||
|
|
||||||
const uint64_t nsimd = grid->Nsimd();
|
const uint64_t nsimd = grid->Nsimd();
|
||||||
const uint64_t sites = grid->oSites();
|
const uint64_t sites = grid->oSites();
|
||||||
|
|
||||||
// GPU
|
// GPU
|
||||||
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
|
typedef decltype(innerProductD(vobj(),vobj())) inner_t;
|
||||||
typedef decltype(innerProduct(vobj(),vobj())) norm_t;
|
typedef decltype(innerProductD(vobj(),vobj())) norm_t;
|
||||||
Vector<inner_t> inner_tmp(sites);
|
Vector<inner_t> inner_tmp(sites);
|
||||||
Vector<norm_t> norm_tmp(sites);
|
Vector<norm_t> norm_tmp(sites);
|
||||||
auto inner_tmp_v = &inner_tmp[0];
|
auto inner_tmp_v = &inner_tmp[0];
|
||||||
auto norm_tmp_v = &norm_tmp[0];
|
auto norm_tmp_v = &norm_tmp[0];
|
||||||
{
|
{
|
||||||
autoView(left_v,left, AcceleratorRead);
|
autoView(left_v,left, AcceleratorRead);
|
||||||
autoView(right_v,right,AcceleratorRead);
|
autoView(right_v,right,AcceleratorRead);
|
||||||
accelerator_for( ss, sites, nsimd,{
|
accelerator_for( ss, sites, 1,{
|
||||||
auto left_tmp = left_v(ss);
|
auto left_tmp = left_v[ss];
|
||||||
coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss)));
|
inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]);
|
||||||
coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp));
|
norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp[0] = TensorRemove(sumD(inner_tmp_v,sites));
|
tmp[0] = TensorRemove(sum(inner_tmp_v,sites));
|
||||||
tmp[1] = TensorRemove(sumD(norm_tmp_v,sites));
|
tmp[1] = TensorRemove(sum(norm_tmp_v,sites));
|
||||||
|
|
||||||
grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector
|
grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector
|
||||||
ip = tmp[0];
|
ip = tmp[0];
|
||||||
|
@ -1,5 +1,13 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#define CPS_MD_TIME
|
||||||
|
|
||||||
|
#ifdef CPS_MD_TIME
|
||||||
|
#define HMC_MOMENTUM_DENOMINATOR (2.0)
|
||||||
|
#else
|
||||||
|
#define HMC_MOMENTUM_DENOMINATOR (1.0)
|
||||||
|
#endif
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template <class S>
|
template <class S>
|
||||||
@ -20,7 +28,9 @@ public:
|
|||||||
typedef Field PropagatorField;
|
typedef Field PropagatorField;
|
||||||
|
|
||||||
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
|
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
|
||||||
|
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling
|
||||||
gaussian(pRNG, P);
|
gaussian(pRNG, P);
|
||||||
|
P *= scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline Field projectForce(Field& P){return P;}
|
static inline Field projectForce(Field& P){return P;}
|
||||||
@ -66,7 +76,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void FreePropagator(const Field &in, Field &out,
|
static void FreePropagator(const Field &in, Field &out,
|
||||||
const Field &momKernel)
|
const Field &momKernel)
|
||||||
{
|
{
|
||||||
FFT fft((GridCartesian *)in.Grid());
|
FFT fft((GridCartesian *)in.Grid());
|
||||||
Field inFT(in.Grid());
|
Field inFT(in.Grid());
|
||||||
@ -139,14 +149,17 @@ public:
|
|||||||
|
|
||||||
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
|
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
|
||||||
{
|
{
|
||||||
|
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR); // CPS/UKQCD momentum rescaling
|
||||||
#ifndef USE_FFT_ACCELERATION
|
#ifndef USE_FFT_ACCELERATION
|
||||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
|
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
Field Pgaussian(P.Grid()), Pp(P.Grid());
|
Field Pgaussian(P.Grid()), Pp(P.Grid());
|
||||||
ComplexField p2(P.Grid()); p2 = zero;
|
ComplexField p2(P.Grid()); p2 = zero;
|
||||||
RealD M = FFT_MASS;
|
RealD M = FFT_MASS;
|
||||||
|
|
||||||
|
|
||||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
|
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
|
||||||
|
|
||||||
FFT theFFT((GridCartesian*)P.Grid());
|
FFT theFFT((GridCartesian*)P.Grid());
|
||||||
@ -156,17 +169,17 @@ public:
|
|||||||
p2 = sqrt(p2);
|
p2 = sqrt(p2);
|
||||||
Pp *= p2;
|
Pp *= p2;
|
||||||
theFFT.FFT_all_dim(P, Pp, FFT::backward);
|
theFFT.FFT_all_dim(P, Pp, FFT::backward);
|
||||||
|
|
||||||
#endif //USE_FFT_ACCELERATION
|
#endif //USE_FFT_ACCELERATION
|
||||||
|
P *= scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline Field projectForce(Field& P) {return P;}
|
static inline Field projectForce(Field& P) {return Ta(P);}
|
||||||
|
|
||||||
static inline void update_field(Field &P, Field &U, double ep)
|
static inline void update_field(Field &P, Field &U, double ep)
|
||||||
{
|
{
|
||||||
#ifndef USE_FFT_ACCELERATION
|
#ifndef USE_FFT_ACCELERATION
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
U += P*ep;
|
U += P*ep;
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
double total_time = (t1-t0)/1e6;
|
double total_time = (t1-t0)/1e6;
|
||||||
std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl;
|
std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl;
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
Copyright (C) 2019
|
Copyright (C) 2019
|
||||||
|
|
||||||
Author: Felix Erben <felix.erben@ed.ac.uk>
|
Author: Felix Erben <felix.erben@ed.ac.uk>
|
||||||
|
Author: Raoul Hodgson <raoul.hodgson@ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -58,9 +59,12 @@ public:
|
|||||||
const Gamma GammaA_right,
|
const Gamma GammaA_right,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_right,
|
||||||
const int parity,
|
const int parity,
|
||||||
const int * wick_contractions,
|
const bool * wick_contractions,
|
||||||
robj &result);
|
robj &result);
|
||||||
public:
|
public:
|
||||||
|
static void Wick_Contractions(std::string qi,
|
||||||
|
std::string qf,
|
||||||
|
bool* wick_contractions);
|
||||||
static void ContractBaryons(const PropagatorField &q1_left,
|
static void ContractBaryons(const PropagatorField &q1_left,
|
||||||
const PropagatorField &q2_left,
|
const PropagatorField &q2_left,
|
||||||
const PropagatorField &q3_left,
|
const PropagatorField &q3_left,
|
||||||
@ -68,8 +72,7 @@ public:
|
|||||||
const Gamma GammaB_left,
|
const Gamma GammaB_left,
|
||||||
const Gamma GammaA_right,
|
const Gamma GammaA_right,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_right,
|
||||||
const char * quarks_left,
|
const bool* wick_contractions,
|
||||||
const char * quarks_right,
|
|
||||||
const int parity,
|
const int parity,
|
||||||
ComplexField &baryon_corr);
|
ComplexField &baryon_corr);
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
@ -80,12 +83,61 @@ public:
|
|||||||
const Gamma GammaB_left,
|
const Gamma GammaB_left,
|
||||||
const Gamma GammaA_right,
|
const Gamma GammaA_right,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_right,
|
||||||
const char * quarks_left,
|
const bool* wick_contractions,
|
||||||
const char * quarks_right,
|
|
||||||
const int parity,
|
const int parity,
|
||||||
|
const int nt,
|
||||||
robj &result);
|
robj &result);
|
||||||
private:
|
private:
|
||||||
template <class mobj, class mobj2, class robj>
|
template <class mobj, class mobj2, class robj>
|
||||||
|
static void Baryon_Gamma_3pt_Group1_Site(
|
||||||
|
const mobj &Dq1_ti,
|
||||||
|
const mobj2 &Dq2_spec,
|
||||||
|
const mobj2 &Dq3_spec,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result);
|
||||||
|
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
|
static void Baryon_Gamma_3pt_Group2_Site(
|
||||||
|
const mobj2 &Dq1_spec,
|
||||||
|
const mobj &Dq2_ti,
|
||||||
|
const mobj2 &Dq3_spec,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result);
|
||||||
|
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
|
static void Baryon_Gamma_3pt_Group3_Site(
|
||||||
|
const mobj2 &Dq1_spec,
|
||||||
|
const mobj2 &Dq2_spec,
|
||||||
|
const mobj &Dq3_ti,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result);
|
||||||
|
public:
|
||||||
|
template <class mobj>
|
||||||
|
static void Baryon_Gamma_3pt(
|
||||||
|
const PropagatorField &q_ti,
|
||||||
|
const mobj &Dq_spec1,
|
||||||
|
const mobj &Dq_spec2,
|
||||||
|
const PropagatorField &q_tf,
|
||||||
|
int group,
|
||||||
|
int wick_contraction,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
SpinMatrixField &stn_corr);
|
||||||
|
private:
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop,
|
static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop,
|
||||||
const mobj2 &Du_spec,
|
const mobj2 &Du_spec,
|
||||||
const mobj &Dd_tf,
|
const mobj &Dd_tf,
|
||||||
@ -166,111 +218,137 @@ const Real BaryonUtils<FImpl>::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.};
|
|||||||
template <class FImpl>
|
template <class FImpl>
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
void BaryonUtils<FImpl>::baryon_site(const mobj &D1,
|
||||||
const mobj &D2,
|
const mobj &D2,
|
||||||
const mobj &D3,
|
const mobj &D3,
|
||||||
const Gamma GammaA_left,
|
const Gamma GammaA_i,
|
||||||
const Gamma GammaB_left,
|
const Gamma GammaB_i,
|
||||||
const Gamma GammaA_right,
|
const Gamma GammaA_f,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_f,
|
||||||
const int parity,
|
const int parity,
|
||||||
const int * wick_contraction,
|
const bool * wick_contraction,
|
||||||
robj &result)
|
robj &result)
|
||||||
{
|
{
|
||||||
|
|
||||||
Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
|
Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4)
|
||||||
auto gD1a = GammaA_left * GammaA_right * D1;
|
|
||||||
auto gD1b = GammaA_left * g4 * GammaA_right * D1;
|
|
||||||
auto pD1 = 0.5* (gD1a + (Real)parity * gD1b);
|
|
||||||
auto gD3 = GammaB_right * D3;
|
|
||||||
auto D2g = D2 * GammaB_left;
|
|
||||||
auto pD1g = pD1 * GammaB_left;
|
|
||||||
auto gD3g = gD3 * GammaB_left;
|
|
||||||
|
|
||||||
for (int ie_left=0; ie_left < 6 ; ie_left++){
|
auto D1_GAi = D1 * GammaA_i;
|
||||||
int a_left = epsilon[ie_left][0]; //a
|
auto D1_GAi_g4 = D1_GAi * g4;
|
||||||
int b_left = epsilon[ie_left][1]; //b
|
auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4);
|
||||||
int c_left = epsilon[ie_left][2]; //c
|
auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P;
|
||||||
for (int ie_right=0; ie_right < 6 ; ie_right++){
|
auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P;
|
||||||
int a_right = epsilon[ie_right][0]; //a'
|
|
||||||
int b_right = epsilon[ie_right][1]; //b'
|
auto D2_GBi = D2 * GammaB_i;
|
||||||
int c_right = epsilon[ie_right][2]; //c'
|
auto GBf_D2_GBi = GammaB_f * D2_GBi;
|
||||||
Real ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right];
|
auto GAf_D2_GBi = GammaA_f * D2_GBi;
|
||||||
|
|
||||||
|
auto GBf_D3 = GammaB_f * D3;
|
||||||
|
auto GAf_D3 = GammaA_f * D3;
|
||||||
|
|
||||||
|
for (int ie_f=0; ie_f < 6 ; ie_f++){
|
||||||
|
int a_f = epsilon[ie_f][0]; //a
|
||||||
|
int b_f = epsilon[ie_f][1]; //b
|
||||||
|
int c_f = epsilon[ie_f][2]; //c
|
||||||
|
for (int ie_i=0; ie_i < 6 ; ie_i++){
|
||||||
|
int a_i = epsilon[ie_i][0]; //a'
|
||||||
|
int b_i = epsilon[ie_i][1]; //b'
|
||||||
|
int c_i = epsilon[ie_i][2]; //c'
|
||||||
|
|
||||||
|
Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
|
||||||
//This is the \delta_{456}^{123} part
|
//This is the \delta_{456}^{123} part
|
||||||
if (wick_contraction[0]){
|
if (wick_contraction[0]){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int rho=0; rho<Ns; rho++){
|
||||||
auto eepD1 = ee * pD1()(gamma_left,gamma_left)(c_right,c_left);
|
auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i);
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
auto D2g_ab = D2g()(alpha_right,beta_left)(a_right,a_left);
|
result()()() += ee * GAf_D1_GAi_P_rr_cc
|
||||||
auto gD3_ab = gD3()(alpha_right,beta_left)(b_right,b_left);
|
* D2_GBi ()(alpha_f,beta_i)(a_f,a_i)
|
||||||
result()()() += eepD1*D2g_ab*gD3_ab;
|
* GBf_D3 ()(alpha_f,beta_i)(b_f,b_i);
|
||||||
}}
|
}}
|
||||||
}
|
|
||||||
}
|
|
||||||
//This is the \delta_{456}^{231} part
|
|
||||||
if (wick_contraction[1]){
|
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
|
||||||
auto gD3_ag = gD3()(alpha_right,gamma_left)(b_right,c_left);
|
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
|
||||||
auto eepD1g_gb = ee * pD1g()(gamma_left,beta_left)(c_right,a_left);
|
|
||||||
auto D2_ab = D2()(alpha_right,beta_left)(a_right,b_left);
|
|
||||||
result()()() += eepD1g_gb*D2_ab*gD3_ag;
|
|
||||||
}
|
}
|
||||||
}}
|
}
|
||||||
|
//This is the \delta_{456}^{231} part
|
||||||
|
if (wick_contraction[1]){
|
||||||
|
for (int rho=0; rho<Ns; rho++){
|
||||||
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
|
auto D1_GAi_P_ar_ac = D1_GAi_P()(alpha_f,rho)(a_f,c_i);
|
||||||
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
|
result()()() += ee * D1_GAi_P_ar_ac
|
||||||
|
* GBf_D2_GBi ()(alpha_f,beta_i)(b_f,a_i)
|
||||||
|
* GAf_D3 ()(rho,beta_i)(c_f,b_i);
|
||||||
|
}
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{312} part
|
//This is the \delta_{456}^{312} part
|
||||||
if (wick_contraction[2]){
|
if (wick_contraction[2]){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int rho=0; rho<Ns; rho++){
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
auto D2_ag = D2()(alpha_right,gamma_left)(a_right,c_left);
|
auto GBf_D1_GAi_P_ar_bc = GBf_D1_GAi_P()(alpha_f,rho)(b_f,c_i);
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
auto eepD1_gb = ee * pD1()(gamma_left,beta_left)(c_right,b_left);
|
result()()() += ee * GBf_D1_GAi_P_ar_bc
|
||||||
auto gD3g_ab = gD3g()(alpha_right,beta_left)(b_right,a_left);
|
* GAf_D2_GBi ()(rho,beta_i)(c_f,a_i)
|
||||||
result()()() += eepD1_gb*D2_ag*gD3g_ab;
|
* D3 ()(alpha_f,beta_i)(a_f,b_i);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{132} part
|
//This is the \delta_{456}^{132} part
|
||||||
if (wick_contraction[3]){
|
if (wick_contraction[3]){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int rho=0; rho<Ns; rho++){
|
||||||
auto eepD1 = ee * pD1()(gamma_left,gamma_left)(c_right,c_left);
|
auto GAf_D1_GAi_P_rr_cc = GAf_D1_GAi_P()(rho,rho)(c_f,c_i);
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
auto D2_ab = D2()(alpha_right,beta_left)(a_right,b_left);
|
result()()() -= ee * GAf_D1_GAi_P_rr_cc
|
||||||
auto gD3g_ab = gD3g()(alpha_right,beta_left)(b_right,a_left);
|
* GBf_D2_GBi ()(alpha_f,beta_i)(b_f,a_i)
|
||||||
result()()() -= eepD1*D2_ab*gD3g_ab;
|
* D3 ()(alpha_f,beta_i)(a_f,b_i);
|
||||||
}}
|
}
|
||||||
}
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{321} part
|
//This is the \delta_{456}^{321} part
|
||||||
if (wick_contraction[4]){
|
if (wick_contraction[4]){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int rho=0; rho<Ns; rho++){
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
auto gD3_ag = gD3()(alpha_right,gamma_left)(b_right,c_left);
|
auto GBf_D1_GAi_P_ar_bc = GBf_D1_GAi_P()(alpha_f,rho)(b_f,c_i);
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
auto eepD1_gb = ee * pD1()(gamma_left,beta_left)(c_right,b_left);
|
result()()() -= ee * GBf_D1_GAi_P_ar_bc
|
||||||
auto D2g_ab = D2g()(alpha_right,beta_left)(a_right,a_left);
|
* D2_GBi ()(alpha_f,beta_i)(a_f,a_i)
|
||||||
result()()() -= eepD1_gb*D2g_ab*gD3_ag;
|
* GAf_D3 ()(rho,beta_i)(c_f,b_i);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
//This is the \delta_{456}^{213} part
|
//This is the \delta_{456}^{213} part
|
||||||
if (wick_contraction[5]){
|
if (wick_contraction[5]){
|
||||||
for (int gamma_left=0; gamma_left<Ns; gamma_left++){
|
for (int rho=0; rho<Ns; rho++){
|
||||||
for (int alpha_right=0; alpha_right<Ns; alpha_right++){
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
auto D2_ag = D2()(alpha_right,gamma_left)(a_right,c_left);
|
auto D1_GAi_P_ar_ac = D1_GAi_P()(alpha_f,rho)(a_f,c_i);
|
||||||
for (int beta_left=0; beta_left<Ns; beta_left++){
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
auto eepD1g_gb = ee * pD1g()(gamma_left,beta_left)(c_right,a_left);
|
result()()() -= ee * D1_GAi_P_ar_ac
|
||||||
auto gD3_ab = gD3()(alpha_right,beta_left)(b_right,b_left);
|
* GAf_D2_GBi ()(rho,beta_i)(c_f,a_i)
|
||||||
result()()() -= eepD1g_gb*D2_ag*gD3_ab;
|
* GBf_D3 ()(alpha_f,beta_i)(b_f,b_i);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
}
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Computes which wick contractions should be performed for a *
|
||||||
|
* baryon 2pt function given the initial and final state quark    *
|
||||||
|
* flavours. *
|
||||||
|
* The array wick_contractions must be of length 6 */
|
||||||
|
template<class FImpl>
|
||||||
|
void BaryonUtils<FImpl>::Wick_Contractions(std::string qi, std::string qf, bool* wick_contractions) {
|
||||||
|
const int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}};
|
||||||
|
for (int ie=0; ie < 6 ; ie++) {
|
||||||
|
wick_contractions[ie] = (qi.size() == 3 && qf.size() == 3
|
||||||
|
&& qi[0] == qf[epsilon[ie][0]]
|
||||||
|
&& qi[1] == qf[epsilon[ie][1]]
|
||||||
|
&& qi[2] == qf[epsilon[ie][2]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The array wick_contractions must be of length 6. The order *
|
||||||
|
* corresponds to that shown in the Hadrons documentation         *
|
||||||
|
* at https://aportelli.github.io/Hadrons-doc/#/mcontraction *
|
||||||
|
* This can be computed from the quark flavours using the *
|
||||||
|
* Wick_Contractions function above */
|
||||||
template<class FImpl>
|
template<class FImpl>
|
||||||
void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
||||||
const PropagatorField &q2_left,
|
const PropagatorField &q2_left,
|
||||||
@ -279,8 +357,7 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
const Gamma GammaB_left,
|
const Gamma GammaB_left,
|
||||||
const Gamma GammaA_right,
|
const Gamma GammaA_right,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_right,
|
||||||
const char * quarks_left,
|
const bool* wick_contractions,
|
||||||
const char * quarks_right,
|
|
||||||
const int parity,
|
const int parity,
|
||||||
ComplexField &baryon_corr)
|
ComplexField &baryon_corr)
|
||||||
{
|
{
|
||||||
@ -288,7 +365,6 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
|
||||||
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
||||||
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
||||||
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
|
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
|
||||||
@ -298,10 +374,6 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
|
|
||||||
GridBase *grid = q1_left.Grid();
|
GridBase *grid = q1_left.Grid();
|
||||||
|
|
||||||
int wick_contraction[6];
|
|
||||||
for (int ie=0; ie < 6 ; ie++)
|
|
||||||
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
|
|
||||||
|
|
||||||
autoView(vbaryon_corr, baryon_corr,CpuWrite);
|
autoView(vbaryon_corr, baryon_corr,CpuWrite);
|
||||||
autoView( v1 , q1_left, CpuRead);
|
autoView( v1 , q1_left, CpuRead);
|
||||||
autoView( v2 , q2_left, CpuRead);
|
autoView( v2 , q2_left, CpuRead);
|
||||||
@ -311,10 +383,10 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
|
bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real));
|
||||||
for (int ie=0; ie < 6 ; ie++){
|
for (int ie=0; ie < 6 ; ie++){
|
||||||
if(ie==0 or ie==3){
|
if(ie==0 or ie==3){
|
||||||
bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contraction[ie];
|
bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie];
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contraction[ie];
|
bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Real t=0.;
|
Real t=0.;
|
||||||
@ -325,7 +397,7 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
auto D2 = v2[ss];
|
auto D2 = v2[ss];
|
||||||
auto D3 = v3[ss];
|
auto D3 = v3[ss];
|
||||||
vobj result=Zero();
|
vobj result=Zero();
|
||||||
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result);
|
||||||
vbaryon_corr[ss] = result;
|
vbaryon_corr[ss] = result;
|
||||||
} );//end loop over lattice sites
|
} );//end loop over lattice sites
|
||||||
|
|
||||||
@ -334,6 +406,12 @@ void BaryonUtils<FImpl>::ContractBaryons(const PropagatorField &q1_left,
|
|||||||
std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl;
|
std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The array wick_contractions must be of length 6. The order *
|
||||||
|
* corresponds to that shown in the Hadrons documentation         *
|
||||||
|
* at https://aportelli.github.io/Hadrons-doc/#/mcontraction *
|
||||||
|
* This can also be computed from the quark flavours using the *
|
||||||
|
* Wick_Contractions function above */
|
||||||
template <class FImpl>
|
template <class FImpl>
|
||||||
template <class mobj, class robj>
|
template <class mobj, class robj>
|
||||||
void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
||||||
@ -343,16 +421,15 @@ void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
|||||||
const Gamma GammaB_left,
|
const Gamma GammaB_left,
|
||||||
const Gamma GammaA_right,
|
const Gamma GammaA_right,
|
||||||
const Gamma GammaB_right,
|
const Gamma GammaB_right,
|
||||||
const char * quarks_left,
|
const bool* wick_contractions,
|
||||||
const char * quarks_right,
|
|
||||||
const int parity,
|
const int parity,
|
||||||
|
const int nt,
|
||||||
robj &result)
|
robj &result)
|
||||||
{
|
{
|
||||||
|
|
||||||
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
assert(Ns==4 && "Baryon code only implemented for N_spin = 4");
|
||||||
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
assert(Nc==3 && "Baryon code only implemented for N_colour = 3");
|
||||||
|
|
||||||
std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl;
|
|
||||||
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl;
|
||||||
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl;
|
||||||
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
|
std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl;
|
||||||
@ -360,17 +437,347 @@ void BaryonUtils<FImpl>::ContractBaryons_Sliced(const mobj &D1,
|
|||||||
|
|
||||||
assert(parity==1 || parity == -1 && "Parity must be +1 or -1");
|
assert(parity==1 || parity == -1 && "Parity must be +1 or -1");
|
||||||
|
|
||||||
int wick_contraction[6];
|
for (int t=0; t<nt; t++) {
|
||||||
for (int ie=0; ie < 6 ; ie++)
|
baryon_site(D1[t],D2[t],D3[t],GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result[t]);
|
||||||
wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0;
|
}
|
||||||
|
|
||||||
result=Zero();
|
|
||||||
baryon_site<decltype(D1),decltype(result)>(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/***********************************************************************
|
/***********************************************************************
|
||||||
* End of Baryon 2pt-function code. *
|
* End of Baryon 2pt-function code. *
|
||||||
* *
|
* *
|
||||||
|
* The following code is for baryonGamma3pt function *
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
/* Dq1_ti is a quark line from t_i to t_J
|
||||||
|
* Dq2_spec is a quark line from t_i to t_f
|
||||||
|
* Dq3_spec is a quark line from t_i to t_f
|
||||||
|
* Dq4_tf is a quark line from t_f to t_J */
|
||||||
|
template<class FImpl>
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
|
void BaryonUtils<FImpl>::Baryon_Gamma_3pt_Group1_Site(
|
||||||
|
const mobj &Dq1_ti,
|
||||||
|
const mobj2 &Dq2_spec,
|
||||||
|
const mobj2 &Dq3_spec,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result)
|
||||||
|
{
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
|
||||||
|
auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti;
|
||||||
|
auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1;
|
||||||
|
auto D2_Gi = Dq2_spec * GammaBi;
|
||||||
|
auto Gf_D2_Gi = GammaBf * D2_Gi;
|
||||||
|
auto Gf_D3 = GammaBf * Dq3_spec;
|
||||||
|
|
||||||
|
int a_f, b_f, c_f;
|
||||||
|
int a_i, b_i, c_i;
|
||||||
|
|
||||||
|
Real ee;
|
||||||
|
|
||||||
|
for (int ie_f=0; ie_f < 6 ; ie_f++){
|
||||||
|
a_f = epsilon[ie_f][0]; //a
|
||||||
|
b_f = epsilon[ie_f][1]; //b
|
||||||
|
c_f = epsilon[ie_f][2]; //c
|
||||||
|
for (int ie_i=0; ie_i < 6 ; ie_i++){
|
||||||
|
a_i = epsilon[ie_i][0]; //a'
|
||||||
|
b_i = epsilon[ie_i][1]; //b'
|
||||||
|
c_i = epsilon[ie_i][2]; //c'
|
||||||
|
|
||||||
|
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
|
||||||
|
|
||||||
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
|
auto D2_Gi_ab_aa = D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
|
||||||
|
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
|
||||||
|
auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
|
||||||
|
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
|
||||||
|
|
||||||
|
for (int gamma_i=0; gamma_i<Ns; gamma_i++){
|
||||||
|
auto ee_adjD4_g_D1_ag_ac = ee * adjD4_g_D1 ()(alpha_f,gamma_i)(a_f,c_i);
|
||||||
|
auto ee_Gf_adjD4_g_D1_ag_bc = ee * Gf_adjD4_g_D1()(alpha_f,gamma_i)(b_f,c_i);
|
||||||
|
for (int gamma_f=0; gamma_f<Ns; gamma_f++){
|
||||||
|
auto ee_adjD4_g_D1_gg_cc = ee * adjD4_g_D1 ()(gamma_f,gamma_i)(c_f,c_i);
|
||||||
|
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
|
||||||
|
auto D2_Gi_gb_ca = D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
|
||||||
|
|
||||||
|
|
||||||
|
if(wick_contraction == 1) { // Do contraction I1
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_adjD4_g_D1_gg_cc
|
||||||
|
* D2_Gi_ab_aa
|
||||||
|
* Gf_D3_ab_bb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 2) { // Do contraction I2
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_adjD4_g_D1_ag_ac
|
||||||
|
* Gf_D2_Gi_ab_ba
|
||||||
|
* Dq3_spec_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 3) { // Do contraction I3
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Gf_adjD4_g_D1_ag_bc
|
||||||
|
* D2_Gi_gb_ca
|
||||||
|
* Dq3_spec_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 4) { // Do contraction I4
|
||||||
|
result()(gamma_f,gamma_i)() += ee_adjD4_g_D1_gg_cc
|
||||||
|
* Gf_D2_Gi_ab_ba
|
||||||
|
* Dq3_spec_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 5) { // Do contraction I5
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Gf_adjD4_g_D1_ag_bc
|
||||||
|
* D2_Gi_ab_aa
|
||||||
|
* Dq3_spec_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 6) { // Do contraction I6
|
||||||
|
result()(gamma_f,gamma_i)() += ee_adjD4_g_D1_ag_ac
|
||||||
|
* D2_Gi_gb_ca
|
||||||
|
* Gf_D3_ab_bb;
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dq1_spec is a quark line from t_i to t_f
|
||||||
|
* Dq2_ti is a quark line from t_i to t_J
|
||||||
|
* Dq3_spec is a quark line from t_i to t_f
|
||||||
|
* Dq4_tf is a quark line from t_f to t_J */
|
||||||
|
template<class FImpl>
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
|
void BaryonUtils<FImpl>::Baryon_Gamma_3pt_Group2_Site(
|
||||||
|
const mobj2 &Dq1_spec,
|
||||||
|
const mobj &Dq2_ti,
|
||||||
|
const mobj2 &Dq3_spec,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result)
|
||||||
|
{
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
|
||||||
|
auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi;
|
||||||
|
auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi;
|
||||||
|
auto Gf_D1 = GammaBf * Dq1_spec;
|
||||||
|
auto Gf_D3 = GammaBf * Dq3_spec;
|
||||||
|
|
||||||
|
int a_f, b_f, c_f;
|
||||||
|
int a_i, b_i, c_i;
|
||||||
|
|
||||||
|
Real ee;
|
||||||
|
|
||||||
|
for (int ie_f=0; ie_f < 6 ; ie_f++){
|
||||||
|
a_f = epsilon[ie_f][0]; //a
|
||||||
|
b_f = epsilon[ie_f][1]; //b
|
||||||
|
c_f = epsilon[ie_f][2]; //c
|
||||||
|
for (int ie_i=0; ie_i < 6 ; ie_i++){
|
||||||
|
a_i = epsilon[ie_i][0]; //a'
|
||||||
|
b_i = epsilon[ie_i][1]; //b'
|
||||||
|
c_i = epsilon[ie_i][2]; //c'
|
||||||
|
|
||||||
|
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
|
||||||
|
|
||||||
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
|
auto adjD4_g_D2_Gi_ab_aa = adjD4_g_D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
|
||||||
|
auto Gf_D3_ab_bb = Gf_D3 ()(alpha_f,beta_i)(b_f,b_i);
|
||||||
|
auto Gf_adjD4_g_D2_Gi_ab_ba = Gf_adjD4_g_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
|
||||||
|
auto Dq3_spec_ab_ab = Dq3_spec ()(alpha_f,beta_i)(a_f,b_i);
|
||||||
|
|
||||||
|
for (int gamma_i=0; gamma_i<Ns; gamma_i++){
|
||||||
|
auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(alpha_f,gamma_i)(a_f,c_i);
|
||||||
|
auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(alpha_f,gamma_i)(b_f,c_i);
|
||||||
|
for (int gamma_f=0; gamma_f<Ns; gamma_f++){
|
||||||
|
auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(gamma_f,gamma_i)(c_f,c_i);
|
||||||
|
auto Dq3_spec_gb_cb = Dq3_spec ()(gamma_f,beta_i)(c_f,b_i);
|
||||||
|
auto adjD4_g_D2_Gi_gb_ca = adjD4_g_D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
|
||||||
|
|
||||||
|
if(wick_contraction == 1) { // Do contraction II1
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Dq1_spec_gg_cc
|
||||||
|
* adjD4_g_D2_Gi_ab_aa
|
||||||
|
* Gf_D3_ab_bb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 2) { // Do contraction II2
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Dq1_spec_ag_ac
|
||||||
|
* Gf_adjD4_g_D2_Gi_ab_ba
|
||||||
|
* Dq3_spec_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 3) { // Do contraction II3
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Gf_D1_ag_bc
|
||||||
|
* adjD4_g_D2_Gi_gb_ca
|
||||||
|
* Dq3_spec_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 4) { // Do contraction II4
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Dq1_spec_gg_cc
|
||||||
|
* Gf_adjD4_g_D2_Gi_ab_ba
|
||||||
|
* Dq3_spec_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 5) { // Do contraction II5
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Gf_D1_ag_bc
|
||||||
|
* adjD4_g_D2_Gi_ab_aa
|
||||||
|
* Dq3_spec_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 6) { // Do contraction II6
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Dq1_spec_ag_ac
|
||||||
|
* adjD4_g_D2_Gi_gb_ca
|
||||||
|
* Gf_D3_ab_bb;
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Dq1_spec is a quark line from t_i to t_f
|
||||||
|
* Dq2_spec is a quark line from t_i to t_f
|
||||||
|
* Dq3_ti is a quark line from t_i to t_J
|
||||||
|
* Dq4_tf is a quark line from t_f to t_J */
|
||||||
|
template<class FImpl>
|
||||||
|
template <class mobj, class mobj2, class robj>
|
||||||
|
void BaryonUtils<FImpl>::Baryon_Gamma_3pt_Group3_Site(
|
||||||
|
const mobj2 &Dq1_spec,
|
||||||
|
const mobj2 &Dq2_spec,
|
||||||
|
const mobj &Dq3_ti,
|
||||||
|
const mobj &Dq4_tf,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
int wick_contraction,
|
||||||
|
robj &result)
|
||||||
|
{
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
|
||||||
|
auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti;
|
||||||
|
auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3;
|
||||||
|
auto Gf_D1 = GammaBf * Dq1_spec;
|
||||||
|
auto D2_Gi = Dq2_spec * GammaBi;
|
||||||
|
auto Gf_D2_Gi = GammaBf * D2_Gi;
|
||||||
|
|
||||||
|
int a_f, b_f, c_f;
|
||||||
|
int a_i, b_i, c_i;
|
||||||
|
|
||||||
|
Real ee;
|
||||||
|
|
||||||
|
for (int ie_f=0; ie_f < 6 ; ie_f++){
|
||||||
|
a_f = epsilon[ie_f][0]; //a
|
||||||
|
b_f = epsilon[ie_f][1]; //b
|
||||||
|
c_f = epsilon[ie_f][2]; //c
|
||||||
|
for (int ie_i=0; ie_i < 6 ; ie_i++){
|
||||||
|
a_i = epsilon[ie_i][0]; //a'
|
||||||
|
b_i = epsilon[ie_i][1]; //b'
|
||||||
|
c_i = epsilon[ie_i][2]; //c'
|
||||||
|
|
||||||
|
ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i];
|
||||||
|
|
||||||
|
for (int alpha_f=0; alpha_f<Ns; alpha_f++){
|
||||||
|
for (int beta_i=0; beta_i<Ns; beta_i++){
|
||||||
|
auto D2_Gi_ab_aa = D2_Gi ()(alpha_f,beta_i)(a_f,a_i);
|
||||||
|
auto Gf_adjD4_g_D3_ab_bb = Gf_adjD4_g_D3 ()(alpha_f,beta_i)(b_f,b_i);
|
||||||
|
auto Gf_D2_Gi_ab_ba = Gf_D2_Gi ()(alpha_f,beta_i)(b_f,a_i);
|
||||||
|
auto adjD4_g_D3_ab_ab = adjD4_g_D3 ()(alpha_f,beta_i)(a_f,b_i);
|
||||||
|
|
||||||
|
for (int gamma_i=0; gamma_i<Ns; gamma_i++) {
|
||||||
|
auto ee_Dq1_spec_ag_ac = ee * Dq1_spec ()(alpha_f,gamma_i)(a_f,c_i);
|
||||||
|
auto ee_Gf_D1_ag_bc = ee * Gf_D1 ()(alpha_f,gamma_i)(b_f,c_i);
|
||||||
|
for (int gamma_f=0; gamma_f<Ns; gamma_f++) {
|
||||||
|
auto ee_Dq1_spec_gg_cc = ee * Dq1_spec ()(gamma_f,gamma_i)(c_f,c_i);
|
||||||
|
auto adjD4_g_D3_gb_cb = adjD4_g_D3 ()(gamma_f,beta_i)(c_f,b_i);
|
||||||
|
auto D2_Gi_gb_ca = D2_Gi ()(gamma_f,beta_i)(c_f,a_i);
|
||||||
|
|
||||||
|
if(wick_contraction == 1) { // Do contraction III1
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Dq1_spec_gg_cc
|
||||||
|
* D2_Gi_ab_aa
|
||||||
|
* Gf_adjD4_g_D3_ab_bb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 2) { // Do contraction III2
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Dq1_spec_ag_ac
|
||||||
|
* Gf_D2_Gi_ab_ba
|
||||||
|
* adjD4_g_D3_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 3) { // Do contraction III3
|
||||||
|
result()(gamma_f,gamma_i)() -= ee_Gf_D1_ag_bc
|
||||||
|
* D2_Gi_gb_ca
|
||||||
|
* adjD4_g_D3_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 4) { // Do contraction III4
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Dq1_spec_gg_cc
|
||||||
|
* Gf_D2_Gi_ab_ba
|
||||||
|
* adjD4_g_D3_ab_ab;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 5) { // Do contraction III5
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Gf_D1_ag_bc
|
||||||
|
* D2_Gi_ab_aa
|
||||||
|
* adjD4_g_D3_gb_cb;
|
||||||
|
}
|
||||||
|
if(wick_contraction == 6) { // Do contraction III6
|
||||||
|
result()(gamma_f,gamma_i)() += ee_Dq1_spec_ag_ac
|
||||||
|
* D2_Gi_gb_ca
|
||||||
|
* Gf_adjD4_g_D3_ab_bb;
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The group indicates which initial state quarks the current is *
|
||||||
|
* connected to. It must be in the range 1-3. *
|
||||||
|
* The wick_contraction must be in the range 1-6, corresponding to *
|
||||||
|
* the contractions given in the Hadrons documentation at *
|
||||||
|
* https://aportelli.github.io/Hadrons-doc/#/mcontraction */
|
||||||
|
template<class FImpl>
|
||||||
|
template <class mobj>
|
||||||
|
void BaryonUtils<FImpl>::Baryon_Gamma_3pt(
|
||||||
|
const PropagatorField &q_ti,
|
||||||
|
const mobj &Dq_spec1,
|
||||||
|
const mobj &Dq_spec2,
|
||||||
|
const PropagatorField &q_tf,
|
||||||
|
int group,
|
||||||
|
int wick_contraction,
|
||||||
|
const Gamma GammaJ,
|
||||||
|
const Gamma GammaBi,
|
||||||
|
const Gamma GammaBf,
|
||||||
|
SpinMatrixField &stn_corr)
|
||||||
|
{
|
||||||
|
GridBase *grid = q_tf.Grid();
|
||||||
|
|
||||||
|
autoView( vcorr, stn_corr, CpuWrite);
|
||||||
|
autoView( vq_ti , q_ti, CpuRead);
|
||||||
|
autoView( vq_tf , q_tf, CpuRead);
|
||||||
|
|
||||||
|
if (group == 1) {
|
||||||
|
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||||
|
auto Dq_ti = vq_ti[ss];
|
||||||
|
auto Dq_tf = vq_tf[ss];
|
||||||
|
sobj result=Zero();
|
||||||
|
Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
|
||||||
|
vcorr[ss] += result;
|
||||||
|
});//end loop over lattice sites
|
||||||
|
} else if (group == 2) {
|
||||||
|
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||||
|
auto Dq_ti = vq_ti[ss];
|
||||||
|
auto Dq_tf = vq_tf[ss];
|
||||||
|
sobj result=Zero();
|
||||||
|
Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
|
||||||
|
vcorr[ss] += result;
|
||||||
|
});//end loop over lattice sites
|
||||||
|
} else if (group == 3) {
|
||||||
|
accelerator_for(ss, grid->oSites(), grid->Nsimd(), {
|
||||||
|
auto Dq_ti = vq_ti[ss];
|
||||||
|
auto Dq_tf = vq_tf[ss];
|
||||||
|
sobj result=Zero();
|
||||||
|
Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result);
|
||||||
|
|
||||||
|
vcorr[ss] += result;
|
||||||
|
});//end loop over lattice sites
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***********************************************************************
|
||||||
|
* End of BaryonGamma3pt-function code. *
|
||||||
|
* *
|
||||||
* The following code is for Sigma -> N rare hyperon decays *
|
* The following code is for Sigma -> N rare hyperon decays *
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
|
@ -59,6 +59,20 @@ class GridTensorBase {};
|
|||||||
using DoublePrecision2= typename Traits::DoublePrecision2; \
|
using DoublePrecision2= typename Traits::DoublePrecision2; \
|
||||||
static constexpr int TensorLevel = Traits::TensorLevel
|
static constexpr int TensorLevel = Traits::TensorLevel
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Allows turning scalar<scalar<scalar<double>>> back into double.
|
||||||
|
///////////////////////////////////////////////////////////
|
||||||
|
// Base case of TensorRemove: selected (via enable_if) only when T is not a
// Grid tensor according to isGridTensor<T>; returns the argument unchanged.
template <class T>
|
||||||
|
accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
|
||||||
|
TensorRemove(T arg) {
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
// Recursive case of TensorRemove: peels one iScalar layer by recursing on
// arg._internal. The return type is whatever the recursive call yields.
template <class vtype>
|
||||||
|
accelerator_inline auto TensorRemove(iScalar<vtype> arg)
|
||||||
|
-> decltype(TensorRemove(arg._internal)) {
|
||||||
|
return TensorRemove(arg._internal);
|
||||||
|
}
|
||||||
|
|
||||||
template <class vtype>
|
template <class vtype>
|
||||||
class iScalar {
|
class iScalar {
|
||||||
public:
|
public:
|
||||||
@ -135,9 +149,10 @@ public:
|
|||||||
operator ComplexD() const {
|
operator ComplexD() const {
|
||||||
return (TensorRemove(_internal));
|
return (TensorRemove(_internal));
|
||||||
}
|
}
|
||||||
|
// instantiation of "Grid::iScalar<vtype>::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, <unnamed>=0, <unnamed>=0U]"
|
||||||
template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> accelerator_inline
|
template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> accelerator_inline
|
||||||
operator RealD() const {
|
operator RealD() const {
|
||||||
return TensorRemove(_internal);
|
return (RealD) TensorRemove(_internal);
|
||||||
}
|
}
|
||||||
template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> accelerator_inline
|
template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> accelerator_inline
|
||||||
operator Integer() const {
|
operator Integer() const {
|
||||||
@ -169,20 +184,6 @@ public:
|
|||||||
strong_inline scalar_type * end() { return begin() + Traits::count; }
|
strong_inline scalar_type * end() { return begin() + Traits::count; }
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
// Allows to turn scalar<scalar<scalar<double>>>> back to double.
|
|
||||||
///////////////////////////////////////////////////////////
|
|
||||||
template <class T>
|
|
||||||
accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
|
|
||||||
TensorRemove(T arg) {
|
|
||||||
return arg;
|
|
||||||
}
|
|
||||||
template <class vtype>
|
|
||||||
accelerator_inline auto TensorRemove(iScalar<vtype> arg)
|
|
||||||
-> decltype(TensorRemove(arg._internal)) {
|
|
||||||
return TensorRemove(arg._internal);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class vtype, int N>
|
template <class vtype, int N>
|
||||||
class iVector {
|
class iVector {
|
||||||
public:
|
public:
|
||||||
|
@ -28,6 +28,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#ifdef HAVE_MALLOC_MALLOC_H
|
#ifdef HAVE_MALLOC_MALLOC_H
|
||||||
#include <malloc/malloc.h>
|
#include <malloc/malloc.h>
|
||||||
#endif
|
#endif
|
||||||
@ -334,12 +336,11 @@ inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemc
|
|||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// CPU Target - No accelerator just thread instead
|
// CPU Target - No accelerator just thread instead
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned
|
||||||
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) )
|
#if ( (!defined(GRID_SYCL)) && (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) )
|
||||||
|
|
||||||
#undef GRID_SIMT
|
#undef GRID_SIMT
|
||||||
|
|
||||||
#define GRID_ALLOC_ALIGN (2*1024*1024) // 2MB aligned
|
|
||||||
|
|
||||||
#define accelerator
|
#define accelerator
|
||||||
#define accelerator_inline strong_inline
|
#define accelerator_inline strong_inline
|
||||||
#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ });
|
#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ });
|
||||||
@ -365,6 +366,14 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr);};
|
|||||||
|
|
||||||
#endif // CPU target
|
#endif // CPU target
|
||||||
|
|
||||||
|
// Host-side aligned allocation pair. Both variants request GRID_ALLOC_ALIGN
// alignment; memory from acceleratorAllocCpu must be released with the
// acceleratorFreeCpu from the same preprocessor branch.
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
// _mm_malloc takes (size, alignment).
inline void *acceleratorAllocCpu(size_t bytes){return _mm_malloc(bytes,GRID_ALLOC_ALIGN);};
|
||||||
|
inline void acceleratorFreeCpu (void *ptr){_mm_free(ptr);};
|
||||||
|
#else
|
||||||
|
// memalign takes (alignment, size) — note the argument order differs from _mm_malloc.
// NOTE(review): confirm a header declaring memalign is included on this path.
inline void *acceleratorAllocCpu(size_t bytes){return memalign(GRID_ALLOC_ALIGN,bytes);};
|
||||||
|
inline void acceleratorFreeCpu (void *ptr){free(ptr);};
|
||||||
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
// Synchronise across local threads for divergence resynch
|
// Synchronise across local threads for divergence resynch
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
|
@ -318,6 +318,11 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
Grid_debug_handler_init();
|
Grid_debug_handler_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Memory manager
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
MemoryManager::Init();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
// MPI initialisation
|
// MPI initialisation
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
@ -357,11 +362,6 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout << GridLogMessage << "================================================ "<<std::endl;
|
std::cout << GridLogMessage << "================================================ "<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////
|
|
||||||
// Memory manager
|
|
||||||
//////////////////////////////////////////////////////////
|
|
||||||
MemoryManager::Init();
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// Reporting
|
// Reporting
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
|
594
tests/solver/Test_dwf_multigrid.cc
Normal file
594
tests/solver/Test_dwf_multigrid.cc
Normal file
@ -0,0 +1,594 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/solver/Test_dwf_multigrid.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
|
||||||
|
#include <Grid/algorithms/iterative/BiCGSTAB.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
// Adapts an (operator, solver, guesser) triple to the LinearFunction
// interface: calling the wrapper first lets the guesser fill `out`, then
// runs the solver on the stored operator with `in` as source and `out` as
// the in/out solution.
// NOTE(review): the original comment described this as the normal-equations
// trick (Mdag M out = Mdag in); whether that holds depends on the operator
// and solver handed in — the wrapper itself only forwards the call.
/////////////////////////////////////////////////////
template<class Field> class SolverWrapper : public LinearFunction<Field> {
private:
  LinearOperatorBase<Field> & _Matrix;
  OperatorFunction<Field>   & _Solver;
  LinearFunction<Field>     & _Guess;

public:
  SolverWrapper(LinearOperatorBase<Field> &Matrix,
                OperatorFunction<Field>   &Solver,
                LinearFunction<Field>     &Guess)
    : _Matrix(Matrix),
      _Solver(Solver),
      _Guess(Guess)
  {}

  void operator() (const Field &in, Field &out)
  {
    // Initial guess first, then hand over to the solver.
    _Guess(in,out);
    _Solver(_Matrix,in,out);
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
// Must use a non-hermitian solver
|
||||||
|
template<class Matrix,class Field>
|
||||||
|
class PVdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||||
|
Matrix &_Mat;
|
||||||
|
Matrix &_PV;
|
||||||
|
public:
|
||||||
|
PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
|
||||||
|
|
||||||
|
void OpDiag (const Field &in, Field &out) {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
|
void Op (const Field &in, Field &out){
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
_Mat.M(in,tmp);
|
||||||
|
_PV.Mdag(tmp,out);
|
||||||
|
}
|
||||||
|
void AdjOp (const Field &in, Field &out){
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
_PV.M(tmp,out);
|
||||||
|
_Mat.Mdag(in,tmp);
|
||||||
|
}
|
||||||
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void HermOp(const Field &in, Field &out){
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Target function f(x) = 1/x, in the plain function-pointer form
// RealD(*)(RealD) so it can be handed to a Chebyshev constructor.
RealD InverseApproximation(RealD x)
{
  const RealD reciprocal = 1.0/x;
  return reciprocal;
}
|
||||||
|
|
||||||
|
// Smoother built on a Chebyshev object constructed with
// (_lo,_hi,_ord,InverseApproximation).  Applying it to `in` first forms
// AdjOp(in) with the smoother operator and then applies the Chebyshev object
// to that vector using MdagM of the smoother matrix, writing to `out`.
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
  using FineOperator = LinearOperatorBase<Field>;

  Matrix       & _SmootherMatrix;
  FineOperator & _SmootherOperator;

  Chebyshev<Field> Cheby;

  // Initialisers are listed in member declaration order.
  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
    _SmootherMatrix(SmootherMatrix),
    _SmootherOperator(SmootherOperator),
    Cheby(_lo,_hi,_ord,InverseApproximation)
  {}

  void operator() (const Field &in, Field &out)
  {
    MdagMLinearOperator<Matrix,Field> normal_op(_SmootherMatrix);
    Field adj_src(in.Grid());

    _SmootherOperator.AdjOp(in,adj_src);   // adj_src = Op^dag in
    Cheby(normal_op,adj_src,out);
  }
};
|
||||||
|
|
||||||
|
// Smoother that runs ConjugateGradient(tol,maxit,false) on a
// ShiftedMdagMLinearOperator built from the smoother matrix and `shift`.
// The source handed to CG is AdjOp(in) of the smoother operator, and `out`
// is first filled by a ZeroGuesser before the solve.
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
  using FineOperator = LinearOperatorBase<Field>;

  Matrix       & SmootherMatrix;
  FineOperator & SmootherOperator;
  RealD tol;
  RealD shift;
  int   maxit;

  // Initialisers are listed in member declaration order.
  MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
    SmootherMatrix(_SmootherMatrix),
    SmootherOperator(_SmootherOperator),
    tol(_tol),
    shift(_shift),
    maxit(_maxit)
  {}

  void operator() (const Field &in, Field &out)
  {
    ZeroGuesser<Field>       initial_guess;
    ConjugateGradient<Field> cg_solver(tol,maxit,false);

    Field adj_src(in.Grid());

    ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> shifted_op(SmootherMatrix,shift);

    SmootherOperator.AdjOp(in,adj_src);    // adj_src = Op^dag in
    initial_guess(adj_src,out);
    cg_solver(shifted_op,adj_src,out);
  }
};
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
|
||||||
|
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
|
||||||
|
HDCRPreconditioner(Aggregates &Agg,
|
||||||
|
FineOperator &Fine,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
auto CoarseGrid = _Aggregates.CoarseGrid;
|
||||||
|
CoarseVector Csrc(CoarseGrid);
|
||||||
|
CoarseVector Csol(CoarseGrid);
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Guesser, class CoarseSolver>
|
||||||
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
CoarseOperator & _CoarseOperator;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
Guesser & _Guess;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
|
||||||
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
|
FineOperator &Fine,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_CoarseOperator(Coarse),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=16;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
std::vector<int> blockc ({2,2,2,2});
|
||||||
|
const int nbasis= 32;
|
||||||
|
const int nbasisc= 32;
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
auto cclatt = clatt;
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
cclatt[d] = clatt[d]/blockc[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("./ckpoint_lat.4000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.001;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
DomainWallFermionR Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
|
|
||||||
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
{
|
||||||
|
int nb=nbasis/2;
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
}
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Will coarsen G5R5 M and G5R5 Mpv in G5R5 compatible way " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpPV(Dpv);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
|
Level1Op LDOpPV(*Coarse5d,1); LDOpPV.CoarsenOperator(FGrid,HermIndefOpPV,Aggregates);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Testing fine and coarse solvers " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
CoarseVector c_res(Coarse5d);
|
||||||
|
|
||||||
|
LatticeFermion f_src(FGrid); f_src=1.0;
|
||||||
|
LatticeFermion f_res(FGrid);
|
||||||
|
|
||||||
|
LatticeFermion f_src_e(FrbGrid); f_src_e=1.0;
|
||||||
|
LatticeFermion f_res_e(FrbGrid);
|
||||||
|
|
||||||
|
RealD tol=1.0e-8;
|
||||||
|
int MaxIt = 10000;
|
||||||
|
|
||||||
|
BiCGSTAB<CoarseVector> CoarseBiCGSTAB(tol,MaxIt);
|
||||||
|
ConjugateGradient<CoarseVector> CoarseCG(tol,MaxIt);
|
||||||
|
// GeneralisedMinimalResidual<CoarseVector> CoarseGMRES(tol,MaxIt,20);
|
||||||
|
|
||||||
|
BiCGSTAB<LatticeFermion> FineBiCGSTAB(tol,MaxIt);
|
||||||
|
ConjugateGradient<LatticeFermion> FineCG(tol,MaxIt);
|
||||||
|
// GeneralisedMinimalResidual<LatticeFermion> FineGMRES(tol,MaxIt,20);
|
||||||
|
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> FineMdagM(Ddwf); // M^\dag M
|
||||||
|
PVdagMLinearOperator<DomainWallFermionR,LatticeFermion> FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe
|
||||||
|
SchurDiagOneOperator<DomainWallFermionR,LatticeFermion> FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e
|
||||||
|
|
||||||
|
MdagMLinearOperator<Level1Op,CoarseVector> CoarseMdagM(LDOp);
|
||||||
|
PVdagMLinearOperator<Level1Op,CoarseVector> CoarsePVdagM(LDOp,LDOpPV);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Fine CG unprec "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
f_res=Zero();
|
||||||
|
FineCG(FineMdagM,f_src,f_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Fine CG prec DiagMooee "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
f_res_e=Zero();
|
||||||
|
FineCG(FineDiagMooee,f_src_e,f_res_e);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Fine CG prec DiagOne "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
f_res_e=Zero();
|
||||||
|
FineCG(FineDiagOne,f_src_e,f_res_e);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Fine BiCGSTAB unprec "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
f_res=Zero();
|
||||||
|
FineBiCGSTAB(FinePVdagM,f_src,f_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Coarse BiCGSTAB "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
c_res=Zero();
|
||||||
|
CoarseBiCGSTAB(CoarsePVdagM,c_src,c_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Coarse CG unprec "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
c_res=Zero();
|
||||||
|
CoarseCG(CoarseMdagM,c_src,c_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
Chebyshev<CoarseVector> IRLCheby(0.03,12.0,71); // 1 iter
|
||||||
|
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseMdagM);
|
||||||
|
PlainHermOp<CoarseVector> IRLOp (CoarseMdagM);
|
||||||
|
int Nk=64;
|
||||||
|
int Nm=128;
|
||||||
|
int Nstop=Nk;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
std::vector<RealD> eval(Nm);
|
||||||
|
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||||
|
IRL.calc(eval,evec,c_src,Nconv);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running Coarse grid deflated solver "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||||
|
NormalEquations<CoarseVector> DeflCoarseCGNE (LDOp,CoarseCG,DeflCoarseGuesser);
|
||||||
|
c_res=Zero();
|
||||||
|
DeflCoarseCGNE(c_src,c_res);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running HDCR "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
ConjugateGradient<CoarseVector> CoarseMgridCG(0.001,1000);
|
||||||
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf);
|
||||||
|
|
||||||
|
typedef HDCRPreconditioner<vSpinColourVector, vTComplex,nbasis, NormalEquations<CoarseVector> > TwoLevelHDCR;
|
||||||
|
TwoLevelHDCR TwoLevelPrecon(Aggregates,
|
||||||
|
HermIndefOp,
|
||||||
|
FineSmoother,
|
||||||
|
DeflCoarseCGNE);
|
||||||
|
TwoLevelPrecon.Level(1);
|
||||||
|
// PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16);
|
||||||
|
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
|
||||||
|
f_res=Zero();
|
||||||
|
|
||||||
|
CoarseCG.Tolerance=0.02;
|
||||||
|
l1PGCR(f_src,f_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
BiCGSTAB<CoarseVector> CoarseMgridBiCGSTAB(0.01,1000);
|
||||||
|
BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0,24);
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ZeroGuesser<LatticeFermion> FineZeroGuesser;
|
||||||
|
|
||||||
|
SolverWrapper<LatticeFermion> FineBiCGSmoother( FinePVdagM, FineMgridBiCGSTAB, FineZeroGuesser);
|
||||||
|
SolverWrapper<CoarseVector> CoarsePVdagMSolver(CoarsePVdagM,CoarseMgridBiCGSTAB,CoarseZeroGuesser);
|
||||||
|
typedef HDCRPreconditioner<vSpinColourVector, vTComplex,nbasis, SolverWrapper<CoarseVector> > TwoLevelMG;
|
||||||
|
|
||||||
|
TwoLevelMG _TwoLevelMG(Aggregates,
|
||||||
|
FinePVdagM,
|
||||||
|
FineBiCGSmoother,
|
||||||
|
CoarsePVdagMSolver);
|
||||||
|
_TwoLevelMG.Level(1);
|
||||||
|
|
||||||
|
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> pvPGCR(1.0e-8,100,FinePVdagM,_TwoLevelMG,16,16);
|
||||||
|
pvPGCR.Level(1);
|
||||||
|
|
||||||
|
f_res=Zero();
|
||||||
|
pvPGCR(f_src,f_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
|
||||||
|
}
|
375
tests/solver/Test_hw_multigrid.cc
Normal file
375
tests/solver/Test_hw_multigrid.cc
Normal file
@ -0,0 +1,375 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/solver/Test_hw_multigrid.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
//#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
|
||||||
|
#include <Grid/algorithms/iterative/BiCGSTAB.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
// Bundles a linear operator, an iterative solver and an initial-guess
// functor behind the LinearFunction interface, so the combination can be
// passed wherever a LinearFunction<Field> is expected.
/////////////////////////////////////////////////////
template<class Field> class SolverWrapper : public LinearFunction<Field> {
private:
  LinearOperatorBase<Field> & _Operator;     // operator handed to the solver
  OperatorFunction<Field>   & _Iterative;    // solver invoked on _Operator
  LinearFunction<Field>     & _InitialGuess; // called first with (in,out)

public:

  // All three arguments are held by reference; the caller keeps ownership
  // and must keep them alive for as long as this wrapper is used.
  SolverWrapper(LinearOperatorBase<Field> &Matrix,
                OperatorFunction<Field>   &Solver,
                LinearFunction<Field>     &Guess)
    : _Operator(Matrix),
      _Iterative(Solver),
      _InitialGuess(Guess)
  {}

  // Prepare `out` with the guesser, then run the solver on the wrapped
  // operator with `in` as input and `out` as the in/out solution field.
  void operator() (const Field &in, Field &out)
  {
    _InitialGuess(in,out);
    _Iterative(_Operator,in,out);
  }
};
|
||||||
|
|
||||||
|
|
||||||
|
// Must use a non-hermitian solver
|
||||||
|
template<class Matrix,class Field>
|
||||||
|
class PVdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||||
|
Matrix &_Mat;
|
||||||
|
Matrix &_PV;
|
||||||
|
public:
|
||||||
|
PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){};
|
||||||
|
|
||||||
|
void OpDiag (const Field &in, Field &out) {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out){
|
||||||
|
assert(0);
|
||||||
|
};
|
||||||
|
void Op (const Field &in, Field &out){
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
_Mat.M(in,tmp);
|
||||||
|
_PV.Mdag(tmp,out);
|
||||||
|
}
|
||||||
|
void AdjOp (const Field &in, Field &out){
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
_PV.M(tmp,out);
|
||||||
|
_Mat.Mdag(in,tmp);
|
||||||
|
}
|
||||||
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
void HermOp(const Field &in, Field &out){
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Target function 1/x, passed to the Chebyshev constructor by
// ChebyshevSmoother. No guard against x == 0 is applied.
RealD InverseApproximation(RealD x)
{
  const RealD reciprocal = 1.0/x;
  return reciprocal;
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
// Smoother built on a Chebyshev object constructed from InverseApproximation
// (1/x) with the (_lo,_hi,_ord) arguments given to the constructor.
//
// Applying the smoother computes
//     tmp = SmootherOperator^dag in
//     out = Cheby( MdagM(SmootherMatrix), tmp )
/////////////////////////////////////////////////////
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
  typedef LinearOperatorBase<Field> FineOperator;

  Matrix       & _SmootherMatrix;   // matrix wrapped in MdagM for the polynomial
  FineOperator & _SmootherOperator; // operator whose adjoint is applied to the input

  Chebyshev<Field> Cheby;

  // Initialisers are listed in member declaration order, which is the order
  // in which C++ actually initialises them.
  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
    _SmootherMatrix(SmootherMatrix),
    _SmootherOperator(SmootherOperator),
    Cheby(_lo,_hi,_ord,InverseApproximation)
  {};

  void operator() (const Field &in, Field &out)
  {
    Field tmp(in.Grid());
    MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
    // Move the source to the normal-equation side before the polynomial.
    _SmootherOperator.AdjOp(in,tmp);
    Cheby(MdagMOp,tmp,out);
  }
};
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
// Smoother that runs a ConjugateGradient solve on a ShiftedMdagMLinearOperator
// built from SmootherMatrix and `shift`.
//
// Applying the smoother computes
//     src = SmootherOperator^dag in
//     out = guess (ZeroGuesser), then CG(MdagMOp, src, out)
/////////////////////////////////////////////////////
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
  typedef LinearOperatorBase<Field> FineOperator;

  Matrix       & SmootherMatrix;   // matrix used to build the shifted MdagM operator
  FineOperator & SmootherOperator; // operator whose adjoint is applied to the input
  RealD tol;                       // tolerance passed to ConjugateGradient
  RealD shift;                     // shift passed to ShiftedMdagMLinearOperator
  int   maxit;                     // iteration limit passed to ConjugateGradient

  // Initialisers are listed in member declaration order, which is the order
  // in which C++ actually initialises them.
  MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
    SmootherMatrix(_SmootherMatrix),
    SmootherOperator(_SmootherOperator),
    tol(_tol),
    shift(_shift),
    maxit(_maxit)
  {};

  void operator() (const Field &in, Field &out)
  {
    ZeroGuesser<Field> Guess;
    // NOTE(review): third argument `false` presumably disables the abort on
    // non-convergence, so the CG acts as a fixed-effort smoother - confirm
    // against the ConjugateGradient constructor.
    ConjugateGradient<Field> CG(tol,maxit,false);

    Field src(in.Grid());

    ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
    SmootherOperator.AdjOp(in,src);
    Guess(src,out);
    CG(MdagMOp,src,out);
  }
};
|
||||||
|
|
||||||
|
// Log-line prefix: GridLogMessage, then `level` tab characters, then " Level <level> ".
// The expansion reads a variable named `level` from the scope in which the macro is used,
// and is left open so callers can keep streaming with `<<`.
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
|
||||||
|
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
|
||||||
|
HDCRPreconditioner(Aggregates &Agg,
|
||||||
|
FineOperator &Fine,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
auto CoarseGrid = _Aggregates.CoarseGrid;
|
||||||
|
CoarseVector Csrc(CoarseGrid);
|
||||||
|
CoarseVector Csol(CoarseGrid);
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=16;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
const int nbasis= 8;
|
||||||
|
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(Ls,Coarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("./ckpoint_lat.4000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.001;
|
||||||
|
RealD M5=1.8;
|
||||||
|
WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,-M5);
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
DomainWallFermionR Dpv (Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<WilsonFermionR,LatticeFermion> SubspaceOp(Dw);
|
||||||
|
|
||||||
|
Subspace Aggregates4D(Coarse4d,UGrid,0);
|
||||||
|
Subspace Aggregates5D(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " 4D subspace build " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
int nb=nbasis/2;
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
Aggregates4D.CreateSubspaceChebyshev(RNG4,SubspaceOp,nb,60.0,0.02,500,100,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
Aggregates4D.subspace[n+nb]= Aggregates4D.subspace[n] - g5 * Aggregates4D.subspace[n];
|
||||||
|
Aggregates4D.subspace[n] = Aggregates4D.subspace[n] + g5 * Aggregates4D.subspace[n];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Promote to 5D basis " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
for(int n=0;n<nbasis;n++){
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
InsertSlice(Aggregates4D.subspace[n],Aggregates5D.subspace[n],s,0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Coarsen the operator " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
|
||||||
|
NonHermitianLinearOperator<DomainWallFermionR,LatticeFermion> LinOpDwf(Ddwf);
|
||||||
|
|
||||||
|
Level1Op LDOp (*Coarse5d,0);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << " Callinig Coarsen the operator " <<std::endl;
|
||||||
|
LDOp.CoarsenOperator(FGrid,LinOpDwf,Aggregates5D);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Coarse CG unprec "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
CoarseVector c_res(Coarse5d);
|
||||||
|
|
||||||
|
LatticeFermion f_src(FGrid); f_src=1.0;
|
||||||
|
LatticeFermion f_res(FGrid);
|
||||||
|
|
||||||
|
RealD tol=1.0e-8;
|
||||||
|
int MaxIt = 10000;
|
||||||
|
|
||||||
|
MdagMLinearOperator<Level1Op,CoarseVector> CoarseMdagM(LDOp);
|
||||||
|
BiCGSTAB<CoarseVector> CoarseBiCGSTAB(tol,MaxIt);
|
||||||
|
ConjugateGradient<CoarseVector> CoarseCG(tol,MaxIt);
|
||||||
|
|
||||||
|
c_res=Zero();
|
||||||
|
CoarseCG(CoarseMdagM,c_src,c_res);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Solve " <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user