1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Merge branch 'feature/dirichlet' into feature/dirichlet-gparity

This commit is contained in:
Peter Boyle 2022-07-01 12:12:50 -04:00
commit 1f903d9296
91 changed files with 3457 additions and 1258 deletions

View File

@ -44,14 +44,22 @@ directory
#ifdef __NVCC__ #ifdef __NVCC__
//disables nvcc specific warning in json.hpp //disables nvcc specific warning in json.hpp
#pragma clang diagnostic ignored "-Wdeprecated-register" #pragma clang diagnostic ignored "-Wdeprecated-register"
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
//disables nvcc specific warning in json.hpp
#pragma nv_diag_suppress unsigned_compare_with_zero
#pragma nv_diag_suppress cast_to_qualified_type
//disables nvcc specific warning in many files
#pragma nv_diag_suppress esa_on_defaulted_function_ignored
#pragma nv_diag_suppress extra_semicolon
#else
//disables nvcc specific warning in json.hpp
#pragma diag_suppress unsigned_compare_with_zero #pragma diag_suppress unsigned_compare_with_zero
#pragma diag_suppress cast_to_qualified_type #pragma diag_suppress cast_to_qualified_type
//disables nvcc specific warning in many files //disables nvcc specific warning in many files
#pragma diag_suppress esa_on_defaulted_function_ignored #pragma diag_suppress esa_on_defaulted_function_ignored
#pragma diag_suppress extra_semicolon #pragma diag_suppress extra_semicolon
#endif
//Eigen only
#endif #endif
// Disable vectorisation in Eigen on the Power8/9 and PowerPC // Disable vectorisation in Eigen on the Power8/9 and PowerPC

View File

@ -14,7 +14,11 @@
/* NVCC save and restore compile environment*/ /* NVCC save and restore compile environment*/
#ifdef __NVCC__ #ifdef __NVCC__
#pragma push #pragma push
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
#pragma nv_diag_suppress code_is_unreachable
#else
#pragma diag_suppress code_is_unreachable #pragma diag_suppress code_is_unreachable
#endif
#pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__CUDA_ARCH__")
#pragma push_macro("__NVCC__") #pragma push_macro("__NVCC__")
#pragma push_macro("__CUDACC__") #pragma push_macro("__CUDACC__")

View File

@ -120,6 +120,9 @@ public:
SolverTimer.Start(); SolverTimer.Start();
int k; int k;
for (k = 1; k <= MaxIterations; k++) { for (k = 1; k <= MaxIterations; k++) {
GridStopWatch IterationTimer;
IterationTimer.Start();
c = cp; c = cp;
MatrixTimer.Start(); MatrixTimer.Start();
@ -152,8 +155,14 @@ public:
LinearCombTimer.Stop(); LinearCombTimer.Stop();
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k IterationTimer.Stop();
if ( (k % 500) == 0 ) {
std::cout << GridLogMessage << "ConjugateGradient: Iteration " << k
<< " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl; << " residual " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
} else {
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << sqrt(cp/ssq) << " target " << Tolerance << " took " << IterationTimer.Elapsed() << std::endl;
}
// Stopping condition // Stopping condition
if (cp <= rsq) { if (cp <= rsq) {
@ -170,13 +179,13 @@ public:
<< "\tTrue residual " << true_residual << "\tTrue residual " << true_residual
<< "\tTarget " << Tolerance << std::endl; << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogIterative << "Time breakdown "<<std::endl; std::cout << GridLogMessage << "Time breakdown "<<std::endl;
std::cout << GridLogIterative << "\tElapsed " << SolverTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tInner " << InnerTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -44,7 +44,7 @@ public:
using OperatorFunction<Field>::operator(); using OperatorFunction<Field>::operator();
RealD Tolerance; // RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
std::vector<int> IterationsToCompleteShift; // Iterations for this shift std::vector<int> IterationsToCompleteShift; // Iterations for this shift
@ -325,7 +325,7 @@ public:
std::cout << GridLogMessage << "Time Breakdown "<<std::endl; std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
IterationsToComplete = k; IterationsToComplete = k;

View File

@ -113,7 +113,43 @@ public:
blockPromote(guess_coarse,guess,subspace); blockPromote(guess_coarse,guess,subspace);
guess.Checkerboard() = src.Checkerboard(); guess.Checkerboard() = src.Checkerboard();
}; };
};
void operator()(const std::vector<FineField> &src,std::vector<FineField> &guess) {
int Nevec = (int)evec_coarse.size();
int Nsrc = (int)src.size();
// make temp variables
std::vector<CoarseField> src_coarse(Nsrc,evec_coarse[0].Grid());
std::vector<CoarseField> guess_coarse(Nsrc,evec_coarse[0].Grid());
//Preporcessing
std::cout << GridLogMessage << "Start BlockProject for loop" << std::endl;
for (int j=0;j<Nsrc;j++)
{
guess_coarse[j] = Zero();
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
blockProject(src_coarse[j],src[j],subspace);
}
//deflation set up for eigen vector batchsize 1 and source batch size equal number of sources
std::cout << GridLogMessage << "Start ProjectAccum for loop" << std::endl;
for (int i=0;i<Nevec;i++)
{
std::cout << GridLogMessage << "ProjectAccum Nvec: " << i << std::endl;
const CoarseField & tmp = evec_coarse[i];
for (int j=0;j<Nsrc;j++)
{
axpy(guess_coarse[j],TensorRemove(innerProduct(tmp,src_coarse[j])) / eval_coarse[i],tmp,guess_coarse[j]);
}
}
//postprocessing
std::cout << GridLogMessage << "Start BlockPromote for loop" << std::endl;
for (int j=0;j<Nsrc;j++)
{
std::cout << GridLogMessage << "BlockProject iter: " << j << std::endl;
blockPromote(guess_coarse[j],guess[j],subspace);
guess[j].Checkerboard() = src[j].Checkerboard();
}
};
};

View File

@ -372,7 +372,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
double off_node_bytes=0.0; double off_node_bytes=0.0;
int tag; int tag;
if ( dox ) { if ( dor ) {
if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) { if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32; tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
@ -382,7 +382,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
} }
} }
if (dor) { if (dox) {
if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) { if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32; tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
@ -390,16 +390,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
list.push_back(xrq); list.push_back(xrq);
off_node_bytes+=bytes; off_node_bytes+=bytes;
} else { } else {
// TODO : make a OMP loop on CPU, call threaded bcopy
void *shm = (void *) this->ShmBufferTranslate(dest,recv); void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL); assert(shm!=NULL);
// std::cout <<"acceleratorCopyDeviceToDeviceAsynch"<< std::endl;
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes); acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
} }
} }
if ( CommunicatorPolicy == CommunicatorPolicySequential ) { if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir); this->StencilSendToRecvFromComplete(list,dir);
list.resize(0);
} }
return off_node_bytes; return off_node_bytes;

View File

@ -125,6 +125,12 @@ void pokeSite(const sobj &s,Lattice<vobj> &l,const Coordinate &site){
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Peek a scalar object from the SIMD array // Peek a scalar object from the SIMD array
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
template<class vobj>
typename vobj::scalar_object peekSite(const Lattice<vobj> &l,const Coordinate &site){
typename vobj::scalar_object s;
peekSite(s,l,site);
return s;
}
template<class vobj,class sobj> template<class vobj,class sobj>
void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){

View File

@ -232,6 +232,7 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
const uint64_t sites = grid->oSites(); const uint64_t sites = grid->oSites();
// Might make all code paths go this way. // Might make all code paths go this way.
#if 0
typedef decltype(innerProductD(vobj(),vobj())) inner_t; typedef decltype(innerProductD(vobj(),vobj())) inner_t;
Vector<inner_t> inner_tmp(sites); Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0]; auto inner_tmp_v = &inner_tmp[0];
@ -241,15 +242,31 @@ inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &
autoView( right_v,right, AcceleratorRead); autoView( right_v,right, AcceleratorRead);
// GPU - SIMT lane compliance... // GPU - SIMT lane compliance...
accelerator_for( ss, sites, 1,{ accelerator_for( ss, sites, nsimd,{
auto x_l = left_v[ss]; auto x_l = left_v(ss);
auto y_l = right_v[ss]; auto y_l = right_v(ss);
inner_tmp_v[ss]=innerProductD(x_l,y_l); coalescedWrite(inner_tmp_v[ss],innerProductD(x_l,y_l));
}); });
} }
#else
typedef decltype(innerProduct(vobj(),vobj())) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
{
autoView( left_v , left, AcceleratorRead);
autoView( right_v,right, AcceleratorRead);
// GPU - SIMT lane compliance...
accelerator_for( ss, sites, nsimd,{
auto x_l = left_v(ss);
auto y_l = right_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l));
});
}
#endif
// This is in single precision and fails some tests // This is in single precision and fails some tests
auto anrm = sum(inner_tmp_v,sites); auto anrm = sumD(inner_tmp_v,sites);
nrm = anrm; nrm = anrm;
return nrm; return nrm;
} }
@ -283,7 +300,7 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
conformable(x,y); conformable(x,y);
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type; // typedef typename vobj::vector_typeD vector_type;
RealD nrm; RealD nrm;
GridBase *grid = x.Grid(); GridBase *grid = x.Grid();
@ -295,17 +312,29 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
autoView( x_v, x, AcceleratorRead); autoView( x_v, x, AcceleratorRead);
autoView( y_v, y, AcceleratorRead); autoView( y_v, y, AcceleratorRead);
autoView( z_v, z, AcceleratorWrite); autoView( z_v, z, AcceleratorWrite);
#if 0
typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t;
Vector<inner_t> inner_tmp(sites); Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0]; auto inner_tmp_v = &inner_tmp[0];
accelerator_for( ss, sites, 1,{ accelerator_for( ss, sites, nsimd,{
auto tmp = a*x_v[ss]+b*y_v[ss]; auto tmp = a*x_v(ss)+b*y_v(ss);
inner_tmp_v[ss]=innerProductD(tmp,tmp); coalescedWrite(inner_tmp_v[ss],innerProductD(tmp,tmp));
z_v[ss]=tmp; coalescedWrite(z_v[ss],tmp);
}); });
nrm = real(TensorRemove(sum(inner_tmp_v,sites))); nrm = real(TensorRemove(sum(inner_tmp_v,sites)));
#else
typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t;
Vector<inner_t> inner_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
accelerator_for( ss, sites, nsimd,{
auto tmp = a*x_v(ss)+b*y_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp));
coalescedWrite(z_v[ss],tmp);
});
nrm = real(TensorRemove(sumD(inner_tmp_v,sites)));
#endif
grid->GlobalSum(nrm); grid->GlobalSum(nrm);
return nrm; return nrm;
} }

View File

@ -424,9 +424,32 @@ public:
// MT implementation does not implement fast discard even though // MT implementation does not implement fast discard even though
// in principle this is possible // in principle this is possible
//////////////////////////////////////////////// ////////////////////////////////////////////////
#if 1
thread_for( lidx, _grid->lSites(), {
int gidx;
int o_idx;
int i_idx;
int rank;
Coordinate pcoor;
Coordinate lcoor;
Coordinate gcoor;
_grid->LocalIndexToLocalCoor(lidx,lcoor);
pcoor=_grid->ThisProcessorCoor();
_grid->ProcessorCoorLocalCoorToGlobalCoor(pcoor,lcoor,gcoor);
_grid->GlobalCoorToGlobalIndex(gcoor,gidx);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
assert(rank == _grid->ThisRank() );
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
});
#else
// Everybody loops over global volume. // Everybody loops over global volume.
thread_for( gidx, _grid->_gsites, { thread_for( gidx, _grid->_gsites, {
// Where is it? // Where is it?
int rank; int rank;
int o_idx; int o_idx;
@ -443,6 +466,7 @@ public:
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
} }
}); });
#endif
#else #else
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Machine and thread decomposition dependent seeding is efficient // Machine and thread decomposition dependent seeding is efficient

View File

@ -65,6 +65,7 @@ GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
GridLogger GridLogError (1, "Error" , GridLogColours, "RED"); GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE"); GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
@ -72,9 +73,10 @@ GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE"); GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
void GridLogConfigure(std::vector<std::string> &logstreams) { void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(0); GridLogError.Active(1);
GridLogWarning.Active(0); GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on GridLogMessage.Active(1); // at least the messages should be always on
GridLogMemory.Active(0); // at least the messages should be always on
GridLogIterative.Active(0); GridLogIterative.Active(0);
GridLogDebug.Active(0); GridLogDebug.Active(0);
GridLogPerformance.Active(0); GridLogPerformance.Active(0);
@ -83,7 +85,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogHMC.Active(1); GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) { for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Error")) GridLogError.Active(1); if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);

View File

@ -183,6 +183,7 @@ extern GridLogger GridLogPerformance;
extern GridLogger GridLogIterative ; extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ; extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC; extern GridLogger GridLogHMC;
extern GridLogger GridLogMemory;
extern Colours GridLogColours; extern Colours GridLogColours;
std::string demangle(const char* name) ; std::string demangle(const char* name) ;

View File

@ -31,6 +31,7 @@ directory
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <string>
#include <map> #include <map>
#include <pwd.h> #include <pwd.h>
@ -654,7 +655,8 @@ class IldgWriter : public ScidacWriter {
// Fill ILDG header data struct // Fill ILDG header data struct
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
ildgFormat ildgfmt ; ildgFormat ildgfmt ;
ildgfmt.field = std::string("su3gauge"); const std::string stNC = std::to_string( Nc ) ;
ildgfmt.field = std::string("su"+stNC+"gauge");
if ( format == std::string("IEEE32BIG") ) { if ( format == std::string("IEEE32BIG") ) {
ildgfmt.precision = 32; ildgfmt.precision = 32;
@ -871,7 +873,8 @@ class IldgReader : public GridLimeReader {
} else { } else {
assert(found_ildgFormat); assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") ); const std::string stNC = std::to_string( Nc ) ;
assert ( ildgFormat_.field == std::string("su"+stNC+"gauge") );
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can // Populate our Grid metadata as best we can
@ -879,7 +882,7 @@ class IldgReader : public GridLimeReader {
std::ostringstream vers; vers << ildgFormat_.version; std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str(); FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); FieldMetaData_.data_type = std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC);
FieldMetaData_.nd=4; FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4); FieldMetaData_.dimension.resize(4);

View File

@ -6,8 +6,8 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -182,8 +182,8 @@ class GaugeStatistics
public: public:
void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{ {
header.link_trace=WilsonLoops<Impl>::linkTrace(data); header.link_trace = WilsonLoops<Impl>::linkTrace(data);
header.plaquette =WilsonLoops<Impl>::avgPlaquette(data); header.plaquette = WilsonLoops<Impl>::avgPlaquette(data);
} }
}; };
typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics; typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics;
@ -203,20 +203,24 @@ template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzCo
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm) inline void reconstruct3(LorentzColourMatrix & cm)
{ {
const int x=0; assert( Nc < 4 && Nc > 1 ) ;
const int y=1;
const int z=2;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
#if Nc == 2
cm(mu)()(1,0) = -adj(cm(mu)()(0,y)) ;
cm(mu)()(1,1) = adj(cm(mu)()(0,x)) ;
#else
const int x=0 , y=1 , z=2 ; // a little disinenuous labelling
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
#endif
} }
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage // Some data types for intermediate storage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, Nc-1>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3; typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
@ -278,7 +282,6 @@ struct GaugeSimpleMunger{
template <class fobj, class sobj> template <class fobj, class sobj>
struct GaugeSimpleUnmunger { struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) { void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) { for (int i = 0; i < Nc; i++) {
@ -317,8 +320,8 @@ template<class fobj,class sobj>
struct Gauge3x2munger{ struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){ void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){ for(int i=0;i<Nc-1;i++){
for(int j=0;j<3;j++){ for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)(i)(j); out(mu)()(i,j) = in(mu)(i)(j);
}} }}
} }
@ -330,8 +333,8 @@ template<class fobj,class sobj>
struct Gauge3x2unmunger{ struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){ void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){ for(int i=0;i<Nc-1;i++){
for(int j=0;j<3;j++){ for(int j=0;j<Nc;j++){
out(mu)(i)(j) = in(mu)()(i,j); out(mu)(i)(j) = in(mu)()(i,j);
}} }}
} }

View File

@ -9,6 +9,7 @@
Author: Matt Spraggs <matthew.spraggs@gmail.com> Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -30,6 +31,8 @@
#ifndef GRID_NERSC_IO_H #ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H #define GRID_NERSC_IO_H
#include <string>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
using namespace Grid; using namespace Grid;
@ -147,15 +150,17 @@ public:
std::string format(header.floating_point); std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG")); const int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); const int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); const int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); const int ieee64 = (format == std::string("IEEE64") || \
format == std::string("IEEE64LITTLE"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger; // depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type> // munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) { const std::string stNC = std::to_string( Nc ) ;
if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE") ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F> BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F>
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format, (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
@ -166,7 +171,7 @@ public:
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format, (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
} }
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { } else if ( header.data_type == std::string("4D_SU"+stNC+"_GAUGE_"+stNC+"x"+stNC) ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF> BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format, (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
@ -211,27 +216,29 @@ public:
template<class GaugeStats=PeriodicGaugeStatistics> template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file, std::string file,
std::string ens_label = std::string("DWF")) std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{ {
writeConfiguration(Umu,file,0,1,ens_label); writeConfiguration(Umu,file,0,1,ens_label,ens_id,sequence_number);
} }
template<class GaugeStats=PeriodicGaugeStatistics> template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file, std::string file,
int two_row, int two_row,
int bits32, int bits32,
std::string ens_label = std::string("DWF")) std::string ens_label = std::string("DWF"),
std::string ens_id = std::string("UKQCD"),
unsigned int sequence_number = 1)
{ {
typedef vLorentzColourMatrixD vobj; typedef vLorentzColourMatrixD vobj;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
FieldMetaData header; FieldMetaData header;
/////////////////////////////////////////// header.sequence_number = sequence_number;
// Following should become arguments header.ensemble_id = ens_id;
///////////////////////////////////////////
header.sequence_number = 1;
header.ensemble_id = std::string("UKQCD");
header.ensemble_label = ens_label; header.ensemble_label = ens_label;
header.hdr_version = "1.0" ;
typedef LorentzColourMatrixD fobj3D; typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D; typedef LorentzColour2x3D fobj2D;
@ -245,10 +252,14 @@ public:
uint64_t offset; uint64_t offset;
// Sod it -- always write 3x3 double // Sod it -- always write NcxNc double
header.floating_point = std::string("IEEE64BIG"); header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3"); const std::string stNC = std::to_string( Nc ) ;
GaugeSimpleUnmunger<fobj3D,sobj> munge; if( two_row ) {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE" );
} else {
header.data_type = std::string("4D_SU" + stNC + "_GAUGE_" + stNC + "x" + stNC );
}
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
truncate(file); truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
@ -256,8 +267,15 @@ public:
grid->Broadcast(0,(void *)&offset,sizeof(offset)); grid->Broadcast(0,(void *)&offset,sizeof(offset));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
if( two_row ) {
Gauge3x2unmunger<fobj2D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleUnmunger<fobj3D,sobj> munge;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
}
header.checksum = nersc_csum; header.checksum = nersc_csum;
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
writeHeader(header,file); writeHeader(header,file);
@ -290,7 +308,6 @@ public:
MachineCharacteristics(header); MachineCharacteristics(header);
uint64_t offset; uint64_t offset;
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48"); header.data_type = std::string("RANLUX48");

View File

@ -72,17 +72,9 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
inline uint64_t cyclecount(void){ inline uint64_t cyclecount(void){
return 0; return 0;
} }
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
#define __SSC_STOP __SSC_MARK(0x110)
#define __SSC_START __SSC_MARK(0x111)
#else #else
#define __SSC_MARK(mark)
#define __SSC_STOP
#define __SSC_START
/* /*
* cycle counters arch dependent * cycle counters arch dependent
*/ */

View File

@ -39,9 +39,9 @@ NAMESPACE_BEGIN(Grid)
// C++11 time facilities better? // C++11 time facilities better?
inline double usecond(void) { inline double usecond(void) {
struct timeval tv; struct timeval tv;
#ifdef TIMERS_ON tv.tv_sec = 0;
tv.tv_usec = 0;
gettimeofday(&tv,NULL); gettimeofday(&tv,NULL);
#endif
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec; return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
} }

View File

@ -16,8 +16,12 @@
#ifdef __NVCC__ #ifdef __NVCC__
#pragma push #pragma push
#if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 5)
#pragma nv_diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#else
#pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" #pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning"
#endif #endif
#endif
#include "pugixml.h" #include "pugixml.h"

View File

@ -38,24 +38,32 @@ NAMESPACE_BEGIN(Grid);
struct GparityWilsonImplParams { struct GparityWilsonImplParams {
Coordinate twists; Coordinate twists;
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
GparityWilsonImplParams() : twists(Nd, 0) {}; Coordinate dirichlet; // Blocksize of dirichlet BCs
GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {};
}; };
struct WilsonImplParams { struct WilsonImplParams {
bool overlapCommsCompute; bool overlapCommsCompute;
Coordinate dirichlet; // Blocksize of dirichlet BCs
AcceleratorVector<Real,Nd> twist_n_2pi_L; AcceleratorVector<Real,Nd> twist_n_2pi_L;
AcceleratorVector<Complex,Nd> boundary_phases; AcceleratorVector<Complex,Nd> boundary_phases;
WilsonImplParams() { WilsonImplParams() {
dirichlet.resize(Nd,0);
boundary_phases.resize(Nd, 1.0); boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0); twist_n_2pi_L.resize(Nd, 0.0);
}; };
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) { WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0); twist_n_2pi_L.resize(Nd, 0.0);
dirichlet.resize(Nd,0);
} }
}; };
struct StaggeredImplParams { struct StaggeredImplParams {
StaggeredImplParams() {}; Coordinate dirichlet; // Blocksize of dirichlet BCs
StaggeredImplParams()
{
dirichlet.resize(Nd,0);
};
}; };
struct OneFlavourRationalParams : Serializable { struct OneFlavourRationalParams : Serializable {

View File

@ -68,9 +68,17 @@ public:
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Support for MADWF tricks // Support for MADWF tricks
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return mass; }; RealD Mass(void) { return (mass_plus + mass_minus) / 2.0; };
RealD MassPlus(void) { return mass_plus; };
RealD MassMinus(void) { return mass_minus; };
void SetMass(RealD _mass) { void SetMass(RealD _mass) {
mass=_mass; mass_plus=mass_minus=_mass;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ;
void SetMass(RealD _mass_plus, RealD _mass_minus) {
mass_plus=_mass_plus;
mass_minus=_mass_minus;
SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs
} ; } ;
void P(const FermionField &psi, FermionField &chi); void P(const FermionField &psi, FermionField &chi);
@ -108,7 +116,7 @@ public:
void MeooeDag5D (const FermionField &in, FermionField &out); void MeooeDag5D (const FermionField &in, FermionField &out);
// protected: // protected:
RealD mass; RealD mass_plus, mass_minus;
// Save arguments to SetCoefficientsInternal // Save arguments to SetCoefficientsInternal
Vector<Coeff_t> _gamma; Vector<Coeff_t> _gamma;

View File

@ -0,0 +1,435 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermionImplementation.h
Copyright (C) 2017 - 2022
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
////////////////////////////////////////////
// Standard Clover
// (4+m0) + csw * clover_term
// Exp Clover
// (4+m0) * exp(csw/(4+m0) clover_term)
// = (4+m0) + csw * clover_term + ...
////////////////////////////////////////////
NAMESPACE_BEGIN(Grid);
//////////////////////////////////
// Generic Standard Clover
//////////////////////////////////
template<class Impl>
class CloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
static void Instantiate(CloverField& CloverTerm, CloverField& CloverTermInv, RealD csw_t, RealD diag_mass) {
GridBase *grid = CloverTerm.Grid();
CloverTerm += diag_mass;
int lvol = grid->lSites();
int DimRep = Impl::Dimension;
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
EigenInvCloverOp = EigenCloverOp.inverse();
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
pokeLocalSite(Qxinv, CTIv, lcoor);
});
}
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Generic Exp Clover
//////////////////////////////////
template<class Impl>
class ExpCloverHelpers: public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef WilsonCloverHelpers<Impl> Helpers;
// Can this be avoided?
static void IdentityTimesC(const CloverField& in, RealD c) {
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
});
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplClover<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplClover<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void Instantiate(CloverField& Clover, CloverField& CloverInv, RealD csw_t, RealD diag_mass) {
GridBase* grid = Clover.Grid();
CloverField ExpClover(grid);
int NMAX = getNMAX(Clover, 3.*csw_t/diag_mass);
Clover *= (1.0/diag_mass);
// Taylor expansion, slow but generic
// Horner scheme: a0 + a1 x + a2 x^2 + .. = a0 + x (a1 + x(...))
// qN = cN
// qn = cn + qn+1 X
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++)
cn[i] = cn[i-1] / RealD(i);
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * Clover + cn[i];
// prepare inverse
CloverInv = (-1.0)*Clover;
Clover = ExpClover * diag_mass;
ExpClover = Zero();
IdentityTimesC(ExpClover, cn[NMAX]);
for (int i=NMAX-1; i>=0; i--)
ExpClover = ExpClover * CloverInv + cn[i];
CloverInv = ExpClover * (1.0/diag_mass);
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
//////////////////////////////////
// Compact Standard Clover
//////////////////////////////////
template<class Impl>
class CompactCloverHelpers: public CompactWilsonCloverHelpers<Impl>,
public WilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
typedef WilsonCloverHelpers<Impl> Helpers;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void MassTerm(CloverField& Clover, RealD diag_mass) {
Clover += diag_mass;
}
static void Exponentiate_Clover(CloverDiagonalField& Diagonal,
CloverTriangleField& Triangle,
RealD csw_t, RealD diag_mass) {
// Do nothing
}
// TODO: implement Cmunu for better performances with compact layout, but don't do it
// here, but rather in WilsonCloverHelpers.h -> CompactWilsonCloverHelpers
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
return Helpers::Cmunu(U, lambda, mu, nu);
}
};
//////////////////////////////////
// Compact Exp Clover
//////////////////////////////////
template<class Impl>
class CompactExpCloverHelpers: public CompactWilsonCloverHelpers<Impl> {
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
template <typename vtype> using iImplClover = iScalar<iMatrix<iMatrix<vtype, Impl::Dimension>, Ns>>;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
static void MassTerm(CloverField& Clover, RealD diag_mass) {
// do nothing!
// mass term is multiplied to exp(Clover) below
}
static int getNMAX(RealD prec, RealD R) {
/* compute stop condition for exponential */
int NMAX=1;
RealD cond=R*R/2.;
while (cond*std::exp(R)>prec) {
NMAX++;
cond*=R/(double)(NMAX+1);
}
return NMAX;
}
static int getNMAX(Lattice<iImplCloverDiagonal<vComplexD>> &t, RealD R) {return getNMAX(1e-12,R);}
static int getNMAX(Lattice<iImplCloverDiagonal<vComplexF>> &t, RealD R) {return getNMAX(1e-6,R);}
static void ExponentiateHermitean6by6(const iMatrix<ComplexD,6> &arg, const RealD& alpha, const std::vector<RealD>& cN, const int Niter, iMatrix<ComplexD,6>& dest){
typedef iMatrix<ComplexD,6> mat;
RealD qn[6];
RealD qnold[6];
RealD p[5];
RealD trA2, trA3, trA4;
mat A2, A3, A4, A5;
A2 = alpha * alpha * arg * arg;
A3 = alpha * arg * A2;
A4 = A2 * A2;
A5 = A2 * A3;
trA2 = toReal( trace(A2) );
trA3 = toReal( trace(A3) );
trA4 = toReal( trace(A4));
p[0] = toReal( trace(A3 * A3)) / 6.0 - 0.125 * trA4 * trA2 - trA3 * trA3 / 18.0 + trA2 * trA2 * trA2/ 48.0;
p[1] = toReal( trace(A5)) / 5.0 - trA3 * trA2 / 6.0;
p[2] = toReal( trace(A4)) / 4.0 - 0.125 * trA2 * trA2;
p[3] = trA3 / 3.0;
p[4] = 0.5 * trA2;
qnold[0] = cN[Niter];
qnold[1] = 0.0;
qnold[2] = 0.0;
qnold[3] = 0.0;
qnold[4] = 0.0;
qnold[5] = 0.0;
for(int i = Niter-1; i >= 0; i--)
{
qn[0] = p[0] * qnold[5] + cN[i];
qn[1] = p[1] * qnold[5] + qnold[0];
qn[2] = p[2] * qnold[5] + qnold[1];
qn[3] = p[3] * qnold[5] + qnold[2];
qn[4] = p[4] * qnold[5] + qnold[3];
qn[5] = qnold[4];
qnold[0] = qn[0];
qnold[1] = qn[1];
qnold[2] = qn[2];
qnold[3] = qn[3];
qnold[4] = qn[4];
qnold[5] = qn[5];
}
mat unit(1.0);
dest = (qn[0] * unit + qn[1] * alpha * arg + qn[2] * A2 + qn[3] * A3 + qn[4] * A4 + qn[5] * A5);
}
static void Exponentiate_Clover(CloverDiagonalField& Diagonal, CloverTriangleField& Triangle, RealD csw_t, RealD diag_mass) {
GridBase* grid = Diagonal.Grid();
int NMAX = getNMAX(Diagonal, 3.*csw_t/diag_mass);
//
// Implementation completely in Daniel's layout
//
// Taylor expansion with Cayley-Hamilton recursion
// underlying Horner scheme as above
std::vector<RealD> cn(NMAX+1);
cn[0] = 1.0;
for (int i=1; i<=NMAX; i++){
cn[i] = cn[i-1] / RealD(i);
}
// Taken over from Daniel's implementation
conformable(Diagonal, Triangle);
long lsites = grid->lSites();
{
typedef typename SiteCloverDiagonal::scalar_object scalar_object_diagonal;
typedef typename SiteCloverTriangle::scalar_object scalar_object_triangle;
typedef iMatrix<ComplexD,6> mat;
autoView(diagonal_v, Diagonal, CpuRead);
autoView(triangle_v, Triangle, CpuRead);
autoView(diagonalExp_v, Diagonal, CpuWrite);
autoView(triangleExp_v, Triangle, CpuWrite);
thread_for(site, lsites, { // NOTE: Not on GPU because of (peek/poke)LocalSite
mat srcCloverOpUL(0.0); // upper left block
mat srcCloverOpLR(0.0); // lower right block
mat ExpCloverOp;
scalar_object_diagonal diagonal_tmp = Zero();
scalar_object_diagonal diagonal_exp_tmp = Zero();
scalar_object_triangle triangle_tmp = Zero();
scalar_object_triangle triangle_exp_tmp = Zero();
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
peekLocalSite(diagonal_tmp, diagonal_v, lcoor);
peekLocalSite(triangle_tmp, triangle_v, lcoor);
int block;
block = 0;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
srcCloverOpUL(i,j) = static_cast<ComplexD>(TensorRemove(diagonal_tmp()(block)(i)));
}
else{
srcCloverOpUL(i,j) = static_cast<ComplexD>(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j)));
}
}
}
block = 1;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
srcCloverOpLR(i,j) = static_cast<ComplexD>(TensorRemove(diagonal_tmp()(block)(i)));
}
else{
srcCloverOpLR(i,j) = static_cast<ComplexD>(TensorRemove(CompactHelpers::triangle_elem(triangle_tmp, block, i, j)));
}
}
}
// exp(Clover)
ExponentiateHermitean6by6(srcCloverOpUL,1.0/diag_mass,cn,NMAX,ExpCloverOp);
block = 0;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j);
}
else if(i < j){
triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j);
}
}
}
ExponentiateHermitean6by6(srcCloverOpLR,1.0/diag_mass,cn,NMAX,ExpCloverOp);
block = 1;
for(int i = 0; i < 6; i++){
for(int j = 0; j < 6; j++){
if (i == j){
diagonal_exp_tmp()(block)(i) = ExpCloverOp(i,j);
}
else if(i < j){
triangle_exp_tmp()(block)(CompactHelpers::triangle_index(i, j)) = ExpCloverOp(i,j);
}
}
}
pokeLocalSite(diagonal_exp_tmp, diagonalExp_v, lcoor);
pokeLocalSite(triangle_exp_tmp, triangleExp_v, lcoor);
});
}
Diagonal *= diag_mass;
Triangle *= diag_mass;
}
static GaugeLinkField Cmunu(std::vector<GaugeLinkField> &U, GaugeLinkField &lambda, int mu, int nu) {
assert(0);
return lambda;
}
};
NAMESPACE_END(Grid);

View File

@ -31,6 +31,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h> #include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h> #include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
@ -85,7 +86,7 @@ NAMESPACE_BEGIN(Grid);
// + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site // + (2 * 1 + 4 * 1/2) triangle parts = 4 triangle parts = 60 complex words per site
// = 84 complex words per site // = 84 complex words per site
template<class Impl> template<class Impl, class CloverHelpers>
class CompactWilsonCloverFermion : public WilsonFermion<Impl>, class CompactWilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl>, public WilsonCloverHelpers<Impl>,
public CompactWilsonCloverHelpers<Impl> { public CompactWilsonCloverHelpers<Impl> {

View File

@ -138,38 +138,52 @@ typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD; typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
// Clover fermions // Clover fermions
typedef WilsonCloverFermion<WilsonImplR> WilsonCloverFermionR; template <typename WImpl> using WilsonClover = WilsonCloverFermion<WImpl, CloverHelpers<WImpl>>;
typedef WilsonCloverFermion<WilsonImplF> WilsonCloverFermionF; template <typename WImpl> using WilsonExpClover = WilsonCloverFermion<WImpl, ExpCloverHelpers<WImpl>>;
typedef WilsonCloverFermion<WilsonImplD> WilsonCloverFermionD;
typedef WilsonCloverFermion<WilsonAdjImplR> WilsonCloverAdjFermionR; typedef WilsonClover<WilsonImplR> WilsonCloverFermionR;
typedef WilsonCloverFermion<WilsonAdjImplF> WilsonCloverAdjFermionF; typedef WilsonClover<WilsonImplF> WilsonCloverFermionF;
typedef WilsonCloverFermion<WilsonAdjImplD> WilsonCloverAdjFermionD; typedef WilsonClover<WilsonImplD> WilsonCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR; typedef WilsonExpClover<WilsonImplR> WilsonExpCloverFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF; typedef WilsonExpClover<WilsonImplF> WilsonExpCloverFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD; typedef WilsonExpClover<WilsonImplD> WilsonExpCloverFermionD;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR; typedef WilsonClover<WilsonAdjImplR> WilsonCloverAdjFermionR;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF; typedef WilsonClover<WilsonAdjImplF> WilsonCloverAdjFermionF;
typedef WilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD; typedef WilsonClover<WilsonAdjImplD> WilsonCloverAdjFermionD;
typedef WilsonClover<WilsonTwoIndexSymmetricImplR> WilsonCloverTwoIndexSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexSymmetricImplF> WilsonCloverTwoIndexSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexSymmetricImplD> WilsonCloverTwoIndexSymmetricFermionD;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplR> WilsonCloverTwoIndexAntiSymmetricFermionR;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplF> WilsonCloverTwoIndexAntiSymmetricFermionF;
typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiSymmetricFermionD;
// Compact Clover fermions // Compact Clover fermions
typedef CompactWilsonCloverFermion<WilsonImplR> CompactWilsonCloverFermionR; template <typename WImpl> using CompactWilsonClover = CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>>;
typedef CompactWilsonCloverFermion<WilsonImplF> CompactWilsonCloverFermionF; template <typename WImpl> using CompactWilsonExpClover = CompactWilsonCloverFermion<WImpl, CompactExpCloverHelpers<WImpl>>;
typedef CompactWilsonCloverFermion<WilsonImplD> CompactWilsonCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonAdjImplR> CompactWilsonCloverAdjFermionR; typedef CompactWilsonClover<WilsonImplR> CompactWilsonCloverFermionR;
typedef CompactWilsonCloverFermion<WilsonAdjImplF> CompactWilsonCloverAdjFermionF; typedef CompactWilsonClover<WilsonImplF> CompactWilsonCloverFermionF;
typedef CompactWilsonCloverFermion<WilsonAdjImplD> CompactWilsonCloverAdjFermionD; typedef CompactWilsonClover<WilsonImplD> CompactWilsonCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR; typedef CompactWilsonExpClover<WilsonImplR> CompactWilsonExpCloverFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF; typedef CompactWilsonExpClover<WilsonImplF> CompactWilsonExpCloverFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD; typedef CompactWilsonExpClover<WilsonImplD> CompactWilsonExpCloverFermionD;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR; typedef CompactWilsonClover<WilsonAdjImplR> CompactWilsonCloverAdjFermionR;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF; typedef CompactWilsonClover<WilsonAdjImplF> CompactWilsonCloverAdjFermionF;
typedef CompactWilsonCloverFermion<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD; typedef CompactWilsonClover<WilsonAdjImplD> CompactWilsonCloverAdjFermionD;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplR> CompactWilsonCloverTwoIndexSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplF> CompactWilsonCloverTwoIndexSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexSymmetricImplD> CompactWilsonCloverTwoIndexSymmetricFermionD;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplR> CompactWilsonCloverTwoIndexAntiSymmetricFermionR;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplF> CompactWilsonCloverTwoIndexAntiSymmetricFermionF;
typedef CompactWilsonClover<WilsonTwoIndexAntiSymmetricImplD> CompactWilsonCloverTwoIndexAntiSymmetricFermionD;
// Domain Wall fermions // Domain Wall fermions
typedef DomainWallFermion<WilsonImplR> DomainWallFermionR; typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;

View File

@ -49,7 +49,7 @@ public:
virtual FermionField &tmp(void) = 0; virtual FermionField &tmp(void) = 0;
virtual void DirichletBlock(Coordinate & _Block) { assert(0); }; virtual void DirichletBlock(const Coordinate & _Block) { assert(0); };
GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know
GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };

View File

@ -32,6 +32,7 @@
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h> #include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h> #include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
@ -51,7 +52,7 @@ NAMESPACE_BEGIN(Grid);
// csw_r = csw_t to recover the isotropic version // csw_r = csw_t to recover the isotropic version
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
template <class Impl> template<class Impl, class CloverHelpers>
class WilsonCloverFermion : public WilsonFermion<Impl>, class WilsonCloverFermion : public WilsonFermion<Impl>,
public WilsonCloverHelpers<Impl> public WilsonCloverHelpers<Impl>
{ {

View File

@ -209,6 +209,8 @@ public:
}; };
////////////////////////////////////////////////////////
template<class Impl> class CompactWilsonCloverHelpers { template<class Impl> class CompactWilsonCloverHelpers {
public: public:

View File

@ -47,8 +47,6 @@ class CompactWilsonCloverTypes {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
static_assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3, "Wrong dimensions");
static constexpr int Nred = Nc * Nhs; // 6 static constexpr int Nred = Nc * Nhs; // 6
static constexpr int Nblock = Nhs; // 2 static constexpr int Nblock = Nhs; // 2
static constexpr int Ndiagonal = Nred; // 6 static constexpr int Ndiagonal = Nred; // 6

View File

@ -297,7 +297,7 @@ public:
void ZeroCountersi(void) { } void ZeroCountersi(void) { }
void Reporti(int calls) { } void Reporti(int calls) { }
std::vector<int> surface_list; // Vector<int> surface_list;
WilsonStencil(GridBase *grid, WilsonStencil(GridBase *grid,
int npoints, int npoints,
@ -307,10 +307,11 @@ public:
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p) : CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
{ {
ZeroCountersi(); ZeroCountersi();
surface_list.resize(0); // surface_list.resize(0);
this->same_node.resize(npoints); this->same_node.resize(npoints);
}; };
/*
void BuildSurfaceList(int Ls,int vol4){ void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM // find same node for SHM
@ -331,6 +332,7 @@ public:
} }
} }
} }
*/
template < class compressor> template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress) void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)

View File

@ -178,16 +178,8 @@ public:
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
double _M5,const ImplParams &p= ImplParams()); double _M5,const ImplParams &p= ImplParams());
virtual void DirichletBlock(Coordinate & block) virtual void DirichletBlock(const Coordinate & block)
{ {
assert(block.size()==Nd+1);
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
Dirichlet = 1;
Block = block;
Stencil.DirichletBlock(block);
StencilEven.DirichletBlock(block);
StencilOdd.DirichletBlock(block);
}
} }
// Constructors // Constructors
/* /*

View File

@ -47,7 +47,7 @@ CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
FiveDimRedBlackGrid, FiveDimRedBlackGrid,
FourDimGrid, FourDimGrid,
FourDimRedBlackGrid,_M5,p), FourDimRedBlackGrid,_M5,p),
mass(_mass) mass_plus(_mass), mass_minus(_mass)
{ {
} }
@ -209,8 +209,8 @@ void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
Vector<Coeff_t> diag (Ls,1.0); Vector<Coeff_t> diag (Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass; Vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass_minus;
Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass; Vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass_plus;
M5D(psi,chi,chi,lower,diag,upper); M5D(psi,chi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -220,8 +220,8 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
Vector<Coeff_t> diag = bs; Vector<Coeff_t> diag = bs;
Vector<Coeff_t> upper= cs; Vector<Coeff_t> upper= cs;
Vector<Coeff_t> lower= cs; Vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,Din,lower,diag,upper); M5D(psi,psi,Din,lower,diag,upper);
} }
// FIXME Redunant with the above routine; check this and eliminate // FIXME Redunant with the above routine; check this and eliminate
@ -235,8 +235,8 @@ template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &
upper[i]=-ceo[i]; upper[i]=-ceo[i];
lower[i]=-ceo[i]; lower[i]=-ceo[i];
} }
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper); M5D(psi,psi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -250,8 +250,8 @@ void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &
upper[i]=-cee[i]; upper[i]=-cee[i];
lower[i]=-cee[i]; lower[i]=-cee[i];
} }
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_minus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_plus*lower[0];
M5D(psi,psi,chi,lower,diag,upper); M5D(psi,psi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
@ -266,9 +266,9 @@ void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &
// Assemble the 5d matrix // Assemble the 5d matrix
if ( s==0 ) { if ( s==0 ) {
upper[s] = -cee[s+1] ; upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1]; lower[s] = mass_minus*cee[Ls-1];
} else if ( s==(Ls-1)) { } else if ( s==(Ls-1)) {
upper[s] = mass*cee[0]; upper[s] = mass_plus*cee[0];
lower[s] = -cee[s-1]; lower[s] = -cee[s-1];
} else { } else {
upper[s]=-cee[s+1]; upper[s]=-cee[s+1];
@ -291,8 +291,8 @@ void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
Vector<Coeff_t> diag(Ls,1.0); Vector<Coeff_t> diag(Ls,1.0);
Vector<Coeff_t> upper(Ls,-1.0); Vector<Coeff_t> upper(Ls,-1.0);
Vector<Coeff_t> lower(Ls,-1.0); Vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass_plus*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass_minus*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper); M5Ddag(psi,chi,chi,lower,diag,upper);
} }
@ -307,9 +307,9 @@ void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField
for (int s=0;s<Ls;s++){ for (int s=0;s<Ls;s++){
if ( s== 0 ) { if ( s== 0 ) {
upper[s] = cs[s+1]; upper[s] = cs[s+1];
lower[s] =-mass*cs[Ls-1]; lower[s] =-mass_minus*cs[Ls-1];
} else if ( s==(Ls-1) ) { } else if ( s==(Ls-1) ) {
upper[s] =-mass*cs[0]; upper[s] =-mass_plus*cs[0];
lower[s] = cs[s-1]; lower[s] = cs[s-1];
} else { } else {
upper[s] = cs[s+1]; upper[s] = cs[s+1];
@ -552,7 +552,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0]; leem[i]=mass_minus*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) { for(int j=0;j<i;j++) {
assert(bee[j+1]!=Coeff_t(0.0)); assert(bee[j+1]!=Coeff_t(0.0));
leem[i]*= aee[j]/bee[j+1]; leem[i]*= aee[j]/bee[j+1];
@ -560,7 +560,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass; ueem[i]=mass_plus;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0]; ueem[i]*= aee[0]/bee[0];
@ -573,7 +573,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
} }
{ {
Coeff_t delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass_minus*cee[Ls-1];
for(int j=0;j<Ls-1;j++) { for(int j=0;j<Ls-1;j++) {
assert(bee[j] != Coeff_t(0.0)); assert(bee[j] != Coeff_t(0.0));
delta_d *= cee[j]/bee[j]; delta_d *= cee[j]/bee[j];
@ -642,6 +642,10 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if (!defined(GRID_HIP)) #if (!defined(GRID_HIP))
Gamma::Algebra Gmu [] = { Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX, Gamma::Algebra::GammaX,
@ -777,6 +781,8 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
assert(mu>=0); assert(mu>=0);
assert(mu<Nd); assert(mu<Nd);
assert(mass_plus == mass_minus);
RealD mass = mass_plus;
#if 0 #if 0
int tshift = (mu == Nd-1) ? 1 : 0; int tshift = (mu == Nd-1) ? 1 : 0;

View File

@ -66,18 +66,17 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
M5Dcalls++; M5Dcalls++;
M5Dtime-=usecond(); M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls; uint64_t nloop = grid->oSites();
accelerator_for(sss,nloop,Simd::Nsimd(),{ accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss= sss*Ls; uint64_t s = sss%Ls;
uint64_t ss= sss-s;
typedef decltype(coalescedRead(psi[0])) spinor; typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1, tmp2; spinor tmp1, tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls); uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls); uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5m(tmp1,psi(idx_u)); spProj5m(tmp1,psi(idx_u));
spProj5p(tmp2,psi(idx_l)); spProj5p(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2); coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
}); });
M5Dtime+=usecond(); M5Dtime+=usecond();
} }
@ -108,18 +107,17 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
M5Dcalls++; M5Dcalls++;
M5Dtime-=usecond(); M5Dtime-=usecond();
uint64_t nloop = grid->oSites()/Ls; uint64_t nloop = grid->oSites();
accelerator_for(sss,nloop,Simd::Nsimd(),{ accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls; uint64_t s = sss%Ls;
uint64_t ss= sss-s;
typedef decltype(coalescedRead(psi[0])) spinor; typedef decltype(coalescedRead(psi[0])) spinor;
spinor tmp1,tmp2; spinor tmp1,tmp2;
for(int s=0;s<Ls;s++){
uint64_t idx_u = ss+((s+1)%Ls); uint64_t idx_u = ss+((s+1)%Ls);
uint64_t idx_l = ss+((s+Ls-1)%Ls); uint64_t idx_l = ss+((s+Ls-1)%Ls);
spProj5p(tmp1,psi(idx_u)); spProj5p(tmp1,psi(idx_u));
spProj5m(tmp2,psi(idx_l)); spProj5m(tmp2,psi(idx_l));
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2); coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
}
}); });
M5Dtime+=usecond(); M5Dtime+=usecond();
} }

View File

@ -32,9 +32,10 @@
#include <Grid/qcd/spin/Dirac.h> #include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h> #include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Impl> template<class Impl, class CloverHelpers>
CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu, CompactWilsonCloverFermion<Impl, CloverHelpers>::CompactWilsonCloverFermion(GaugeField& _Umu,
GridCartesian& Fgrid, GridCartesian& Fgrid,
GridRedBlackCartesian& Hgrid, GridRedBlackCartesian& Hgrid,
const RealD _mass, const RealD _mass,
@ -58,50 +59,55 @@ CompactWilsonCloverFermion<Impl>::CompactWilsonCloverFermion(GaugeField& _Umu,
, BoundaryMask(&Fgrid) , BoundaryMask(&Fgrid)
, BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid) , BoundaryMaskEven(&Hgrid), BoundaryMaskOdd(&Hgrid)
{ {
assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3);
csw_r *= 0.5; csw_r *= 0.5;
csw_t *= 0.5; csw_t *= 0.5;
if (clover_anisotropy.isAnisotropic) if (clover_anisotropy.isAnisotropic)
csw_r /= clover_anisotropy.xi_0; csw_r /= clover_anisotropy.xi_0;
ImportGauge(_Umu); ImportGauge(_Umu);
if (open_boundaries) if (open_boundaries) {
this->BoundaryMaskEven.Checkerboard() = Even;
this->BoundaryMaskOdd.Checkerboard() = Odd;
CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd); CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
}
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::Dhop(const FermionField& in, FermionField& out, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
WilsonBase::Dhop(in, out, dag); WilsonBase::Dhop(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::DhopOE(const FermionField& in, FermionField& out, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopOE(in, out, dag); WilsonBase::DhopOE(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::DhopEO(const FermionField& in, FermionField& out, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopEO(in, out, dag); WilsonBase::DhopEO(in, out, dag);
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
WilsonBase::DhopDir(in, out, dir, disp); WilsonBase::DhopDir(in, out, dir, disp);
if(this->open_boundaries) ApplyBoundaryMask(out); if(this->open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
WilsonBase::DhopDirAll(in, out); WilsonBase::DhopDirAll(in, out);
if(this->open_boundaries) { if(this->open_boundaries) {
for(auto& o : out) ApplyBoundaryMask(o); for(auto& o : out) ApplyBoundaryMask(o);
} }
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::M(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard(); out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
Mooee(in, Tmp); Mooee(in, Tmp);
@ -109,8 +115,8 @@ void CompactWilsonCloverFermion<Impl>::M(const FermionField& in, FermionField& o
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::Mdag(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard(); out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
MooeeDag(in, Tmp); MooeeDag(in, Tmp);
@ -118,20 +124,20 @@ void CompactWilsonCloverFermion<Impl>::Mdag(const FermionField& in, FermionField
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::Meooe(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
WilsonBase::Meooe(in, out); WilsonBase::Meooe(in, out);
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MeooeDag(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
WilsonBase::MeooeDag(in, out); WilsonBase::MeooeDag(in, out);
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) { if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) { if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalOdd, TriangleOdd); MooeeInternal(in, out, DiagonalOdd, TriangleOdd);
@ -144,13 +150,13 @@ void CompactWilsonCloverFermion<Impl>::Mooee(const FermionField& in, FermionFiel
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MooeeDag(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField& in, FermionField& out) {
Mooee(in, out); // blocks are hermitian Mooee(in, out); // blocks are hermitian
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) { if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) { if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd); MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd);
@ -163,23 +169,23 @@ void CompactWilsonCloverFermion<Impl>::MooeeInv(const FermionField& in, FermionF
if(open_boundaries) ApplyBoundaryMask(out); if(open_boundaries) ApplyBoundaryMask(out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MooeeInvDag(const FermionField& in, FermionField& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField& in, FermionField& out) {
MooeeInv(in, out); // blocks are hermitian MooeeInv(in, out); // blocks are hermitian
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
DhopDir(in, out, dir, disp); DhopDir(in, out, dir, disp);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MdirAll(const FermionField& in, std::vector<FermionField>& out) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
DhopDirAll(in, out); DhopDirAll(in, out);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
assert(!open_boundaries); // TODO check for changes required for open bc assert(!open_boundaries); // TODO check for changes required for open bc
// NOTE: code copied from original clover term // NOTE: code copied from original clover term
@ -251,7 +257,7 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
} }
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
count++; count++;
} }
@ -261,18 +267,18 @@ void CompactWilsonCloverFermion<Impl>::MDeriv(GaugeField& force, const FermionFi
force += clover_force; force += clover_force;
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0); assert(0);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0); assert(0);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField& in, void CompactWilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField& in,
FermionField& out, FermionField& out,
const CloverDiagonalField& diagonal, const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle) { const CloverTriangleField& triangle) {
@ -285,8 +291,8 @@ void CompactWilsonCloverFermion<Impl>::MooeeInternal(const FermionField&
CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle); CompactHelpers::MooeeKernel(diagonal.oSites(), 1, in, out, diagonal, triangle);
} }
template<class Impl> template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) { void CompactWilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField& _Umu) {
// NOTE: parts copied from original implementation // NOTE: parts copied from original implementation
// Import gauge into base class // Import gauge into base class
@ -318,22 +324,27 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t; TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t; TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t; TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;
TmpOriginal += this->diag_mass; // Handle mass term based on clover policy
CloverHelpers::MassTerm(TmpOriginal, this->diag_mass);
// Convert the data layout of the clover term // Convert the data layout of the clover term
double t4 = usecond(); double t4 = usecond();
CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle); CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);
// Possible modify the boundary values // Exponentiate the clover (nothing happens in case of the standard clover)
double t5 = usecond(); double t5 = usecond();
CloverHelpers::Exponentiate_Clover(Diagonal, Triangle, csw_t, this->diag_mass);
// Possible modify the boundary values
double t6 = usecond();
if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass); if(open_boundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, this->diag_mass);
// Invert the clover term in the improved layout // Invert the Clover term (explicit inversion needed for the improvement in case of open boundary conditions)
double t6 = usecond(); double t7 = usecond();
CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv); CompactHelpers::Invert(Diagonal, Triangle, DiagonalInv, TriangleInv);
// Fill the remaining clover fields // Fill the remaining clover fields
double t7 = usecond(); double t8 = usecond();
pickCheckerboard(Even, DiagonalEven, Diagonal); pickCheckerboard(Even, DiagonalEven, Diagonal);
pickCheckerboard(Even, TriangleEven, Triangle); pickCheckerboard(Even, TriangleEven, Triangle);
pickCheckerboard(Odd, DiagonalOdd, Diagonal); pickCheckerboard(Odd, DiagonalOdd, Diagonal);
@ -344,20 +355,19 @@ void CompactWilsonCloverFermion<Impl>::ImportGauge(const GaugeField& _Umu) {
pickCheckerboard(Odd, TriangleInvOdd, TriangleInv); pickCheckerboard(Odd, TriangleInvOdd, TriangleInv);
// Report timings // Report timings
double t8 = usecond(); double t9 = usecond();
#if 0
std::cout << GridLogMessage << "CompactWilsonCloverFermion::ImportGauge timings:" std::cout << GridLogDebug << "CompactWilsonCloverFermion::ImportGauge timings:" << std::endl;
<< " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
<< ", allocations = " << (t2 - t1) / 1e6 std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
<< ", field strength = " << (t3 - t2) / 1e6 std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
<< ", fill clover = " << (t4 - t3) / 1e6 std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
<< ", convert = " << (t5 - t4) / 1e6 std::cout << GridLogDebug << "convert = " << (t5 - t4) / 1e6 << std::endl;
<< ", boundaries = " << (t6 - t5) / 1e6 std::cout << GridLogDebug << "exponentiation = " << (t6 - t5) / 1e6 << std::endl;
<< ", inversions = " << (t7 - t6) / 1e6 std::cout << GridLogDebug << "boundaries = " << (t7 - t6) / 1e6 << std::endl;
<< ", pick cbs = " << (t8 - t7) / 1e6 std::cout << GridLogDebug << "inversions = " << (t8 - t7) / 1e6 << std::endl;
<< ", total = " << (t8 - t0) / 1e6 std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl;
<< std::endl; std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl;
#endif
} }
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -34,8 +34,8 @@
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Impl> template<class Impl, class CloverHelpers>
WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField& _Umu, WilsonCloverFermion<Impl, CloverHelpers>::WilsonCloverFermion(GaugeField& _Umu,
GridCartesian& Fgrid, GridCartesian& Fgrid,
GridRedBlackCartesian& Hgrid, GridRedBlackCartesian& Hgrid,
const RealD _mass, const RealD _mass,
@ -74,8 +74,8 @@ WilsonCloverFermion<Impl>::WilsonCloverFermion(GaugeField&
} }
// *NOT* EO // *NOT* EO
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::M(const FermionField &in, FermionField &out)
{ {
FermionField temp(out.Grid()); FermionField temp(out.Grid());
@ -89,8 +89,8 @@ void WilsonCloverFermion<Impl>::M(const FermionField &in, FermionField &out)
out += temp; out += temp;
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::Mdag(const FermionField &in, FermionField &out)
{ {
FermionField temp(out.Grid()); FermionField temp(out.Grid());
@ -104,8 +104,8 @@ void WilsonCloverFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
out += temp; out += temp;
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu) void WilsonCloverFermion<Impl, CloverHelpers>::ImportGauge(const GaugeField &_Umu)
{ {
double t0 = usecond(); double t0 = usecond();
WilsonFermion<Impl>::ImportGauge(_Umu); WilsonFermion<Impl>::ImportGauge(_Umu);
@ -131,47 +131,11 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
CloverTerm += Helpers::fillCloverXT(Ex) * csw_t; CloverTerm += Helpers::fillCloverXT(Ex) * csw_t;
CloverTerm += Helpers::fillCloverYT(Ey) * csw_t; CloverTerm += Helpers::fillCloverYT(Ey) * csw_t;
CloverTerm += Helpers::fillCloverZT(Ez) * csw_t; CloverTerm += Helpers::fillCloverZT(Ez) * csw_t;
CloverTerm += diag_mass;
double t4 = usecond(); double t4 = usecond();
int lvol = _Umu.Grid()->lSites(); CloverHelpers::Instantiate(CloverTerm, CloverTermInv, csw_t, this->diag_mass);
int DimRep = Impl::Dimension;
double t5 = usecond(); double t5 = usecond();
{
autoView(CTv,CloverTerm,CpuRead);
autoView(CTIv,CloverTermInv,CpuWrite);
thread_for(site, lvol, {
Coordinate lcoor;
grid->LocalIndexToLocalCoor(site, lcoor);
Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep);
typename SiteClover::scalar_object Qx = Zero(), Qxinv = Zero();
peekLocalSite(Qx, CTv, lcoor);
//if (csw!=0){
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++){
auto zz = Qx()(j, k)(a, b);
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
}
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
EigenInvCloverOp = EigenCloverOp.inverse();
//std::cout << EigenInvCloverOp << std::endl;
for (int j = 0; j < Ns; j++)
for (int k = 0; k < Ns; k++)
for (int a = 0; a < DimRep; a++)
for (int b = 0; b < DimRep; b++)
Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep);
// if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl;
// }
pokeLocalSite(Qxinv, CTIv, lcoor);
});
}
double t6 = usecond();
// Separate the even and odd parts // Separate the even and odd parts
pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Even, CloverTermEven, CloverTerm);
pickCheckerboard(Odd, CloverTermOdd, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
@ -184,48 +148,44 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
double t7 = usecond(); double t6 = usecond();
#if 0 std::cout << GridLogDebug << "WilsonCloverFermion::ImportGauge timings:" << std::endl;
std::cout << GridLogMessage << "WilsonCloverFermion::ImportGauge timings:" std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
<< " WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
<< ", allocations = " << (t2 - t1) / 1e6 std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
<< ", field strength = " << (t3 - t2) / 1e6 std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
<< ", fill clover = " << (t4 - t3) / 1e6 std::cout << GridLogDebug << "instantiation = " << (t5 - t4) / 1e6 << std::endl;
<< ", misc = " << (t5 - t4) / 1e6 std::cout << GridLogDebug << "pick cbs = " << (t6 - t5) / 1e6 << std::endl;
<< ", inversions = " << (t6 - t5) / 1e6 std::cout << GridLogDebug << "total = " << (t6 - t0) / 1e6 << std::endl;
<< ", pick cbs = " << (t7 - t6) / 1e6
<< ", total = " << (t7 - t0) / 1e6
<< std::endl;
#endif
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::Mooee(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::Mooee(const FermionField &in, FermionField &out)
{ {
this->MooeeInternal(in, out, DaggerNo, InverseNo); this->MooeeInternal(in, out, DaggerNo, InverseNo);
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::MooeeDag(const FermionField &in, FermionField &out)
{ {
this->MooeeInternal(in, out, DaggerYes, InverseNo); this->MooeeInternal(in, out, DaggerYes, InverseNo);
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInv(const FermionField &in, FermionField &out)
{ {
this->MooeeInternal(in, out, DaggerNo, InverseYes); this->MooeeInternal(in, out, DaggerNo, InverseYes);
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInvDag(const FermionField &in, FermionField &out)
{ {
this->MooeeInternal(in, out, DaggerYes, InverseYes); this->MooeeInternal(in, out, DaggerYes, InverseYes);
} }
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv) void WilsonCloverFermion<Impl, CloverHelpers>::MooeeInternal(const FermionField &in, FermionField &out, int dag, int inv)
{ {
out.Checkerboard() = in.Checkerboard(); out.Checkerboard() = in.Checkerboard();
CloverField *Clover; CloverField *Clover;
@ -278,8 +238,8 @@ void WilsonCloverFermion<Impl>::MooeeInternal(const FermionField &in, FermionFie
} // MooeeInternal } // MooeeInternal
// Derivative parts unpreconditioned pseudofermions // Derivative parts unpreconditioned pseudofermions
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag)
{ {
conformable(X.Grid(), Y.Grid()); conformable(X.Grid(), Y.Grid());
conformable(X.Grid(), force.Grid()); conformable(X.Grid(), force.Grid());
@ -349,7 +309,7 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
} }
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
force_mu -= factor*Helpers::Cmunu(U, lambda, mu, nu); // checked force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
count++; count++;
} }
@ -360,15 +320,15 @@ void WilsonCloverFermion<Impl>::MDeriv(GaugeField &force, const FermionField &X,
} }
// Derivative parts // Derivative parts
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag) void WilsonCloverFermion<Impl, CloverHelpers>::MooDeriv(GaugeField &mat, const FermionField &X, const FermionField &Y, int dag)
{ {
assert(0); assert(0);
} }
// Derivative parts // Derivative parts
template <class Impl> template<class Impl, class CloverHelpers>
void WilsonCloverFermion<Impl>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) void WilsonCloverFermion<Impl, CloverHelpers>::MeeDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag)
{ {
assert(0); // not implemented yet assert(0); // not implemented yet
} }

View File

@ -92,6 +92,19 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
} }
if ( p.dirichlet.size() == Nd+1) {
Coordinate block = p.dirichlet;
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
Dirichlet = 1;
Block = block;
}
} else {
Coordinate block(Nd+1,0);
Block = block;
}
ZeroCounters();
if (Impl::LsVectorised) { if (Impl::LsVectorised) {
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();

View File

@ -4,12 +4,13 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
Copyright (C) 2015 Copyright (C) 2022
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -599,11 +600,47 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
if(curr_type != Current::Vector)
{
std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
exit(1);
}
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_1.Grid());
conformable(_grid, q_in_2.Grid()); conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid()); conformable(_grid, q_out.Grid());
assert(0); auto UGrid= this->GaugeGrid();
PropagatorField tmp_shifted(UGrid);
PropagatorField g5Lg5(UGrid);
PropagatorField R(UGrid);
PropagatorField gmuR(UGrid);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
};
Gamma gmu=Gamma(Gmu[mu]);
g5Lg5=g5*q_in_1*g5;
tmp_shifted=Cshift(q_in_2,mu,1);
Impl::multLinkField(R,this->Umu,tmp_shifted,mu);
gmuR=gmu*R;
q_out=adj(g5Lg5)*R;
q_out-=adj(g5Lg5)*gmuR;
tmp_shifted=Cshift(q_in_1,mu,1);
Impl::multLinkField(g5Lg5,this->Umu,tmp_shifted,mu);
g5Lg5=g5*g5Lg5*g5;
R=q_in_2;
gmuR=gmu*R;
q_out-=adj(g5Lg5)*R;
q_out-=adj(g5Lg5)*gmuR;
} }
@ -617,9 +654,51 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int tmax, unsigned int tmax,
ComplexField &lattice_cmplx) ComplexField &lattice_cmplx)
{ {
if(curr_type != Current::Vector)
{
std::cout << GridLogError << "Only the conserved vector current is implemented so far." << std::endl;
exit(1);
}
int tshift = (mu == Nd-1) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
conformable(_grid, q_in.Grid()); conformable(_grid, q_in.Grid());
conformable(_grid, q_out.Grid()); conformable(_grid, q_out.Grid());
assert(0); auto UGrid= this->GaugeGrid();
PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid);
PropagatorField L(UGrid);
PropagatorField zz (UGrid);
zz=Zero();
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
};
Gamma gmu=Gamma(Gmu[mu]);
tmp = Cshift(q_in,mu,1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
tmp = ( Utmp*lattice_cmplx - gmu*Utmp*lattice_cmplx ); // Forward hop
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
q_out = where((lcoor<=tmax),tmp,zz); // Position of current complicated
tmp = q_in *lattice_cmplx;
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
tmp = -( Utmp + gmu*Utmp );
// Mask the time
if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
unsigned int t0 = 0;
tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
} else {
tmp = where((lcoor>=tmin+tshift),tmp,zz);
}
q_out+= where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
} }
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -440,6 +440,17 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); #define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
#define KERNEL_CALL_EXT(A) \
const uint64_t NN = Nsite*Ls; \
const uint64_t sz = st.surface_list.size(); \
auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \
accelerator_barrier();
#define ASM_CALL(A) \ #define ASM_CALL(A) \
thread_for( ss, Nsite, { \ thread_for( ss, Nsite, { \
int sU = ss; \ int sU = ss; \

View File

@ -9,6 +9,7 @@
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com> Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -32,10 +33,12 @@
#include <Grid/qcd/spin/Dirac.h> #include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h> #include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h> #include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermionImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#include "impl.h" #include "impl.h"
template class CompactWilsonCloverFermion<IMPLEMENTATION>; template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactCloverHelpers<IMPLEMENTATION>>;
template class CompactWilsonCloverFermion<IMPLEMENTATION, CompactExpCloverHelpers<IMPLEMENTATION>>;
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -8,6 +8,7 @@
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Mattia Bruno <mattia.bruno@cern.ch>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -31,10 +32,12 @@
#include <Grid/qcd/spin/Dirac.h> #include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h> #include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h> #include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#include "impl.h" #include "impl.h"
template class WilsonCloverFermion<IMPLEMENTATION>; template class WilsonCloverFermion<IMPLEMENTATION, CloverHelpers<IMPLEMENTATION>>;
template class WilsonCloverFermion<IMPLEMENTATION, ExpCloverHelpers<IMPLEMENTATION>>;
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiation.cc.master

View File

@ -18,6 +18,10 @@ WILSON_IMPL_LIST=" \
GparityWilsonImplF \ GparityWilsonImplF \
GparityWilsonImplD " GparityWilsonImplD "
COMPACT_WILSON_IMPL_LIST=" \
WilsonImplF \
WilsonImplD "
DWF_IMPL_LIST=" \ DWF_IMPL_LIST=" \
WilsonImplF \ WilsonImplF \
WilsonImplD \ WilsonImplD \
@ -40,7 +44,7 @@ EOF
done done
CC_LIST="WilsonCloverFermionInstantiation CompactWilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation" CC_LIST="WilsonCloverFermionInstantiation WilsonFermionInstantiation WilsonKernelsInstantiation WilsonTMFermionInstantiation"
for impl in $WILSON_IMPL_LIST for impl in $WILSON_IMPL_LIST
do do
@ -50,6 +54,16 @@ do
done done
done done
CC_LIST="CompactWilsonCloverFermionInstantiation"
for impl in $COMPACT_WILSON_IMPL_LIST
do
for f in $CC_LIST
do
ln -f -s ../$f.cc.master $impl/$f$impl.cc
done
done
CC_LIST=" \ CC_LIST=" \
CayleyFermion5DInstantiation \ CayleyFermion5DInstantiation \
ContinuedFractionFermion5DInstantiation \ ContinuedFractionFermion5DInstantiation \

View File

@ -53,9 +53,9 @@ struct DirichletFilter: public MomentumFilterBase<MomentaField>
LatticeInteger coor(grid); LatticeInteger coor(grid);
LatCM zz(grid); zz = Zero(); LatCM zz(grid); zz = Zero();
for(int mu=0;mu<Nd;mu++) { for(int mu=0;mu<Nd;mu++) {
if ( (Block[mu]) && (Block[mu] < grid->GlobalDimensions()[mu] ) ) { if ( (Block[mu]) && (Block[mu] <= grid->GlobalDimensions()[mu] ) ) {
// If costly could provide Grid earlier and precompute masks // If costly could provide Grid earlier and precompute masks
std::cout << " Dirichlet in mu="<<mu<<std::endl; std::cout << GridLogMessage << " Dirichlet in mu="<<mu<<std::endl;
LatticeCoordinate(coor,mu); LatticeCoordinate(coor,mu);
auto P_mu = PeekIndex<LorentzIndex>(P, mu); auto P_mu = PeekIndex<LorentzIndex>(P, mu);
P_mu = where(mod(coor,Block[mu])==Integer(Block[mu]-1),zz,P_mu); P_mu = where(mod(coor,Block[mu])==Integer(Block[mu]-1),zz,P_mu);

View File

@ -145,7 +145,7 @@ protected:
MomFilter->applyFilter(force); MomFilter->applyFilter(force);
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl; std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
// DumpSliceNorm("force ",force,Nd-1); DumpSliceNorm("force ",force,Nd-1);
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x]) Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR; Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;

View File

@ -72,14 +72,13 @@ public:
//Fix the gauge field Umu //Fix the gauge field Umu
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf //0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
static void SteepestDescentGaugeFix(GaugeLorentz &Umu, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) { static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
GridBase *grid = Umu.Grid(); GridBase *grid = Umu.Grid();
GaugeMat xform(grid); GaugeMat xform(grid);
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog); SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
} }
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform //Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform, Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
GridBase *grid = Umu.Grid(); GridBase *grid = Umu.Grid();
@ -141,6 +140,8 @@ public:
} }
} }
std::cout << GridLogError << "Gauge fixing did not converge in " << maxiter << " iterations." << std::endl;
if (err_on_no_converge)
assert(0 && "Gauge fixing did not converge within the specified number of iterations"); assert(0 && "Gauge fixing did not converge within the specified number of iterations");
}; };
static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) { static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform, Real alpha, GaugeMat & dmuAmu,int orthog) {

View File

@ -215,7 +215,7 @@ public:
double vol = Umu.Grid()->gSites(); double vol = Umu.Grid()->gSites();
return p.real() / vol / 4.0 / 3.0; return p.real() / vol / (4.0 * Nc ) ;
}; };
////////////////////////////////////////////////// //////////////////////////////////////////////////

View File

@ -52,6 +52,11 @@ public:
return arg; return arg;
} }
}; };
class SimpleStencilParams{
public:
Coordinate dirichlet;
SimpleStencilParams() {};
};
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -131,7 +131,6 @@ class CartesianStencilAccelerator {
int _checkerboard; int _checkerboard;
int _npoints; // Move to template param? int _npoints; // Move to template param?
int _osites; int _osites;
int _dirichlet;
StencilVector _directions; StencilVector _directions;
StencilVector _distances; StencilVector _distances;
StencilVector _comms_send; StencilVector _comms_send;
@ -503,7 +502,6 @@ public:
} }
void AddCopy(void *from,void * to, Integer bytes) void AddCopy(void *from,void * to, Integer bytes)
{ {
// std::cout << "Adding CopyReceiveBuffer "<<std::hex<<from<<" "<<to<<std::dec<<" "<<bytes<<std::endl;
CopyReceiveBuffer obj; CopyReceiveBuffer obj;
obj.from_p = from; obj.from_p = from;
obj.to_p = to; obj.to_p = to;
@ -517,7 +515,7 @@ public:
cobj *from=(cobj *)CopyReceiveBuffers[i].from_p; cobj *from=(cobj *)CopyReceiveBuffers[i].from_p;
cobj *to =(cobj *)CopyReceiveBuffers[i].to_p; cobj *to =(cobj *)CopyReceiveBuffers[i].to_p;
Integer words = CopyReceiveBuffers[i].bytes/sizeof(cobj); Integer words = CopyReceiveBuffers[i].bytes/sizeof(cobj);
// std::cout << "CopyReceiveBuffer "<<std::hex<<from<<" "<<to<<std::dec<<" "<<words*sizeof(cobj)<<std::endl;
accelerator_forNB(j, words, cobj::Nsimd(), { accelerator_forNB(j, words, cobj::Nsimd(), {
coalescedWrite(to[j] ,coalescedRead(from [j])); coalescedWrite(to[j] ,coalescedRead(from [j]));
}); });
@ -543,13 +541,12 @@ public:
&&(CachedTransfers[i].lane ==lane) &&(CachedTransfers[i].lane ==lane)
&&(CachedTransfers[i].cb ==cb) &&(CachedTransfers[i].cb ==cb)
){ ){
// std::cout << "Found duplicate plane dir "<<direction<<" plane "<< OrthogPlane<< " simd "<<lane << " relproc "<<DestProc<< " bytes "<<bytes <<std::endl;
AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes); AddCopy(CachedTransfers[i].recv_buf,recv_buf,bytes);
return 1; return 1;
} }
} }
// std::cout << "No duplicate plane dir "<<direction<<" plane "<< OrthogPlane<< " simd "<<lane << " relproc "<<DestProc<<" bytes "<<bytes<<std::endl;
CachedTransfers.push_back(obj); CachedTransfers.push_back(obj);
return 0; return 0;
} }
@ -643,23 +640,23 @@ public:
} }
} }
if(local == 0) { if(local == 0) {
surface_list.push_back(site); for(int s=0;s<Ls;s++){
surface_list.push_back(site*Ls+s);
}
} }
} }
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)
{ {
this->_dirichlet = 1;
for(int ii=0;ii<this->_npoints;ii++){ for(int ii=0;ii<this->_npoints;ii++){
int dimension = this->_directions[ii]; int dimension = this->_directions[ii];
int displacement = this->_distances[ii]; int displacement = this->_distances[ii];
int shift = displacement;
int gd = _grid->_gdimensions[dimension]; int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension]; int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension]; int pd = _grid->_processors [dimension];
int ld = gd/pd;
int pc = _grid->_processor_coor[dimension]; int pc = _grid->_processor_coor[dimension];
int ld = fd/pd;
/////////////////////////////////////////// ///////////////////////////////////////////
// Figure out dirichlet send and receive // Figure out dirichlet send and receive
// on this leg of stencil. // on this leg of stencil.
@ -668,25 +665,25 @@ public:
int block = dirichlet_block[dimension]; int block = dirichlet_block[dimension];
this->_comms_send[ii] = comm_dim; this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim; this->_comms_recv[ii] = comm_dim;
if ( block ) { if ( block && comm_dim ) {
assert(abs(displacement) < ld ); assert(abs(displacement) < ld );
// Quiesce communication across block boundaries
if( displacement > 0 ) { if( displacement > 0 ) {
// High side, low side // High side, low side
// | <--B--->| // | <--B--->|
// | | | // | | |
// noR // noR
// noS // noS
if ( (ld*(pc+1) ) % block == 0 ) this->_comms_recv[ii] = 0; if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_recv[ii] = 0;
if ( ( ld*pc ) % block == 0 ) this->_comms_send[ii] = 0; if ( ( (ld*pc ) % block ) == 0 ) this->_comms_send[ii] = 0;
} else { } else {
// High side, low side // High side, low side
// | <--B--->| // | <--B--->|
// | | | // | | |
// noS // noS
// noR // noR
if ( (ld*(pc+1) ) % block == 0 ) this->_comms_send[ii] = 0; if ( ( (ld*(pc+1) ) % block ) == 0 ) this->_comms_send[ii] = 0;
if ( ( ld*pc ) % block == 0 ) this->_comms_recv[ii] = 0; if ( ( (ld*pc ) % block ) == 0 ) this->_comms_recv[ii] = 0;
} }
} }
} }
@ -698,7 +695,6 @@ public:
const std::vector<int> &distances, const std::vector<int> &distances,
Parameters p) Parameters p)
{ {
this->_dirichlet = 0;
face_table_computed=0; face_table_computed=0;
_grid = grid; _grid = grid;
this->parameters=p; this->parameters=p;
@ -715,6 +711,8 @@ public:
this->_comms_recv.resize(npoints); this->_comms_recv.resize(npoints);
this->same_node.resize(npoints); this->same_node.resize(npoints);
if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up
_unified_buffer_size=0; _unified_buffer_size=0;
surface_list.resize(0); surface_list.resize(0);
@ -734,7 +732,7 @@ public:
int gd = _grid->_gdimensions[dimension]; int gd = _grid->_gdimensions[dimension];
int fd = _grid->_fdimensions[dimension]; int fd = _grid->_fdimensions[dimension];
int pd = _grid->_processors [dimension]; int pd = _grid->_processors [dimension];
int ld = gd/pd; // int ld = gd/pd;
int rd = _grid->_rdimensions[dimension]; int rd = _grid->_rdimensions[dimension];
int pc = _grid->_processor_coor[dimension]; int pc = _grid->_processor_coor[dimension];
this->_permute_type[point]=_grid->PermuteType(dimension); this->_permute_type[point]=_grid->PermuteType(dimension);
@ -746,9 +744,6 @@ public:
int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim);
int rotate_dim = _grid->_simd_layout[dimension]>2; int rotate_dim = _grid->_simd_layout[dimension]>2;
this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim;
assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported
int sshift[2]; int sshift[2];
@ -878,12 +873,14 @@ public:
for(int x=0;x<rd;x++){ for(int x=0;x<rd;x++){
int permute_type=grid->PermuteType(dimension); int permute_type=grid->PermuteType(dimension);
int permute_slice;
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
int offnode = 0; int offnode = 0;
if ( simd_layout > 1 ) { if ( simd_layout > 1 ) {
permute_slice=1;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
int inner_bit = (Nsimd>>(permute_type+1)); int inner_bit = (Nsimd>>(permute_type+1));
@ -900,6 +897,7 @@ public:
} else { } else {
int comm_proc = ((x+sshift)/rd)%pd; int comm_proc = ((x+sshift)/rd)%pd;
offnode = (comm_proc!= 0); offnode = (comm_proc!= 0);
permute_slice=0;
} }
int wraparound=0; int wraparound=0;
@ -911,25 +909,29 @@ public:
} }
// Wrap locally dirichlet support case OR node local // Wrap locally dirichlet support case OR node local
if ( (offnode==0) || (comms_recv==0) ) { if ( offnode==0 ) {
int permute_slice=0; permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else { } else {
if ( comms_recv ) {
ScatterPlane(point,dimension,x,cbmask,_unified_buffer_size,wraparound); // permute/extract/merge is done in comms phase
} else {
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
}
}
if ( offnode ) {
int words = buffer_size; int words = buffer_size;
if (cbmask != 0x3) words=words>>1; if (cbmask != 0x3) words=words>>1;
// int rank = grid->_processor;
// int recv_from_rank;
// int xmit_to_rank;
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words; _unified_buffer_size += words;
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
} }
} }
} }
@ -1061,71 +1063,75 @@ public:
if (comm_proc) { if (comm_proc) {
int words = buffer_size; int words = buffer_size;
if (cbmask != 0x3) words=words>>1; if (cbmask != 0x3) words=words>>1;
int bytes = words * compress.CommDatumSize(); int bytes = words * compress.CommDatumSize();
int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane int so = sx*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
int comm_off = u_comm_offset;
int recv_from_rank;
int xmit_to_rank;
cobj *recv_buf;
cobj *send_buf;
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
assert (xmit_to_rank != _grid->ThisRank());
assert (recv_from_rank != _grid->ThisRank());
if( comms_send ) {
if ( !face_table_computed ) { if ( !face_table_computed ) {
face_table.resize(face_idx+1); face_table.resize(face_idx+1);
std::vector<std::pair<int,int> > face_table_host ; std::vector<std::pair<int,int> > face_table_host ;
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
face_table[face_idx].resize(face_table_host.size()); face_table[face_idx].resize(face_table_host.size());
acceleratorCopyToDevice(&face_table_host[0], acceleratorCopyToDevice(&face_table_host[0],
&face_table[face_idx][0], &face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0])); face_table[face_idx].size()*sizeof(face_table_host[0]));
} }
// int rank = _grid->_processor;
int recv_from_rank;
int xmit_to_rank;
_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
assert (xmit_to_rank != _grid->ThisRank());
assert (recv_from_rank != _grid->ThisRank());
cobj *recv_buf;
if ( compress.DecompressionStep() ) { if ( compress.DecompressionStep() ) {
recv_buf=u_simd_recv_buf[0]; recv_buf=u_simd_recv_buf[0];
} else { } else {
recv_buf=this->u_recv_buf_p; recv_buf=this->u_recv_buf_p;
} }
cobj *send_buf;
send_buf = this->u_send_buf_p; // Gather locally, must send send_buf = this->u_send_buf_p; // Gather locally, must send
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Gather locally // Gather locally
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
assert(send_buf!=NULL); assert(send_buf!=NULL);
if ( comms_send )
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so);
face_idx++;
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[u_comm_offset],0,bytes,cbmask); Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
}
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,bytes,cbmask);
if ( (!duplicate) ) { // Force comms for now if ( (!duplicate) ) { // Force comms for now
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Build a list of things to do after we synchronise GPUs // Build a list of things to do after we synchronise GPUs
// Start comms now??? // Start comms now???
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
AddPacket((void *)&send_buf[u_comm_offset], AddPacket((void *)&send_buf[comm_off],
(void *)&recv_buf[u_comm_offset], (void *)&recv_buf[comm_off],
xmit_to_rank, comms_send, xmit_to_rank, comms_send,
recv_from_rank, comms_recv, recv_from_rank, comms_recv,
bytes); bytes);
} }
if ( compress.DecompressionStep() ) { if ( compress.DecompressionStep() && comms_recv ) {
AddDecompress(&this->u_recv_buf_p[u_comm_offset], AddDecompress(&this->u_recv_buf_p[comm_off],
&recv_buf[u_comm_offset], &recv_buf[comm_off],
words,Decompressions); words,Decompressions);
} }
u_comm_offset+=words; u_comm_offset+=words;
face_idx++;
} }
} }
return 0; return 0;
@ -1155,7 +1161,6 @@ public:
int permute_type=_grid->PermuteType(dimension); int permute_type=_grid->PermuteType(dimension);
// std::cout << "SimdNew permute type "<<permute_type<<std::endl;
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Simd direction uses an extract/merge pair // Simd direction uses an extract/merge pair
@ -1189,8 +1194,9 @@ public:
if ( any_offnode ) { if ( any_offnode ) {
int comm_off = u_comm_offset;
for(int i=0;i<maxl;i++){ for(int i=0;i<maxl;i++){
spointers[i] = (cobj *) &u_simd_send_buf[i][u_comm_offset]; spointers[i] = (cobj *) &u_simd_send_buf[i][comm_off];
} }
int sx = (x+sshift)%rd; int sx = (x+sshift)%rd;
@ -1199,14 +1205,14 @@ public:
face_table.resize(face_idx+1); face_table.resize(face_idx+1);
std::vector<std::pair<int,int> > face_table_host ; std::vector<std::pair<int,int> > face_table_host ;
Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table_host); Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,comm_off,face_table_host);
face_table[face_idx].resize(face_table_host.size()); face_table[face_idx].resize(face_table_host.size());
acceleratorCopyToDevice(&face_table_host[0], acceleratorCopyToDevice(&face_table_host[0],
&face_table[face_idx][0], &face_table[face_idx][0],
face_table[face_idx].size()*sizeof(face_table_host[0])); face_table[face_idx].size()*sizeof(face_table_host[0]));
} }
// if ( comms_send ) if ( comms_send || comms_recv )
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type); Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
face_idx++; face_idx++;
@ -1226,8 +1232,8 @@ public:
int nbr_plane = nbr_ic; int nbr_plane = nbr_ic;
assert (sx == nbr_ox); assert (sx == nbr_ox);
auto rp = &u_simd_recv_buf[i ][u_comm_offset]; auto rp = &u_simd_recv_buf[i ][comm_off];
auto sp = &u_simd_send_buf[nbr_plane][u_comm_offset]; auto sp = &u_simd_send_buf[nbr_plane][comm_off];
if(nbr_proc){ if(nbr_proc){
@ -1253,9 +1259,12 @@ public:
} }
} }
AddMerge(&this->u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,permute_type,Mergers); if ( comms_recv ) {
AddMerge(&this->u_recv_buf_p[comm_off],rpointers,reduced_buffer_size,permute_type,Mergers);
}
u_comm_offset +=buffer_size; u_comm_offset +=buffer_size;
} }
} }
return 0; return 0;

View File

@ -16,6 +16,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
#ifdef GRID_CUDA #ifdef GRID_CUDA
cudaDeviceProp *gpu_props; cudaDeviceProp *gpu_props;
cudaStream_t copyStream; cudaStream_t copyStream;
cudaStream_t cpuStream;
void acceleratorInit(void) void acceleratorInit(void)
{ {
int nDevices = 1; int nDevices = 1;
@ -98,6 +99,7 @@ void acceleratorInit(void)
cudaSetDevice(device); cudaSetDevice(device);
cudaStreamCreate(&copyStream); cudaStreamCreate(&copyStream);
cudaStreamCreate(&cpuStream);
const int len=64; const int len=64;
char busid[len]; char busid[len];
if( rank == world_rank ) { if( rank == world_rank ) {
@ -112,6 +114,7 @@ void acceleratorInit(void)
#ifdef GRID_HIP #ifdef GRID_HIP
hipDeviceProp_t *gpu_props; hipDeviceProp_t *gpu_props;
hipStream_t copyStream; hipStream_t copyStream;
hipStream_t cpuStream;
void acceleratorInit(void) void acceleratorInit(void)
{ {
int nDevices = 1; int nDevices = 1;
@ -180,6 +183,7 @@ void acceleratorInit(void)
#endif #endif
hipSetDevice(device); hipSetDevice(device);
hipStreamCreate(&copyStream); hipStreamCreate(&copyStream);
hipStreamCreate(&cpuStream);
const int len=64; const int len=64;
char busid[len]; char busid[len];
if( rank == world_rank ) { if( rank == world_rank ) {

View File

@ -107,6 +107,7 @@ void acceleratorInit(void);
extern int acceleratorAbortOnGpuError; extern int acceleratorAbortOnGpuError;
extern cudaStream_t copyStream; extern cudaStream_t copyStream;
extern cudaStream_t cpuStream;
accelerator_inline int acceleratorSIMTlane(int Nsimd) { accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT #ifdef GRID_SIMT
@ -134,7 +135,7 @@ inline void cuda_mem(void)
}; \ }; \
dim3 cu_threads(nsimd,acceleratorThreads(),1); \ dim3 cu_threads(nsimd,acceleratorThreads(),1); \
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
LambdaApply<<<cu_blocks,cu_threads>>>(num1,num2,nsimd,lambda); \ LambdaApply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,nsimd,lambda); \
} }
#define accelerator_for6dNB(iter1, num1, \ #define accelerator_for6dNB(iter1, num1, \
@ -153,7 +154,7 @@ inline void cuda_mem(void)
}; \ }; \
dim3 cu_blocks (num1,num2,num3); \ dim3 cu_blocks (num1,num2,num3); \
dim3 cu_threads(num4,num5,num6); \ dim3 cu_threads(num4,num5,num6); \
Lambda6Apply<<<cu_blocks,cu_threads>>>(num1,num2,num3,num4,num5,num6,lambda); \ Lambda6Apply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
} }
template<typename lambda> __global__ template<typename lambda> __global__
@ -189,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3,
#define accelerator_barrier(dummy) \ #define accelerator_barrier(dummy) \
{ \ { \
cudaDeviceSynchronize(); \ cudaStreamSynchronize(cpuStream); \
cudaError err = cudaGetLastError(); \ cudaError err = cudaGetLastError(); \
if ( cudaSuccess != err ) { \ if ( cudaSuccess != err ) { \
printf("accelerator_barrier(): Cuda error %s \n", \ printf("accelerator_barrier(): Cuda error %s \n", \
@ -339,6 +340,7 @@ NAMESPACE_BEGIN(Grid);
#define accelerator_inline __host__ __device__ inline #define accelerator_inline __host__ __device__ inline
extern hipStream_t copyStream; extern hipStream_t copyStream;
extern hipStream_t cpuStream;
/*These routines define mapping from thread grid to loop & vector lane indexing */ /*These routines define mapping from thread grid to loop & vector lane indexing */
accelerator_inline int acceleratorSIMTlane(int Nsimd) { accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT #ifdef GRID_SIMT
@ -360,11 +362,11 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \ if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \ hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
0,0, \ 0,cpuStream, \
num1,num2,nsimd, lambda); \ num1,num2,nsimd, lambda); \
} else { \ } else { \
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
0,0, \ 0,cpuStream, \
num1,num2,nsimd, lambda); \ num1,num2,nsimd, lambda); \
} \ } \
} }
@ -398,7 +400,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
#define accelerator_barrier(dummy) \ #define accelerator_barrier(dummy) \
{ \ { \
hipDeviceSynchronize(); \ hipStreamSynchronize(cpuStream); \
auto err = hipGetLastError(); \ auto err = hipGetLastError(); \
if ( err != hipSuccess ) { \ if ( err != hipSuccess ) { \
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \ printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \

View File

@ -0,0 +1,419 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef MobiusFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 6;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 12;
RealD M5 = 1.8;
RealD b = 1.5;
RealD c = 0.5;
// Real beta = 2.31;
// Real light_mass = 5.4e-4;
Real beta = 2.13;
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp; // Strange
SFRp.lo = 4.0e-3;
SFRp.hi = 90.0;
SFRp.MaxIter = 60000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-4;
SFRp.degree = 12;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=0;
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 2.0e-5;
OFRp.hi = 90.0;
OFRp.MaxIter = 60000;
OFRp.tolerance= 1.0e-7;
OFRp.mdtolerance= 1.0e-4;
// OFRp.degree = 20; converges
// OFRp.degree = 16;
OFRp.degree = 12;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
// Block4[0] = Dirichlet[1];
// Block4[1] = Dirichlet[2];
// Block4[2] = Dirichlet[3];
Block4[0] = 0;
Block4[1] = 0;
Block4[2] = 0;
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grid
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
Params.dirichlet=NonDirichlet;
FermionAction::ImplParams ParamsDir(boundary);
ParamsDir.dirichlet=Dirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-8;
double MDStoppingCondition = 1e-6;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
FermionAction::ImplParams ParamsDen(boundary);
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
else ParamsNum.dirichlet = NonDirichlet;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
if(h!=0) {
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
if(1){
// TODO:
// i) Break high bound, how rapidly does it break? Tune this test.
// ii) Break low bound, how rapidly?
// iii) Run lanczos
// iv) Have CG return spectral range estimate
FermionField vec(StrangeOp.FermionRedBlackGrid());
FermionField res(StrangeOp.FermionRedBlackGrid());
vec = 1; // Fill with any old junk
std::cout << "Bounds check on strange operator mass "<< StrangeOp.Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> SdagS(StrangeOp);
HighBoundCheck(SdagS,vec,SFRp.hi);
ChebyBoundsCheck(SdagS,vec,SFRp.lo,SFRp.hi);
std::cout << "Strange inversion"<<std::endl;
res=Zero();
// MDCG(SdagS,vec,res);
std::cout << "Bounds check on light quark operator mass "<< Denominators[0]->Mass() <<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> UdagU(*Denominators[0]);
HighBoundCheck(UdagU,vec,OFRp.hi);
ChebyBoundsCheck(UdagU,vec,OFRp.lo,OFRp.hi);
std::cout << "light inversion"<<std::endl;
res=Zero();
// MDCG(UdagU,vec,res);
std::cout << "Bounds check on strange dirichlet operator mass "<< StrangeOpDir.Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> SddagSd(StrangeOpDir);
HighBoundCheck(SddagSd,vec,OFRp.hi);
ChebyBoundsCheck(SddagSd,vec,OFRp.lo,OFRp.hi);
std::cout << "strange dirichlet inversion"<<std::endl;
res=Zero();
// MDCG(SddagSd,vec,res);
std::cout << "Bounds check on light dirichlet operator mass "<< Numerators[0]->Mass()<<std::endl;
SchurDifferentiableOperator<FermionImplPolicy> UddagUd(*Numerators[0]);
HighBoundCheck(UddagUd,vec,OFRp.hi);
ChebyBoundsCheck(UddagUd,vec,OFRp.lo,OFRp.hi);
std::cout << "light dirichlet inversion"<<std::endl;
res=Zero();
//MDCG(UddagUd,vec,res);
auto grid4= GridPtr;
auto rbgrid4= GridRBPtr;
auto rbgrid = StrangeOp.FermionRedBlackGrid();
auto grid = StrangeOp.FermionGrid();
if(1){
const int Nstop = 5;
const int Nk = 20;
const int Np = 20;
const int Nm = Nk+Np;
const int MaxIt= 10000;
int Nconv;
RealD resid = 1.0e-5;
if(0)
{
int order = 501;
RealD bound = 5.0e-4;
std::cout << GridLogMessage << " Lanczos for dirichlet bound " << bound<<" order "<< order<<std::endl;
Chebyshev<FermionField> Cheby(bound,90.,order);
FunctionHermOp<FermionField> OpCheby(Cheby,UddagUd);
PlainHermOp<FermionField> Op (UddagUd);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);
std::vector<FermionField> evec(Nm,rbgrid);
FermionField src(rbgrid);src = 1.0;
IRL.calc(eval,evec,src,Nconv);
FermionField tmp(rbgrid);
FermionField ftmp(grid);
FermionField ftmp4(grid4);
for(int ev=0;ev<evec.size();ev++){
Gamma GT(Gamma::Algebra::GammaT);
std::cout << " evec " << ev << std::endl;
tmp = evec[ev] + GT*evec[ev];
DumpSliceNorm(" 1+gammaT ",tmp,Nd);
tmp = evec[ev] - GT*evec[ev];
DumpSliceNorm(" 1-gammaT ",tmp,Nd);
}
for(int e=0;e<10;e++){
std::cout << " Dirichlet evec "<<e<<std::endl;
tmp = evec[e];
for(int s=0;s<Ls;s++){
ftmp=Zero();
setCheckerboard(ftmp,tmp);
ExtractSlice(ftmp4,ftmp,s,0);
std::cout << "s-slice "<<s<< " evec[0] " << std::endl;
DumpSliceNorm(" s-slice ",ftmp4,Nd-1);
}
}
}
if(1)
{
int order = 2001;
RealD bound = 6.0e-5;
std::cout << GridLogMessage << " Lanczos for full operator bound " << bound<<" order "<< order<<std::endl;
Chebyshev<FermionField> Cheby(bound,90.,order);
FunctionHermOp<FermionField> OpCheby(Cheby,UdagU);
PlainHermOp<FermionField> Op (UdagU);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
std::vector<RealD> eval(Nm);
std::vector<FermionField> evec(Nm,rbgrid);
FermionField src(rbgrid); src = 1.0;
IRL.calc(eval,evec,src,Nconv);
FermionField tmp(rbgrid);
FermionField ftmp(grid);
FermionField ftmp4(grid4);
for(int e=0;e<evec.size();e++){
std::cout << " Full evec "<<e<<std::endl;
tmp = evec[e];
for(int s=0;s<Ls;s++){
ftmp=Zero();
setCheckerboard(ftmp,tmp);
ExtractSlice(ftmp4,ftmp,s,0);
std::cout << "s-slice "<<s<< " evec[0] " << std::endl;
DumpSliceNorm(" s-slice ",ftmp4,Nd-1);
}
}
}
Grid_finalize();
std::cout << " All done "<<std::endl;
exit(EXIT_SUCCESS);
}
}
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -0,0 +1,444 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
/* Debugging instances of objects; references are stored
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
*/
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
// Assumption made in code to extract gauge field
// We could avoid storing LinopD reference alltogether ?
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
////////////////////////////////////////////////////////////////////////////////////
// Must snarf a single precision copy of the gauge field in Linop_d argument
////////////////////////////////////////////////////////////////////////////////////
typedef typename FermionOperatorF::GaugeField GaugeFieldF;
typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
typedef typename FermionOperatorD::GaugeField GaugeFieldD;
typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
GridBase * GridPtrF = SinglePrecGrid4;
GridBase * GridPtrD = FermOpD.Umu.Grid();
GaugeFieldF U_f (GridPtrF);
GaugeLinkFieldF Umu_f(GridPtrF);
// std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
// std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
////////////////////////////////////////////////////////////////////////////////////
// Moving this to a Clone method of fermion operator would allow to duplicate the
// physics parameters and decrease gauge field copies
////////////////////////////////////////////////////////////////////////////////////
GaugeLinkFieldD Umu_d(GridPtrD);
for(int mu=0;mu<Nd*2;mu++){
Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
precisionChange(Umu_f,Umu_d);
PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
}
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef WilsonImplF FermionImplPolicyF;
typedef MobiusFermionR FermionAction;
typedef MobiusFermionF FermionActionF;
typedef typename FermionAction::FermionField FermionField;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 4;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 12;
RealD M5 = 1.8;
RealD b = 1.5;
RealD c = 0.5;
Real beta = 2.31;
// Real light_mass = 5.4e-4;
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp; // Strange
SFRp.lo = 4.0e-3;
SFRp.hi = 90.0;
SFRp.MaxIter = 60000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-6;
SFRp.degree = 12;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=0;
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 2.0e-5;
OFRp.hi = 90.0;
OFRp.MaxIter = 60000;
OFRp.tolerance= 1.0e-8;
OFRp.mdtolerance= 1.0e-6;
// OFRp.degree = 20; converges
// OFRp.degree = 16;
OFRp.degree = 12;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
Coordinate Block4(Nd);
Block4[0] = Dirichlet[1];
Block4[1] = Dirichlet[2];
Block4[2] = Dirichlet[3];
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grids
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
LatticeGaugeFieldF UF(GridPtrF);
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
// These lines are unecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
Params.dirichlet=NonDirichlet;
FermionAction::ImplParams ParamsDir(boundary);
ParamsDir.dirichlet=Dirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-10;
double MDStoppingCondition = 1e-7;
double MDStoppingConditionLoose = 1e-6;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy);
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy);
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
std::vector<FermionAction *> Numerators;
std::vector<FermionAction *> Denominators;
std::vector<FermionActionF *> DenominatorsF;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
std::vector<MxPCG *> ActionMPCG;
std::vector<MxPCG *> MPCG;
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
std::vector<LinearOperatorD *> LinOpD;
std::vector<LinearOperatorF *> LinOpF;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
FermionAction::ImplParams ParamsDen(boundary);
FermionActionF::ImplParams ParamsDenF(boundary);
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
else ParamsNum.dirichlet = NonDirichlet;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
ParamsDenF.dirichlet = ParamsDen.dirichlet;
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
double conv = MDStoppingCondition;
if (h<3) conv= MDStoppingConditionLoose; // Relax on first two hasenbusch factors
const int MX_inner = 5000;
MPCG.push_back(new MxPCG(conv,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
ActionMPCG.push_back(new MxPCG(StoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
if(h!=0) {
// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
} else {
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
}
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

53
HMC/RNGstate.cc Normal file
View File

@ -0,0 +1,53 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file:
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
int main(int argc, char **argv)
{
using namespace Grid;
Grid_init(&argc, &argv);
Coordinate latt4 = GridDefaultLatt();
Coordinate mpi = GridDefaultMpi();
Coordinate simd = GridDefaultSimd(Nd,vComplexD::Nsimd());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4,simd,mpi);
GridSerialRNG sRNG; sRNG.SeedUniqueString(std::string("The Serial RNG"));
GridParallelRNG pRNG(UGrid); pRNG.SeedUniqueString(std::string("The 4D RNG"));
std::string rngfile("ckpoint_rng.0");
NerscIO::writeRNGState(sRNG, pRNG, rngfile);
Grid_finalize();
}

View File

@ -191,9 +191,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage<<"Called warmup"<<std::endl; std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0); Dw.Dhop(src,result,0);
__SSC_STOP;
} }
double t1=usecond(); double t1=usecond();
FGrid->Barrier(); FGrid->Barrier();

View File

@ -249,8 +249,9 @@ void Benchmark(int Ls, Coordinate Dirichlet)
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionF::ImplParams p;
Dw.DirichletBlock(Dirichlet); p.dirichlet=Dirichlet;
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,p);
Dw.ImportGauge(Umu); Dw.ImportGauge(Umu);
int ncall =300; int ncall =300;
@ -261,9 +262,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
std::cout<<GridLogMessage<<"Called warmup"<<std::endl; std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0); Dw.Dhop(src,result,0);
__SSC_STOP;
} }
double t1=usecond(); double t1=usecond();
FGrid->Barrier(); FGrid->Barrier();

View File

@ -81,8 +81,8 @@ int main (int argc, char ** argv)
Vector<Coeff_t> diag = Dw.bs; Vector<Coeff_t> diag = Dw.bs;
Vector<Coeff_t> upper= Dw.cs; Vector<Coeff_t> upper= Dw.cs;
Vector<Coeff_t> lower= Dw.cs; Vector<Coeff_t> lower= Dw.cs;
upper[Ls-1]=-Dw.mass*upper[Ls-1]; upper[Ls-1]=-Dw.mass_minus*upper[Ls-1];
lower[0] =-Dw.mass*lower[0]; lower[0] =-Dw.mass_plus*lower[0];
LatticeFermion r_eo(FGrid); LatticeFermion r_eo(FGrid);
LatticeFermion src_e (FrbGrid); LatticeFermion src_e (FrbGrid);

View File

@ -44,6 +44,13 @@ void bench_wilson (
double const volume, double const volume,
int const dag ); int const dag );
void bench_wilson_eo (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag );
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
@ -110,8 +117,8 @@ int main (int argc, char ** argv)
bench_wilson(src,result,Dw,volume,DaggerYes); bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << "\t"; std::cout << "\t";
// EO // EO
bench_wilson(src,result,Dw,volume,DaggerNo); bench_wilson_eo(src_o,result_e,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes); bench_wilson_eo(src_o,result_e,Dw,volume,DaggerYes);
std::cout << std::endl; std::cout << std::endl;
} }
} }

View File

@ -159,7 +159,7 @@ case ${ac_ZMOBIUS} in
esac esac
############### Nc ############### Nc
AC_ARG_ENABLE([Nc], AC_ARG_ENABLE([Nc],
[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])], [AC_HELP_STRING([--enable-Nc=2|3|4|5], [enable number of colours])],
[ac_Nc=${enable_Nc}], [ac_Nc=3]) [ac_Nc=${enable_Nc}], [ac_Nc=3])
case ${ac_Nc} in case ${ac_Nc} in
@ -394,11 +394,10 @@ case ${CXXTEST} in
fi fi
;; ;;
hipcc) hipcc)
# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
CXXLD=${CXX} CXXLD=${CXX}
if test $ac_openmp = yes; then if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" CXXFLAGS="$CXXFLAGS -fopenmp"
fi fi
;; ;;
dpcpp) dpcpp)
@ -557,16 +556,19 @@ esac
AC_ARG_ENABLE([setdevice],[AC_HELP_STRING([--enable-setdevice | --disable-setdevice], AC_ARG_ENABLE([setdevice],[AC_HELP_STRING([--enable-setdevice | --disable-setdevice],
[Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no]) [Set GPU to rank in node with cudaSetDevice or similar])],[ac_SETDEVICE=${enable_SETDEVICE}],[ac_SETDEVICE=no])
case ${ac_SETDEVICE} in case ${ac_SETDEVICE} in
yes);; yes)
no) echo ENABLE SET DEVICE
;;
*)
AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU] ) AC_DEFINE([GRID_DEFAULT_GPU],[1],[GRID_DEFAULT_GPU] )
echo DISABLE SET DEVICE
;; ;;
esac esac
######################################################### #########################################################
###################### Shared memory intranode ######### ###################### Shared memory intranode #########
######################################################### #########################################################
AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no], AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone|nvlink|no|none],
[Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no]) [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=no])
case ${ac_SHM} in case ${ac_SHM} in
@ -586,7 +588,7 @@ case ${ac_SHM} in
AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] ) AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
;; ;;
shmnone | no) shmnone | no | none)
AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] ) AC_DEFINE([GRID_MPI3_SHM_NONE],[1],[GRID_MPI3_SHM_NONE] )
;; ;;

View File

@ -93,14 +93,14 @@ template<class Field> class FreeLaplacianStencil : public SparseMatrixBase<Field
{ {
public: public:
typedef typename Field::vector_object siteObject; typedef typename Field::vector_object siteObject;
typedef CartesianStencil<siteObject, siteObject, int> StencilImpl; typedef CartesianStencil<siteObject, siteObject, SimpleStencilParams> StencilImpl;
GridBase *grid; GridBase *grid;
StencilImpl Stencil; StencilImpl Stencil;
SimpleCompressor<siteObject> Compressor; SimpleCompressor<siteObject> Compressor;
FreeLaplacianStencil(GridBase *_grid) FreeLaplacianStencil(GridBase *_grid)
: Stencil (_grid,6,Even,directions,displacements,0), grid(_grid) : Stencil (_grid,6,Even,directions,displacements,SimpleStencilParams()), grid(_grid)
{ }; { };
virtual GridBase *Grid(void) { return grid; }; virtual GridBase *Grid(void) { return grid; };
@ -168,7 +168,8 @@ public:
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef CartesianStencil<siteObject, siteObject, int> StencilImpl; typedef CartesianStencil<siteObject, siteObject,SimpleStencilParams> StencilImpl;
SimpleStencilParams p;
GridBase *grid; GridBase *grid;
StencilImpl Stencil; StencilImpl Stencil;
@ -177,7 +178,7 @@ public:
CovariantLaplacianStencil(GaugeField &Umu) CovariantLaplacianStencil(GaugeField &Umu)
: :
grid(Umu.Grid()), grid(Umu.Grid()),
Stencil (grid,6,Even,directions,displacements,0), Stencil (grid,6,Even,directions,displacements,p),
Uds(grid) Uds(grid)
{ {
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {

View File

@ -7,21 +7,19 @@
#SBATCH -o DWF.%J #SBATCH -o DWF.%J
#SBATCH -e DWF.%J #SBATCH -e DWF.%J
#SBATCH -N 1 #SBATCH -N 1
#SBATCH -n 4 #SBATCH -n 2
#SBATCH --exclusive #SBATCH --gpu-bind=map_gpu:0,1
DIR=. DIR=.
module list source setup.sh
export MPICH_OFI_NIC_POLICY=GPU
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM export OMP_NUM_THREADS=16
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=4
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads 8 --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun --gpus-per-task 1 -n4 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS srun --gpus-per-task 1 -N1 -n2 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 16.16.32.64 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8

View File

@ -6,10 +6,10 @@
#SBATCH -J DWF #SBATCH -J DWF
#SBATCH -o DWF.%J #SBATCH -o DWF.%J
#SBATCH -e DWF.%J #SBATCH -e DWF.%J
#SBATCH -N 8 #SBATCH -N 1
#SBATCH -n 64 #SBATCH -n 8
#SBATCH --exclusive ##SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4
#SBATCH --gpu-bind=map_gpu:0,1,2,3,7,6,5,4 #SBATCH --gpu-bind=map_gpu:0,1,2,3,6,7,4,5
DIR=. DIR=.
source setup.sh source setup.sh
@ -17,25 +17,12 @@ source setup.sh
export MPICH_OFI_NIC_POLICY=GPU export MPICH_OFI_NIC_POLICY=GPU
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0 export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM #export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=CMA #export MPICH_SMP_SINGLE_COPY_MODE=CMA
#export MPICH_SMP_SINGLE_COPY_MODE=NONE #export MPICH_SMP_SINGLE_COPY_MODE=NONE
export OMP_NUM_THREADS=1 export OMP_NUM_THREADS=16
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
for vol in 64.64.64.256 64.64.64.128 32.32.32.256 32.32.32.128 srun --gpus-per-task 1 -N1 -n8 ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.1 --shm-mpi 1 --shm 2048 --comms-sequential --accelerator-threads 8
do
PARAMS=" --accelerator-threads 8 --grid $vol --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_dwf_fp32 $PARAMS > dwf.${vol}.8node.shm-mpi1
done
PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 1"
echo $PARAMS
srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node
PARAMS=" --accelerator-threads 8 --grid 64.64.64.32 --mpi 2.2.2.8 --comms-overlap --shm 2048 --shm-mpi 0"
echo $PARAMS
srun --gpus-per-task 1 -N8 -n64 ./benchmarks/Benchmark_ITT $PARAMS > itt.8node_shm0

View File

@ -1,6 +1,6 @@
module load PrgEnv-gnu module load PrgEnv-gnu
module load rocm/5.1.0 module load rocm/5.1.0
module load cray-mpich/8.1.15 module load cray-mpich/8.1.16
module load gmp module load gmp
#module load cray-fftw #module load cray-fftw
module load craype-accel-amd-gfx90a module load craype-accel-amd-gfx90a

View File

@ -1,9 +1,14 @@
DIR=`pwd`
PREFIX=$DIR/../Prequisites/install/
../../configure \ ../../configure \
--enable-comms=mpi \ --enable-comms=mpi \
--enable-simd=GPU \ --enable-simd=GPU \
--enable-shm=nvlink \ --enable-shm=nvlink \
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--enable-accelerator=cuda \ --enable-accelerator=cuda \
--enable-setdevice \
--disable-accelerator-cshift \
--with-gmp=$PREFIX \
--disable-fermion-reps \ --disable-fermion-reps \
--disable-unified \ --disable-unified \
--disable-gparity \ --disable-gparity \

View File

@ -1,24 +1,27 @@
#!/bin/bash #!/bin/bash
#SBATCH -A mp13 #SBATCH -A m3886_g
#SBATCH -C gpu #SBATCH -C gpu
#SBATCH -q regular #SBATCH -q debug
#SBATCH -t 0:20:00 #SBATCH -t 0:20:00
#SBATCH -n 16
#SBATCH --ntasks-per-node=4
#SBATCH -c 32 #SBATCH -c 32
#SBATCH --exclusive #SBATCH -N 1
#SBATCH -n 4
#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-task=1 #SBATCH --gpus-per-task=1
#SBATCH --gpu-bind=map_gpu:0,1,2,3 #SBATCH --exclusive
#SBATCH --gpu-bind=none
export SLURM_CPU_BIND="cores" export SLURM_CPU_BIND="cores"
export MPICH_RDMA_ENABLED_CUDA=1
export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_GPU_SUPPORT_ENABLED=1
srun ./benchmarks/Benchmark_comms_host_device --mpi 2.2.2.2 --accelerator-threads 8 > comms.4node export MPICH_RDMA_ENABLED_CUDA=1
export MPICH_GPU_IPC_ENABLED=1
export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
export MPICH_GPU_NO_ASYNC_MEMCPY=0
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
OPT="--comms-overlap --comms-concurrent --shm-mpi 0" OPT="--comms-sequential --shm-mpi 1"
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt0 VOL=64.64.64.64
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt0 srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.1.1 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.1.1.4 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
#srun ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.8 --grid $VOL --accelerator-threads 8 --shm 2048 $OPT
OPT="--comms-overlap --comms-concurrent --shm-mpi 1"
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 64.64.64.64 --accelerator-threads 8 --shm 2048 $OPT > dwf.64.64.64.64.4node.opt1
srun ./benchmarks/Benchmark_dwf_fp32 --mpi 2.2.2.2 --grid 48.48.48.48 --accelerator-threads 8 --shm 2048 $OPT > dwf.48.48.48.48.4node.opt1

View File

@ -1,4 +1,4 @@
export CRAY_ACCEL_TARGET=nvidia80 export CRAY_ACCEL_TARGET=nvidia80
module load PrgEnv-gnu cpe-cuda cuda module load PrgEnv-gnu cpe-cuda cudatoolkit/11.4

View File

@ -2,11 +2,12 @@
--enable-simd=GPU \ --enable-simd=GPU \
--enable-gen-simd-width=32 \ --enable-gen-simd-width=32 \
--enable-unified=no \ --enable-unified=no \
--enable-shm=nvlink \ --enable-shm=no \
--disable-gparity \ --disable-gparity \
--enable-setdevice \ --disable-setdevice \
--disable-fermion-reps \ --disable-fermion-reps \
--enable-accelerator=cuda \ --enable-accelerator=cuda \
--enable-accelerator-cshift \
--prefix /ccs/home/paboyle/prefix \ --prefix /ccs/home/paboyle/prefix \
CXX=nvcc \ CXX=nvcc \
LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \ LDFLAGS=-L/ccs/home/paboyle/prefix/lib/ \

View File

@ -1,25 +1,39 @@
#!/bin/bash #!/bin/bash
#BSUB -P LGT104 #BSUB -P LGT104
#BSUB -W 2:00 #BSUB -W 0:20
#BSUB -nnodes 16 #BSUB -nnodes 16
#BSUB -J DWF #BSUB -J DWF
export OMP_NUM_THREADS=6 export OMP_NUM_THREADS=6
export PAMI_IBV_ADAPTER_AFFINITY=1 export PAMI_IBV_ADAPTER_AFFINITY=1
export PAMI_ENABLE_STRIPING=1 export PAMI_ENABLE_STRIPING=1
export OPT="--comms-concurrent --comms-overlap "
APP="./benchmarks/Benchmark_comms_host_device --mpi 4.4.4.3 " DIR=.
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > comms.16node.log source sourceme.sh
APP="./benchmarks/Benchmark_dwf_fp32 --grid 96.96.96.72 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.24.log
APP="./benchmarks/Benchmark_dwf_fp32 --grid 128.128.128.96 --mpi 4.4.4.3 --shm 2048 --shm-force-mpi 1 --device-mem 8000 --shm-force-mpi 1 $OPT " VOLS=( 32.32.32.16 32.32.32.64 64.32.32.64 64.32.64.64 64.64.64.64 64.64.64.128 64.64.64.256 64.64.64.512 128.64.64.64.512)
jsrun --nrs 16 -a6 -g6 -c42 -dpacked -b packed:7 --latency_priority gpu-cpu --smpiargs=-gpu $APP > dwf.16node.32.log MPI=( 1.1.1.1 1.1.1.4 2.1.1.4 2.1.2.4 2.2.2.4 2.2.2.8 2.2.2.16 2.2.2.32 4.4.2.32 )
RANKS=( 1 4 8 16 32 64 128 256 1024)
NODES=( 1 1 2 4 8 16 32 64 128)
INTS=( 0 1 2 3 4 5 6 7 8)
for i in 5
do
vol=${VOLS[$i]}
nodes=${NODES[$i]}
mpi=${MPI[$i]}
ranks=${RANKS[$i]}
JSRUN="jsrun --nrs $nodes -a4 -g4 -c42 -dpacked -b packed:10 --latency_priority gpu-cpu --smpiargs=-gpu"
PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-sequential --shm 2048 --shm-mpi 0"
$JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.seq.ker
PARAMS=" --accelerator-threads 8 --grid $vol --mpi $mpi --comms-overlap --shm 2048 --shm-mpi 0"
$JSRUN ./benchmarks/Benchmark_dwf_fp32 $PARAMS > run.v${vol}.n${nodes}.m${mpi}.over.ker
done

View File

@ -147,7 +147,7 @@ int main (int argc, char ** argv)
Complex p = TensorRemove(Tp); Complex p = TensorRemove(Tp);
std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl; std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl;
Complex LinkTraceScale(1.0/vol/4.0/3.0); Complex LinkTraceScale(1.0/vol/4.0/(Real)Nc);
TComplex Tl = sum(LinkTrace); TComplex Tl = sum(LinkTrace);
Complex l = TensorRemove(Tl); Complex l = TensorRemove(Tl);
std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl; std::cout<<GridLogMessage << "calculated link trace " <<l*LinkTraceScale<<std::endl;
@ -157,8 +157,10 @@ int main (int argc, char ** argv)
Complex ll= TensorRemove(TcP); Complex ll= TensorRemove(TcP);
std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl; std::cout<<GridLogMessage << "coarsened plaquettes sum to " <<ll*PlaqScale<<std::endl;
std::string clone2x3("./ckpoint_clone2x3.4000"); const string stNc = to_string( Nc ) ;
std::string clone3x3("./ckpoint_clone3x3.4000"); const string stNcM1 = to_string( Nc-1 ) ;
std::string clone2x3("./ckpoint_clone"+stNcM1+"x"+stNc+".4000");
std::string clone3x3("./ckpoint_clone"+stNc+"x"+stNc+".4000");
NerscIO::writeConfiguration(Umu,clone3x3,0,precision32); NerscIO::writeConfiguration(Umu,clone3x3,0,precision32);
NerscIO::writeConfiguration(Umu,clone2x3,1,precision32); NerscIO::writeConfiguration(Umu,clone2x3,1,precision32);

View File

@ -46,7 +46,7 @@ int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
const int Ls=8; const int Ls=12;
std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl; std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
@ -94,13 +94,32 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
double t1,t2,flops;
int iters;
for(int i=0;i<100;i++){
result_o = Zero();
t1=usecond();
mCG(src_o,result_o); mCG(src_o,result_o);
t2=usecond();
iters = mCG.TotalInnerIterations; //Number of inner CG iterations
flops = 1320.0*2*FGrid->gSites()*iters;
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl; std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000); ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
for(int i=0;i<100;i++){
result_o_2 = Zero();
t1=usecond();
CG(HermOpEO,src_o,result_o_2); CG(HermOpEO,src_o,result_o_2);
t2=usecond();
iters = CG.IterationsToComplete;
flops = 1320.0*2*FGrid->gSites()*iters;
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
MemoryManager::Print(); // MemoryManager::Print();
LatticeFermionD diff_o(FrbGrid); LatticeFermionD diff_o(FrbGrid);
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);

View File

@ -31,7 +31,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
@ -80,7 +79,8 @@ int main(int argc, char ** argv) {
Foo=lex; Foo=lex;
} }
typedef CartesianStencil<vobj,vobj,int> Stencil; typedef CartesianStencil<vobj,vobj,SimpleStencilParams> Stencil;
SimpleStencilParams p;
for(int dir=0;dir<4;dir++){ for(int dir=0;dir<4;dir++){
for(int disp=0;disp<Fine._fdimensions[dir];disp++){ for(int disp=0;disp<Fine._fdimensions[dir];disp++){
@ -90,7 +90,7 @@ int main(int argc, char ** argv) {
std::vector<int> directions(npoint,dir); std::vector<int> directions(npoint,dir);
std::vector<int> displacements(npoint,disp); std::vector<int> displacements(npoint,disp);
Stencil myStencil(&Fine,npoint,0,directions,displacements,0); Stencil myStencil(&Fine,npoint,0,directions,displacements,p);
Coordinate ocoor(4); Coordinate ocoor(4);
for(int o=0;o<Fine.oSites();o++){ for(int o=0;o<Fine.oSites();o++){
Fine.oCoorFromOindex(ocoor,o); Fine.oCoorFromOindex(ocoor,o);
@ -183,8 +183,8 @@ int main(int argc, char ** argv) {
std::vector<int> directions(npoint,dir); std::vector<int> directions(npoint,dir);
std::vector<int> displacements(npoint,disp); std::vector<int> displacements(npoint,disp);
Stencil EStencil(&rbFine,npoint,Even,directions,displacements,0); Stencil EStencil(&rbFine,npoint,Even,directions,displacements,p);
Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,0); Stencil OStencil(&rbFine,npoint,Odd,directions,displacements,p);
Coordinate ocoor(4); Coordinate ocoor(4);
for(int o=0;o<Fine.oSites();o++){ for(int o=0;o<Fine.oSites();o++){

View File

@ -117,8 +117,8 @@ void runBenchmark(int* argc, char*** argv) {
// type definitions // type definitions
typedef WilsonImpl<vCoeff_t, FundamentalRepresentation, CoeffReal> WImpl; typedef WilsonImpl<vCoeff_t, FundamentalRepresentation, CoeffReal> WImpl;
typedef WilsonCloverFermion<WImpl> WilsonCloverOperator; typedef WilsonCloverFermion<WImpl, CloverHelpers<WImpl>> WilsonCloverOperator;
typedef CompactWilsonCloverFermion<WImpl> CompactWilsonCloverOperator; typedef CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>> CompactWilsonCloverOperator;
typedef typename WilsonCloverOperator::FermionField Fermion; typedef typename WilsonCloverOperator::FermionField Fermion;
typedef typename WilsonCloverOperator::GaugeField Gauge; typedef typename WilsonCloverOperator::GaugeField Gauge;

View File

@ -9,6 +9,7 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Jamie Hudspith <renwick.james.hudspth@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -42,7 +43,7 @@ directory
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
; ;
int main(int argc, char** argv) { int main(int argc, char** argv) {
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
@ -60,6 +61,10 @@ int main(int argc, char** argv) {
<< std::endl; << std::endl;
SU2::printGenerators(); SU2::printGenerators();
std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl; std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
// guard as this code fails to compile for Nc != 3
#if (Nc == 3)
SU2Adjoint::printGenerators(); SU2Adjoint::printGenerators();
SU2::testGenerators(); SU2::testGenerators();
SU2Adjoint::testGenerators(); SU2Adjoint::testGenerators();
@ -111,8 +116,6 @@ int main(int argc, char** argv) {
// AdjointRepresentation has the predefined number of colours Nc // AdjointRepresentation has the predefined number of colours Nc
// Representations<FundamentalRepresentation, AdjointRepresentation, TwoIndexSymmetricRepresentation> RepresentationTypes(grid); // Representations<FundamentalRepresentation, AdjointRepresentation, TwoIndexSymmetricRepresentation> RepresentationTypes(grid);
LatticeGaugeField U(grid), V(grid); LatticeGaugeField U(grid), V(grid);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, U);
SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V); SU3::HotConfiguration<LatticeGaugeField>(gridRNG, V);
@ -181,7 +184,6 @@ int main(int argc, char** argv) {
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck) std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck)
<< std::endl; << std::endl;
// Construct the fundamental matrix in the group // Construct the fundamental matrix in the group
SU3::LatticeMatrix Af(grid); SU3::LatticeMatrix Af(grid);
SU3::FundamentalLieAlgebraMatrix(h_adj,Af); SU3::FundamentalLieAlgebraMatrix(h_adj,Af);
@ -198,7 +200,6 @@ int main(int argc, char** argv) {
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck) std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck)
<< std::endl; << std::endl;
// Tranform to the adjoint representation // Tranform to the adjoint representation
U = Zero(); // fill this with only one direction U = Zero(); // fill this with only one direction
pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields
@ -210,11 +211,7 @@ int main(int argc, char** argv) {
typename AdjointRep<Nc>::LatticeMatrix Diff_check_mat = Ur0 - Uadj; typename AdjointRep<Nc>::LatticeMatrix Diff_check_mat = Ur0 - Uadj;
std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl; std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl;
// TwoIndexRep tests // TwoIndexRep tests
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
@ -253,8 +250,6 @@ int main(int argc, char** argv) {
std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl; std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::testGenerators(); SU2TwoIndexAntiSymm::testGenerators();
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
<< std::endl; << std::endl;
std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors" std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors"
@ -357,7 +352,6 @@ int main(int argc, char** argv) {
SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2; SU3::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl; std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl;
// Exponentiate // Exponentiate
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid); typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid);
U2iS = expMat(Ar_sym, 1.0, 16); U2iS = expMat(Ar_sym, 1.0, 16);
@ -376,8 +370,6 @@ int main(int argc, char** argv) {
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS) std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS)
<< std::endl; << std::endl;
// Construct the fundamental matrix in the group // Construct the fundamental matrix in the group
SU3::LatticeMatrix Af_sym(grid); SU3::LatticeMatrix Af_sym(grid);
SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym); SU3::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
@ -403,9 +395,6 @@ int main(int argc, char** argv) {
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS; typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl; std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl;
if (TwoIndexRep<Nc, AntiSymmetric >::Dimension != 1){ if (TwoIndexRep<Nc, AntiSymmetric >::Dimension != 1){
std::cout << GridLogMessage << "*********************************************" std::cout << GridLogMessage << "*********************************************"
@ -513,20 +502,13 @@ int main(int argc, char** argv) {
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS; typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl; std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl;
} else { } else {
std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests " std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests "
"because representation is trivial (dim = 1)" "because representation is trivial (dim = 1)"
<< std::endl; << std::endl;
} }
#endif
Grid_finalize(); Grid_finalize();
} }

View File

@ -122,13 +122,14 @@ int main (int argc, char ** argv)
std::cout << "Determinant defect before projection " <<norm2(detU)<<std::endl; std::cout << "Determinant defect before projection " <<norm2(detU)<<std::endl;
tmp = U*adj(U) - ident; tmp = U*adj(U) - ident;
std::cout << "Unitarity check before projection " << norm2(tmp)<<std::endl; std::cout << "Unitarity check before projection " << norm2(tmp)<<std::endl;
#if (Nc == 3)
ProjectSU3(U); ProjectSU3(U);
detU= Determinant(U) ; detU= Determinant(U) ;
detU= detU -1.0; detU= detU -1.0;
std::cout << "Determinant ProjectSU3 defect " <<norm2(detU)<<std::endl; std::cout << "Determinant ProjectSU3 defect " <<norm2(detU)<<std::endl;
tmp = U*adj(U) - ident; tmp = U*adj(U) - ident;
std::cout << "Unitarity check after projection " << norm2(tmp)<<std::endl; std::cout << "Unitarity check after projection " << norm2(tmp)<<std::endl;
#endif
ProjectSUn(UU); ProjectSUn(UU);
detUU= Determinant(UU); detUU= Determinant(UU);

View File

@ -2,11 +2,12 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc Source file: ./tests/core/Test_wilson_clover.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -67,8 +68,6 @@ int main(int argc, char **argv)
tmp = Zero(); tmp = Zero();
FermionField err(&Grid); FermionField err(&Grid);
err = Zero(); err = Zero();
FermionField err2(&Grid);
err2 = Zero();
FermionField phi(&Grid); FermionField phi(&Grid);
random(pRNG, phi); random(pRNG, phi);
FermionField chi(&Grid); FermionField chi(&Grid);
@ -77,6 +76,8 @@ int main(int argc, char **argv)
SU<Nc>::HotConfiguration(pRNG, Umu); SU<Nc>::HotConfiguration(pRNG, Umu);
std::vector<LatticeColourMatrix> U(4, &Grid); std::vector<LatticeColourMatrix> U(4, &Grid);
double tolerance = 1e-4;
double volume = 1; double volume = 1;
for (int mu = 0; mu < Nd; mu++) for (int mu = 0; mu < Nd; mu++)
{ {
@ -88,7 +89,7 @@ int main(int argc, char **argv)
RealD csw_t = 1.0; RealD csw_t = 1.0;
WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params); WilsonCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
//Dwc.ImportGauge(Umu); // not necessary, included in the constructor CompactWilsonCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl; std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl;
@ -112,7 +113,24 @@ int main(int argc, char **argv)
setCheckerboard(r_eo, r_e); setCheckerboard(r_eo, r_e);
err = ref - r_eo; err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff " << norm2(err) << " " << norm2(ref) << " " << norm2(r_eo) << std::endl; std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc_compact.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc_compact.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl; std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
@ -152,6 +170,22 @@ int main(int argc, char **argv)
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl; std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl; std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl;
Dwc_compact.Meooe(chi_e, dchi_o);
Dwc_compact.Meooe(chi_o, dchi_e);
Dwc_compact.MeooeDag(phi_e, dphi_o);
Dwc_compact.MeooeDag(phi_o, dphi_e);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
@ -170,6 +204,20 @@ int main(int argc, char **argv)
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Mooee(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.Mooee(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl;
@ -189,6 +237,20 @@ int main(int argc, char **argv)
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInvDag(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl; std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl; std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl;
@ -208,6 +270,20 @@ int main(int argc, char **argv)
err = phi - chi; err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "================================================================" << std::endl; std::cout << GridLogMessage << "================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl;
@ -249,7 +325,7 @@ int main(int argc, char **argv)
///////////////// /////////////////
WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params); WilsonCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
Dwc_prime.ImportGauge(U_prime); CompactWilsonCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
tmp = Omega * src; tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp); pickCheckerboard(Even, src_e, tmp);
@ -263,6 +339,36 @@ int main(int argc, char **argv)
err = chi - adj(Omega) * phi; err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_compact_prime.Mooee(src_e, phi_e);
Dwc_compact_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "=================================================================" << std::endl; std::cout << GridLogMessage << "=================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl; std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl;
@ -272,7 +378,6 @@ int main(int argc, char **argv)
phi = Zero(); phi = Zero();
WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params); WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params);
Dw.ImportGauge(Umu);
Dw.M(src, result); Dw.M(src, result);
Dwc.M(src, chi); Dwc.M(src, chi);
@ -280,13 +385,24 @@ int main(int argc, char **argv)
Dwc_prime.M(Omega * src, phi); Dwc_prime.M(Omega * src, phi);
WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params); WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params);
Dw_prime.ImportGauge(U_prime);
Dw_prime.M(Omega * src, result2); Dw_prime.M(Omega * src, result2);
err = chi - adj(Omega) * phi; err = result - adj(Omega) * result2;
err2 = result - adj(Omega) * result2;
std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl;
std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err2) << std::endl; assert(fabs(norm2(err)) < tolerance);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff WilsonClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
Dwc_compact.M(src, chi);
Dwc_compact_prime.M(Omega * src, phi);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff CompactWilsonClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl; std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl;
@ -296,7 +412,6 @@ int main(int argc, char **argv)
phi = Zero(); phi = Zero();
err = Zero(); err = Zero();
WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0 WilsonCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0
Dwc_csw0.ImportGauge(Umu);
pickCheckerboard(Even, phi_e, phi); pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi); pickCheckerboard(Odd, phi_o, phi);
@ -317,6 +432,33 @@ int main(int argc, char **argv)
err = chi - phi; err = chi - phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
CompactWilsonCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_compact_csw0.Mooee(src_e, phi_e);
Dwc_compact_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl; std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl; std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl;
@ -348,9 +490,41 @@ int main(int argc, char **argv)
setCheckerboard(phi, phi_o); setCheckerboard(phi, phi_o);
err = ref - phi; err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff :" << norm2(ref) << std::endl; std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff :" << norm2(phi) << std::endl; std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff :" << norm2(err) << std::endl; std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
Dwc_compact.Meooe(src_o, phi_e);
Dwc_compact.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Grid_finalize(); Grid_finalize();
} }

View File

@ -0,0 +1,253 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cayley_cg.cc
Copyright (C) 2022
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
template<class What>
void TestConserved(What & Dw,
LatticeGaugeField &Umu,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
GridParallelRNG *RNG4);
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::Gamma5
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG4(UGrid);
std::vector<int> seeds4({1,2,3,4}); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
if( argc > 1 && argv[1][0] != '-' )
{
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, argv[1]);
}
else
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::HotConfiguration(RNG4,Umu);
}
typename WilsonCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
RealD mass = 0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonFermionR Dw(Umu,*UGrid,*UrbGrid,mass,params);
TestConserved<WilsonFermionR>(Dw,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonCloverFermionR Dwc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params);
TestConserved<WilsonCloverFermionR>(Dwc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"CompactWilsonCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
CompactWilsonCloverFermionR Dwcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params);
TestConserved<CompactWilsonCloverFermionR>(Dwcc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"WilsonExpCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
WilsonExpCloverFermionR Dewc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, anis, params);
TestConserved<WilsonExpCloverFermionR>(Dewc,Umu,UGrid,UrbGrid,&RNG4);
std::cout<<GridLogMessage <<"=================================="<<std::endl;
std::cout<<GridLogMessage <<"CompactWilsonExpCloverFermion test"<<std::endl;
std::cout<<GridLogMessage <<"=================================="<<std::endl;
CompactWilsonExpCloverFermionR Dewcc(Umu, *UGrid, *UrbGrid, mass, csw_r, csw_t, 1.0, anis, params);
TestConserved<CompactWilsonExpCloverFermionR>(Dewcc,Umu,UGrid,UrbGrid,&RNG4);
Grid_finalize();
}
template<class Action>
void TestConserved(Action & Dw,
LatticeGaugeField &Umu,
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
GridParallelRNG *RNG4)
{
LatticePropagator phys_src(UGrid);
LatticePropagator seqsrc(UGrid);
LatticePropagator prop4(UGrid);
LatticePropagator Vector_mu(UGrid);
LatticeComplex SV (UGrid);
LatticeComplex VV (UGrid);
LatticePropagator seqprop(UGrid);
SpinColourMatrix kronecker; kronecker=1.0;
Coordinate coor({0,0,0,0});
phys_src=Zero();
pokeSite(kronecker,phys_src,coor);
ConjugateGradient<LatticeFermion> CG(1.0e-16,100000);
SchurRedBlackDiagTwoSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> zpg;
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
LatticeFermion src4 (UGrid);
PropToFerm<Action>(src4,phys_src,s,c);
LatticeFermion result4(UGrid); result4=Zero();
schur(Dw,src4,result4,zpg);
std::cout<<GridLogMessage<<"spin "<<s<<" color "<<c<<" norm2(sourc4d) "<<norm2(src4)
<<" norm2(result4d) "<<norm2(result4)<<std::endl;
FermToProp<Action>(prop4,result4,s,c);
}
}
auto curr = Current::Vector;
const int mu_J=0;
const int t_J=0;
LatticeComplex ph (UGrid); ph=1.0;
Dw.SeqConservedCurrent(prop4,
seqsrc,
phys_src,
curr,
mu_J,
t_J,
t_J,// whole lattice
ph);
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
LatticeFermion src4 (UGrid);
PropToFerm<Action>(src4,seqsrc,s,c);
LatticeFermion result4(UGrid); result4=Zero();
schur(Dw,src4,result4,zpg);
FermToProp<Action>(seqprop,result4,s,c);
}
}
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gT(Gamma::Algebra::GammaT);
std::vector<TComplex> sumSV;
std::vector<TComplex> sumVV;
Dw.ContractConservedCurrent(prop4,prop4,Vector_mu,phys_src,Current::Vector,Tdir);
SV = trace(Vector_mu); // Scalar-Vector conserved current
VV = trace(gT*Vector_mu); // (local) Vector-Vector conserved current
// Spatial sum
sliceSum(SV,sumSV,Tdir);
sliceSum(VV,sumVV,Tdir);
const int Nt{static_cast<int>(sumSV.size())};
std::cout<<GridLogMessage<<"Vector Ward identity by timeslice (~ 0)"<<std::endl;
for(int t=0;t<Nt;t++){
std::cout<<GridLogMessage <<" t "<<t<<" SV "<<real(TensorRemove(sumSV[t]))<<" VV "<<real(TensorRemove(sumVV[t]))<<std::endl;
assert(abs(real(TensorRemove(sumSV[t]))) < 1e-10);
assert(abs(real(TensorRemove(sumVV[t]))) < 1e-2);
}
///////////////////////////////
// 3pt vs 2pt check
///////////////////////////////
{
Gamma::Algebra gA = Gamma::Algebra::Identity;
Gamma g(gA);
LatticePropagator cur(UGrid);
LatticePropagator tmp(UGrid);
LatticeComplex c(UGrid);
SpinColourMatrix qSite;
peekSite(qSite, seqprop, coor);
Complex test_S, test_V, check_S, check_V;
std::vector<TComplex> check_buf;
test_S = trace(qSite*g);
test_V = trace(qSite*g*Gamma::gmu[mu_J]);
Dw.ContractConservedCurrent(prop4,prop4,cur,phys_src,curr,mu_J);
c = trace(cur*g);
sliceSum(c, check_buf, Tp);
check_S = TensorRemove(check_buf[t_J]);
auto gmu=Gamma::gmu[mu_J];
c = trace(cur*g*gmu);
sliceSum(c, check_buf, Tp);
check_V = TensorRemove(check_buf[t_J]);
std::cout<<GridLogMessage << std::setprecision(14)<<"Test S = " << abs(test_S) << std::endl;
std::cout<<GridLogMessage << "Test V = " << abs(test_V) << std::endl;
std::cout<<GridLogMessage << "Check S = " << abs(check_S) << std::endl;
std::cout<<GridLogMessage << "Check V = " << abs(check_V) << std::endl;
// Check difference = 0
check_S = check_S - test_S;
check_V = check_V - test_V;
std::cout<<GridLogMessage << "Consistency check for sequential conserved " <<std::endl;
std::cout<<GridLogMessage << "Diff S = " << abs(check_S) << std::endl;
assert(abs(check_S) < 1e-8);
std::cout<<GridLogMessage << "Diff V = " << abs(check_V) << std::endl;
assert(abs(check_V) < 1e-8);
}
}

View File

@ -0,0 +1,530 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/core/Test_wilson_exp_clover.cc
Copyright (C) 2022
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Fabian Joswig <fabian.joswig@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
int main(int argc, char **argv)
{
Grid_init(&argc, &argv);
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
GridRedBlackCartesian RBGrid(&Grid);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REALF" << sizeof(RealF) << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REALD" << sizeof(RealD) << std::endl;
std::cout << GridLogMessage << "Grid floating point word size is REAL" << sizeof(Real) << std::endl;
std::vector<int> seeds({1, 2, 3, 4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
typedef typename WilsonExpCloverFermionR::FermionField FermionField;
typename WilsonExpCloverFermionR::ImplParams params;
WilsonAnisotropyCoefficients anis;
FermionField src(&Grid);
random(pRNG, src);
FermionField result(&Grid);
result = Zero();
FermionField result2(&Grid);
result2 = Zero();
FermionField ref(&Grid);
ref = Zero();
FermionField tmp(&Grid);
tmp = Zero();
FermionField err(&Grid);
err = Zero();
FermionField phi(&Grid);
random(pRNG, phi);
FermionField chi(&Grid);
random(pRNG, chi);
LatticeGaugeField Umu(&Grid);
SU<Nc>::HotConfiguration(pRNG, Umu);
std::vector<LatticeColourMatrix> U(4, &Grid);
double tolerance = 1e-4;
double volume = 1;
for (int mu = 0; mu < Nd; mu++)
{
volume = volume * latt_size[mu];
}
RealD mass = 0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonExpCloverFermionR Dwc(Umu, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
CompactWilsonExpCloverFermionR Dwc_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing that Deo + Doe = Dunprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
FermionField src_e(&RBGrid);
FermionField src_o(&RBGrid);
FermionField r_e(&RBGrid);
FermionField r_o(&RBGrid);
FermionField r_eo(&Grid);
pickCheckerboard(Even, src_e, src);
pickCheckerboard(Odd, src_o, src);
Dwc.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Meooe(src_e, r_o);
std::cout << GridLogMessage << "Applied Meo" << std::endl;
Dwc_compact.Meooe(src_o, r_e);
std::cout << GridLogMessage << "Applied Moe" << std::endl;
Dwc_compact.Dhop(src, ref, DaggerNo);
setCheckerboard(r_eo, r_o);
setCheckerboard(r_eo, r_e);
err = ref - r_eo;
std::cout << GridLogMessage << "EO norm diff compact\t" << norm2(err) << " (" << norm2(ref) << " - " << norm2(r_eo) << ")" << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test Ddagger is the dagger of D by requiring " << std::endl;
std::cout << GridLogMessage << "= < phi | Deo | chi > * = < chi | Deo^dag| phi> " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
FermionField chi_e(&RBGrid);
FermionField chi_o(&RBGrid);
FermionField dchi_e(&RBGrid);
FermionField dchi_o(&RBGrid);
FermionField phi_e(&RBGrid);
FermionField phi_o(&RBGrid);
FermionField dphi_e(&RBGrid);
FermionField dphi_o(&RBGrid);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc.Meooe(chi_e, dchi_o);
Dwc.Meooe(chi_o, dchi_e);
Dwc.MeooeDag(phi_e, dphi_o);
Dwc.MeooeDag(phi_o, dphi_e);
ComplexD pDce = innerProduct(phi_e, dchi_e);
ComplexD pDco = innerProduct(phi_o, dchi_o);
ComplexD cDpe = innerProduct(chi_e, dphi_e);
ComplexD cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) " << pDco - conj(cDpe) << std::endl;
Dwc_compact.Meooe(chi_e, dchi_o);
Dwc_compact.Meooe(chi_o, dchi_e);
Dwc_compact.MeooeDag(phi_e, dphi_o);
Dwc_compact.MeooeDag(phi_o, dphi_e);
pDce = innerProduct(phi_e, dchi_e);
pDco = innerProduct(phi_o, dchi_o);
cDpe = innerProduct(chi_e, dphi_e);
cDpo = innerProduct(chi_o, dphi_o);
std::cout << GridLogMessage << "e compact " << pDce << " " << cDpe << std::endl;
std::cout << GridLogMessage << "o compact " << pDco << " " << cDpo << std::endl;
std::cout << GridLogMessage << "pDce - conj(cDpo) compact " << pDce - conj(cDpo) << std::endl;
std::cout << GridLogMessage << "pDco - conj(cDpe) compact " << pDco - conj(cDpe) << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv Mee = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.Mooee(chi_e, src_e);
Dwc.MooeeInv(src_e, phi_e);
Dwc.Mooee(chi_o, src_o);
Dwc.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.Mooee(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.Mooee(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeDag MeeInvDag = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.MooeeDag(chi_e, src_e);
Dwc.MooeeInvDag(src_e, phi_e);
Dwc.MooeeDag(chi_o, src_o);
Dwc.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInvDag(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInvDag(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==============================================================" << std::endl;
std::cout << GridLogMessage << "= Test MeeInv MeeDag = 1 (if csw!=0) " << std::endl;
std::cout << GridLogMessage << "==============================================================" << std::endl;
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dwc.MooeeDag(chi_e, src_e);
Dwc.MooeeInv(src_e, phi_e);
Dwc.MooeeDag(chi_o, src_o);
Dwc.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Dwc_compact.MooeeDag(chi_e, src_e);
Dwc_compact.MooeeInv(src_e, phi_e);
Dwc_compact.MooeeDag(chi_o, src_o);
Dwc_compact.MooeeInv(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = phi - chi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term with EO preconditioning " << std::endl;
std::cout << GridLogMessage << "================================================================" << std::endl;
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc.Mooee(src_e, chi_e);
Dwc.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
////////////////////// Gauge Transformation
std::vector<int> seeds2({5, 6, 7, 8});
GridParallelRNG pRNG2(&Grid);
pRNG2.SeedFixedIntegers(seeds2);
LatticeColourMatrix Omega(&Grid);
LatticeColourMatrix ShiftedOmega(&Grid);
LatticeGaugeField U_prime(&Grid);
U_prime = Zero();
LatticeColourMatrix U_prime_mu(&Grid);
U_prime_mu = Zero();
SU<Nc>::LieRandomize(pRNG2, Omega, 1.0);
for (int mu = 0; mu < Nd; mu++)
{
U[mu] = peekLorentz(Umu, mu);
ShiftedOmega = Cshift(Omega, mu, 1);
U_prime_mu = Omega * U[mu] * adj(ShiftedOmega);
pokeLorentz(U_prime, U_prime_mu, mu);
}
/////////////////
WilsonExpCloverFermionR Dwc_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, anis, params);
CompactWilsonExpCloverFermionR Dwc_compact_prime(U_prime, Grid, RBGrid, mass, csw_r, csw_t, 1.0, anis, params);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_prime.Mooee(src_e, phi_e);
Dwc_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
tmp = Zero();
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
tmp = Omega * src;
pickCheckerboard(Even, src_e, tmp);
pickCheckerboard(Odd, src_o, tmp);
Dwc_compact_prime.Mooee(src_e, phi_e);
Dwc_compact_prime.Mooee(src_o, phi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "=================================================================" << std::endl;
std::cout << GridLogMessage << "= Testing gauge covariance Clover term w/o EO preconditioning " << std::endl;
std::cout << GridLogMessage << "================================================================" << std::endl;
chi = Zero();
phi = Zero();
WilsonFermionR Dw(Umu, Grid, RBGrid, mass, params);
Dw.M(src, result);
Dwc.M(src, chi);
Dwc_prime.M(Omega * src, phi);
WilsonFermionR Dw_prime(U_prime, Grid, RBGrid, mass, params);
Dw_prime.M(Omega * src, result2);
err = result - adj(Omega) * result2;
std::cout << GridLogMessage << "norm diff Wilson " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff WilsonExpClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
Dwc_compact.M(src, chi);
Dwc_compact_prime.M(Omega * src, phi);
err = chi - adj(Omega) * phi;
std::cout << GridLogMessage << "norm diff CompactWilsonExpClover " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing Mooee(csw=0) Clover to reproduce Mooee Wilson " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
chi = Zero();
phi = Zero();
err = Zero();
WilsonExpCloverFermionR Dwc_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_csw0.Mooee(src_e, phi_e);
Dwc_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
CompactWilsonExpCloverFermionR Dwc_compact_csw0(Umu, Grid, RBGrid, mass, 0.0, 0.0, 1.0, anis, params); // <-- Notice: csw=0
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
Dw.Mooee(src_e, chi_e);
Dw.Mooee(src_o, chi_o);
Dwc_compact_csw0.Mooee(src_e, phi_e);
Dwc_compact_csw0.Mooee(src_o, phi_o);
setCheckerboard(chi, chi_e);
setCheckerboard(chi, chi_o);
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
setCheckerboard(src, src_e);
setCheckerboard(src, src_o);
err = chi - phi;
std::cout << GridLogMessage << "norm diff compact " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
std::cout << GridLogMessage << "==========================================================" << std::endl;
std::cout << GridLogMessage << "= Testing EO operator is equal to the unprec " << std::endl;
std::cout << GridLogMessage << "==========================================================" << std::endl;
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc.Mooee(src_e, chi_e);
Dwc.Mooee(src_o, chi_o);
Dwc.Meooe(src_o, phi_e);
Dwc.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
chi = Zero();
phi = Zero();
err = Zero();
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd, phi_o, phi);
pickCheckerboard(Even, chi_e, chi);
pickCheckerboard(Odd, chi_o, chi);
// M phi = (Mooee src_e + Meooe src_o , Meooe src_e + Mooee src_o)
Dwc_compact.M(src, ref); // Reference result from the unpreconditioned operator
// EO matrix
Dwc_compact.Mooee(src_e, chi_e);
Dwc_compact.Mooee(src_o, chi_o);
Dwc_compact.Meooe(src_o, phi_e);
Dwc_compact.Meooe(src_e, phi_o);
phi_o += chi_o;
phi_e += chi_e;
setCheckerboard(phi, phi_e);
setCheckerboard(phi, phi_o);
err = ref - phi;
std::cout << GridLogMessage << "ref (unpreconditioned operator) diff compact : " << norm2(ref) << std::endl;
std::cout << GridLogMessage << "phi (EO decomposition) diff compact : " << norm2(phi) << std::endl;
std::cout << GridLogMessage << "norm diff compact : " << norm2(err) << std::endl;
assert(fabs(norm2(err)) < tolerance);
Grid_finalize();
}

View File

@ -132,8 +132,8 @@ int main(int argc, char **argv) {
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
//TheHMC.Resources.LoadBinaryCheckpointer(CPparams); TheHMC.Resources.LoadBinaryCheckpointer(CPparams);
TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); //TheHMC.Resources.LoadScidacCheckpointer(CPparams, SPar); this breaks for compilation without lime
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);

View File

@ -74,10 +74,10 @@ int main(int argc, char **argv) {
// Checkpointer definition // Checkpointer definition
CheckpointerParameters CPparams(Reader); CheckpointerParameters CPparams(Reader);
//TheHMC.Resources.LoadNerscCheckpointer(CPparams); TheHMC.Resources.LoadNerscCheckpointer(CPparams);
// Store metadata in the Scidac checkpointer // Store metadata in the Scidac checkpointer - obviously breaks without LIME
TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar); //TheHMC.Resources.LoadScidacCheckpointer(CPparams, WilsonPar);
RNGModuleParameters RNGpar(Reader); RNGModuleParameters RNGpar(Reader);
TheHMC.Resources.SetRNGSeeds(RNGpar); TheHMC.Resources.SetRNGSeeds(RNGpar);

View File

@ -37,6 +37,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
#ifdef HAVE_LIME
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis> class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{ {
@ -249,3 +251,11 @@ int main (int argc, char ** argv) {
Grid_finalize(); Grid_finalize();
} }
#else
int main( void )
{
return 0 ;
}
#endif // HAVE_LIME_H

View File

@ -36,7 +36,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
#ifdef HAVE_LIME
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis> class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
@ -250,3 +251,11 @@ int main (int argc, char ** argv) {
Grid_finalize(); Grid_finalize();
} }
#else
int main( void )
{
return 0 ;
}
#endif

View File

@ -93,8 +93,16 @@ int main(int argc, char** argv) {
// Setup of Dirac Matrix and Operator // // Setup of Dirac Matrix and Operator //
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
LatticeGaugeField Umu(Grid_f); SU3::HotConfiguration(pRNG_f, Umu); LatticeGaugeField Umu(Grid_f);
#if (Nc==2)
SU2::HotConfiguration(pRNG_f, Umu);
#elif (defined Nc==3)
SU3::HotConfiguration(pRNG_f, Umu);
#elif (defined Nc==4)
SU4::HotConfiguration(pRNG_f, Umu);
#elif (defined Nc==5)
SU5::HotConfiguration(pRNG_f, Umu);
#endif
RealD checkTolerance = (getPrecision<LatticeFermion>::value == 1) ? 1e-7 : 1e-15; RealD checkTolerance = (getPrecision<LatticeFermion>::value == 1) ? 1e-7 : 1e-15;
RealD mass = -0.30; RealD mass = -0.30;

View File

@ -34,6 +34,7 @@ using namespace Grid;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
#ifdef HAVE_LIME
typedef typename DomainWallFermionR::FermionField FermionField; typedef typename DomainWallFermionR::FermionField FermionField;
typedef typename DomainWallFermionR::ComplexField ComplexField; typedef typename DomainWallFermionR::ComplexField ComplexField;
typename DomainWallFermionR::ImplParams params; typename DomainWallFermionR::ImplParams params;
@ -237,4 +238,5 @@ int main (int argc, char ** argv)
} }
Grid_finalize(); Grid_finalize();
#endif // HAVE_LIME
} }

View File

@ -71,7 +71,12 @@ int main (int argc, char ** argv)
RealD mass = -0.1; RealD mass = -0.1;
RealD csw_r = 1.0; RealD csw_r = 1.0;
RealD csw_t = 1.0; RealD csw_t = 1.0;
RealD cF = 1.0;
WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t); WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
// HermitianOperator<WilsonFermion,LatticeFermion> HermOp(Dw); // HermitianOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
// ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); // ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
@ -80,12 +85,28 @@ int main (int argc, char ** argv)
LatticeFermion src_o(&RBGrid); LatticeFermion src_o(&RBGrid);
LatticeFermion result_o(&RBGrid); LatticeFermion result_o(&RBGrid);
pickCheckerboard(Odd,src_o,src); pickCheckerboard(Odd,src_o,src);
result_o=Zero();
SchurDiagMooeeOperator<WilsonCloverFermionR,LatticeFermion> HermOpEO(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl;
SchurDiagMooeeOperator<WilsonCloverFermionR,LatticeFermion> HermOpEO(Dw);
result_o=Zero();
CG(HermOpEO,src_o,result_o); CG(HermOpEO,src_o,result_o);
std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl;
SchurDiagMooeeOperator<CompactWilsonCloverFermionR,LatticeFermion> HermOpEO_compact(Dw_compact);
result_o=Zero();
CG(HermOpEO_compact,src_o,result_o);
std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl;
SchurDiagMooeeOperator<WilsonExpCloverFermionR,LatticeFermion> HermOpEO_exp(Dwe);
result_o=Zero();
CG(HermOpEO_exp,src_o,result_o);
std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl;
SchurDiagMooeeOperator<CompactWilsonExpCloverFermionR,LatticeFermion> HermOpEO_exp_compact(Dwe_compact);
result_o=Zero();
CG(HermOpEO_exp_compact,src_o,result_o);
Grid_finalize(); Grid_finalize();
} }

View File

@ -60,18 +60,36 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu); LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src); LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=Zero(); LatticeFermion result(&Grid);
LatticeFermion resid(&Grid); LatticeFermion resid(&Grid);
RealD mass = -0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG); SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
RealD mass = -0.1;
RealD csw_r = 1.0;
RealD csw_t = 1.0;
RealD cF = 1.0;
std::cout << GridLogMessage << "Testing Wilson Clover" << std::endl;
WilsonCloverFermionR Dw(Umu, Grid, RBGrid, mass, csw_r, csw_t);
result=Zero();
SchurSolver(Dw,src,result); SchurSolver(Dw,src,result);
std::cout << GridLogMessage << "Testing Compact Wilson Clover" << std::endl;
CompactWilsonCloverFermionR Dw_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
result=Zero();
SchurSolver(Dw_compact,src,result);
std::cout << GridLogMessage << "Testing Wilson Exp Clover" << std::endl;
WilsonExpCloverFermionR Dwe(Umu, Grid, RBGrid, mass, csw_r, csw_t);
result=Zero();
SchurSolver(Dwe,src,result);
std::cout << GridLogMessage << "Testing Compact Wilson Exp Clover" << std::endl;
CompactWilsonExpCloverFermionR Dwe_compact(Umu, Grid, RBGrid, mass, csw_r, csw_t, 0.0);
result=Zero();
SchurSolver(Dwe_compact,src,result);
Grid_finalize(); Grid_finalize();
} }

Some files were not shown because too many files have changed in this diff Show More