mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 22:07:05 +01:00
Compare commits
23 Commits
3c67d626ba
...
feature/ad
Author | SHA1 | Date | |
---|---|---|---|
b284d50863 | |||
92def28bd3 | |||
ca10bfa1c7 | |||
0e27e3847d | |||
8cfc7342cd | |||
15ae317858 | |||
834f536b5f | |||
c332d9f08b | |||
cf2923d5dd | |||
0e4413ddde | |||
009ccd581e | |||
8cd4263974 | |||
d45c868656 | |||
955a8113de | |||
dbe210dd53 | |||
86e11743ca | |||
980e721f6e | |||
e2a0142d87 | |||
895244ecc3 | |||
addeb621a7 | |||
a7fb25adf6 | |||
e947992957 | |||
bb89a82a07 |
56
.travis.yml
56
.travis.yml
@ -1,56 +0,0 @@
|
|||||||
language: cpp
|
|
||||||
|
|
||||||
cache:
|
|
||||||
directories:
|
|
||||||
- clang
|
|
||||||
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- os: osx
|
|
||||||
osx_image: xcode8.3
|
|
||||||
compiler: clang
|
|
||||||
|
|
||||||
before_install:
|
|
||||||
- export GRIDDIR=`pwd`
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
|
|
||||||
|
|
||||||
install:
|
|
||||||
- export CWD=`pwd`
|
|
||||||
- echo $CWD
|
|
||||||
- export CC=$CC$VERSION
|
|
||||||
- export CXX=$CXX$VERSION
|
|
||||||
- echo $PATH
|
|
||||||
- which autoconf
|
|
||||||
- autoconf --version
|
|
||||||
- which automake
|
|
||||||
- automake --version
|
|
||||||
- which $CC
|
|
||||||
- $CC --version
|
|
||||||
- which $CXX
|
|
||||||
- $CXX --version
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
|
|
||||||
|
|
||||||
script:
|
|
||||||
- ./bootstrap.sh
|
|
||||||
- mkdir build
|
|
||||||
- cd build
|
|
||||||
- mkdir lime
|
|
||||||
- cd lime
|
|
||||||
- mkdir build
|
|
||||||
- cd build
|
|
||||||
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
|
|
||||||
- tar xf lime-1.3.2.tar.gz
|
|
||||||
- cd lime-1.3.2
|
|
||||||
- ./configure --prefix=$CWD/build/lime/install
|
|
||||||
- make -j4
|
|
||||||
- make install
|
|
||||||
- cd $CWD/build
|
|
||||||
- ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
|
|
||||||
- make -j4
|
|
||||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
|
||||||
- make check
|
|
@ -122,8 +122,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||||
cshiftVector<vobj> send_buf(buffer_size);
|
static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size);
|
||||||
cshiftVector<vobj> recv_buf(buffer_size);
|
static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size);
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||||
@ -198,8 +198,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||||
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||||
scalar_object * recv_buf_extract_mpi;
|
scalar_object * recv_buf_extract_mpi;
|
||||||
scalar_object * send_buf_extract_mpi;
|
scalar_object * send_buf_extract_mpi;
|
||||||
|
|
||||||
@ -294,8 +294,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
|
||||||
cshiftVector<vobj> send_buf_v(buffer_size);
|
static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
|
||||||
cshiftVector<vobj> recv_buf_v(buffer_size);
|
static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
|
||||||
vobj *send_buf;
|
vobj *send_buf;
|
||||||
vobj *recv_buf;
|
vobj *recv_buf;
|
||||||
{
|
{
|
||||||
@ -381,8 +381,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
// int words = sizeof(vobj)/sizeof(vector_type);
|
// int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
|
||||||
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd);
|
static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
|
||||||
scalar_object * recv_buf_extract_mpi;
|
scalar_object * recv_buf_extract_mpi;
|
||||||
scalar_object * send_buf_extract_mpi;
|
scalar_object * send_buf_extract_mpi;
|
||||||
{
|
{
|
||||||
|
@ -128,7 +128,7 @@ inline void MachineCharacteristics(FieldMetaData &header)
|
|||||||
std::time_t t = std::time(nullptr);
|
std::time_t t = std::time(nullptr);
|
||||||
std::tm tm_ = *std::localtime(&t);
|
std::tm tm_ = *std::localtime(&t);
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
// oss << std::put_time(&tm_, "%c %Z");
|
oss << std::put_time(&tm_, "%c %Z");
|
||||||
header.creation_date = oss.str();
|
header.creation_date = oss.str();
|
||||||
header.archive_date = header.creation_date;
|
header.archive_date = header.creation_date;
|
||||||
|
|
||||||
|
@ -205,11 +205,20 @@ public:
|
|||||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preferred interface
|
||||||
|
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||||
|
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
||||||
|
std::string file,
|
||||||
|
std::string ens_label = std::string("DWF"))
|
||||||
|
{
|
||||||
|
writeConfiguration(Umu,file,0,1,ens_label);
|
||||||
|
}
|
||||||
template<class GaugeStats=PeriodicGaugeStatistics>
|
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||||
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
||||||
std::string file,
|
std::string file,
|
||||||
int two_row,
|
int two_row,
|
||||||
int bits32)
|
int bits32,
|
||||||
|
std::string ens_label = std::string("DWF"))
|
||||||
{
|
{
|
||||||
typedef vLorentzColourMatrixD vobj;
|
typedef vLorentzColourMatrixD vobj;
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
@ -219,8 +228,8 @@ public:
|
|||||||
// Following should become arguments
|
// Following should become arguments
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
header.sequence_number = 1;
|
header.sequence_number = 1;
|
||||||
header.ensemble_id = "UKQCD";
|
header.ensemble_id = std::string("UKQCD");
|
||||||
header.ensemble_label = "DWF";
|
header.ensemble_label = ens_label;
|
||||||
|
|
||||||
typedef LorentzColourMatrixD fobj3D;
|
typedef LorentzColourMatrixD fobj3D;
|
||||||
typedef LorentzColour2x3D fobj2D;
|
typedef LorentzColour2x3D fobj2D;
|
||||||
@ -232,7 +241,7 @@ public:
|
|||||||
GaugeStats Stats; Stats(Umu,header);
|
GaugeStats Stats; Stats(Umu,header);
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
|
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
|
|
||||||
// Sod it -- always write 3x3 double
|
// Sod it -- always write 3x3 double
|
||||||
header.floating_point = std::string("IEEE64BIG");
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
|
@ -291,12 +291,6 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
|
|||||||
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
|
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
|
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
|
||||||
|
|
||||||
#ifndef GRID_CUDA
|
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
|
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
|
|
||||||
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
////////////////////
|
////////////////////
|
||||||
|
@ -183,7 +183,8 @@ NAMESPACE_CHECK(ImplStaggered);
|
|||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
// Single flavour one component spinors with colour index. 5d vec
|
// Single flavour one component spinors with colour index. 5d vec
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>
|
// Deprecate Vec5d
|
||||||
NAMESPACE_CHECK(ImplStaggered5dVec);
|
//#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>
|
||||||
|
//NAMESPACE_CHECK(ImplStaggered5dVec);
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,19 +72,23 @@ public:
|
|||||||
|
|
||||||
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
static accelerator_inline void multLink(SiteSpinor &phi,
|
template<class _Spinor>
|
||||||
|
static accelerator_inline void multLink(_Spinor &phi,
|
||||||
const SiteDoubledGaugeField &U,
|
const SiteDoubledGaugeField &U,
|
||||||
const SiteSpinor &chi,
|
const _Spinor &chi,
|
||||||
int mu)
|
int mu)
|
||||||
{
|
{
|
||||||
mult(&phi(), &U(mu), &chi());
|
auto UU = coalescedRead(U(mu));
|
||||||
|
mult(&phi(), &UU, &chi());
|
||||||
}
|
}
|
||||||
static accelerator_inline void multLinkAdd(SiteSpinor &phi,
|
template<class _Spinor>
|
||||||
|
static accelerator_inline void multLinkAdd(_Spinor &phi,
|
||||||
const SiteDoubledGaugeField &U,
|
const SiteDoubledGaugeField &U,
|
||||||
const SiteSpinor &chi,
|
const _Spinor &chi,
|
||||||
int mu)
|
int mu)
|
||||||
{
|
{
|
||||||
mac(&phi(), &U(mu), &chi());
|
auto UU = coalescedRead(U(mu));
|
||||||
|
mac(&phi(), &UU, &chi());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class ref>
|
template <class ref>
|
||||||
|
@ -184,18 +184,22 @@ public:
|
|||||||
mat = TraceIndex<SpinIndex>(P);
|
mat = TraceIndex<SpinIndex>(P);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
|
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
|
||||||
|
{
|
||||||
for (int mu = 0; mu < Nd; mu++)
|
for (int mu = 0; mu < Nd; mu++)
|
||||||
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
|
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu)
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
|
{
|
||||||
|
#undef USE_OLD_INSERT_FORCE
|
||||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||||
|
autoView( mat_v , mat, AcceleratorWrite);
|
||||||
|
#ifdef USE_OLD_INSERT_FORCE
|
||||||
GaugeLinkField tmp(mat.Grid());
|
GaugeLinkField tmp(mat.Grid());
|
||||||
tmp = Zero();
|
tmp = Zero();
|
||||||
{
|
{
|
||||||
|
const int Nsimd = SiteSpinor::Nsimd();
|
||||||
autoView( tmp_v , tmp, AcceleratorWrite);
|
autoView( tmp_v , tmp, AcceleratorWrite);
|
||||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
@ -208,6 +212,29 @@ public:
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
const int Nsimd = SiteSpinor::Nsimd();
|
||||||
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
|
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
|
||||||
|
int sU=sss;
|
||||||
|
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||||
|
ColorMatrixType sum;
|
||||||
|
zeroit(sum);
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int sF = s+Ls*sU;
|
||||||
|
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||||
|
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
|
||||||
|
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
|
||||||
|
auto op = outerProduct(bb,aa);
|
||||||
|
sum = sum + op;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
coalescedWrite(mat_v[sU](mu)(), sum);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -880,11 +880,23 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<RealD> G_s(Ls,1.0);
|
std::vector<RealD> G_s(Ls,1.0);
|
||||||
|
Integer sign = 1; // sign flip for vector/tadpole
|
||||||
if ( curr_type == Current::Axial ) {
|
if ( curr_type == Current::Axial ) {
|
||||||
for(int s=0;s<Ls/2;s++){
|
for(int s=0;s<Ls/2;s++){
|
||||||
G_s[s] = -1.0;
|
G_s[s] = -1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if ( curr_type == Current::Tadpole ) {
|
||||||
|
auto b=this->_b;
|
||||||
|
auto c=this->_c;
|
||||||
|
if ( b == 1 && c == 0 ) {
|
||||||
|
sign = -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
|
||||||
|
assert(b==1 && c==0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
|
|
||||||
@ -907,7 +919,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
|
|
||||||
tmp = Cshift(tmp,mu,1);
|
tmp = Cshift(tmp,mu,1);
|
||||||
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
|
||||||
tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
tmp = sign*G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
|
||||||
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
|
||||||
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
|
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
|
||||||
|
|
||||||
|
@ -680,7 +680,8 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st,
|
|||||||
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
gauge2 =(uint64_t)&UU[sU]( Z ); \
|
||||||
gauge3 =(uint64_t)&UU[sU]( T );
|
gauge3 =(uint64_t)&UU[sU]( T );
|
||||||
|
|
||||||
|
#undef STAG_VEC5D
|
||||||
|
#ifdef STAG_VEC5D
|
||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
#include <Grid/simd/Intel512single.h>
|
#include <Grid/simd/Intel512single.h>
|
||||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
|
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
|
||||||
@ -790,7 +791,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define PERMUTE_DIR3 __asm__ ( \
|
#define PERMUTE_DIR3 __asm__ ( \
|
||||||
|
@ -32,25 +32,50 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
#define LOAD_CHI(b) \
|
#ifdef GRID_SIMT
|
||||||
|
|
||||||
|
#define LOAD_CHI(ptype,b) \
|
||||||
|
const SiteSpinor & ref (b[offset]); \
|
||||||
|
Chi_0=coalescedReadPermute<ptype>(ref()()(0),perm,lane); \
|
||||||
|
Chi_1=coalescedReadPermute<ptype>(ref()()(1),perm,lane); \
|
||||||
|
Chi_2=coalescedReadPermute<ptype>(ref()()(2),perm,lane);
|
||||||
|
|
||||||
|
#define LOAD_CHI_COMMS(b) \
|
||||||
const SiteSpinor & ref (b[offset]); \
|
const SiteSpinor & ref (b[offset]); \
|
||||||
Chi_0=ref()()(0);\
|
Chi_0=coalescedRead(ref()()(0),lane); \
|
||||||
Chi_1=ref()()(1);\
|
Chi_1=coalescedRead(ref()()(1),lane); \
|
||||||
Chi_2=ref()()(2);
|
Chi_2=coalescedRead(ref()()(2),lane);
|
||||||
|
|
||||||
|
#define PERMUTE_DIR(dir) ;
|
||||||
|
#else
|
||||||
|
#define LOAD_CHI(ptype,b) LOAD_CHI_COMMS(b)
|
||||||
|
|
||||||
|
#define LOAD_CHI_COMMS(b) \
|
||||||
|
const SiteSpinor & ref (b[offset]); \
|
||||||
|
Chi_0=ref()()(0); \
|
||||||
|
Chi_1=ref()()(1); \
|
||||||
|
Chi_2=ref()()(2);
|
||||||
|
|
||||||
|
#define PERMUTE_DIR(dir) \
|
||||||
|
permute##dir(Chi_0,Chi_0); \
|
||||||
|
permute##dir(Chi_1,Chi_1); \
|
||||||
|
permute##dir(Chi_2,Chi_2);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// To splat or not to splat depends on the implementation
|
// To splat or not to splat depends on the implementation
|
||||||
#define MULT(A,UChi) \
|
#define MULT(A,UChi) \
|
||||||
auto & ref(U[sU](A)); \
|
auto & ref(U[sU](A)); \
|
||||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
U_00=coalescedRead(ref()(0,0),lane); \
|
||||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
U_10=coalescedRead(ref()(1,0),lane); \
|
||||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
U_20=coalescedRead(ref()(2,0),lane); \
|
||||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
U_01=coalescedRead(ref()(0,1),lane); \
|
||||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
U_11=coalescedRead(ref()(1,1),lane); \
|
||||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
U_21=coalescedRead(ref()(2,1),lane); \
|
||||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
U_02=coalescedRead(ref()(0,2),lane); \
|
||||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
U_12=coalescedRead(ref()(1,2),lane); \
|
||||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
U_22=coalescedRead(ref()(2,2),lane); \
|
||||||
UChi ## _0 = U_00*Chi_0; \
|
UChi ## _0 = U_00*Chi_0; \
|
||||||
UChi ## _1 = U_10*Chi_0;\
|
UChi ## _1 = U_10*Chi_0;\
|
||||||
UChi ## _2 = U_20*Chi_0;\
|
UChi ## _2 = U_20*Chi_0;\
|
||||||
@ -63,15 +88,15 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
#define MULT_ADD(U,A,UChi) \
|
#define MULT_ADD(U,A,UChi) \
|
||||||
auto & ref(U[sU](A)); \
|
auto & ref(U[sU](A)); \
|
||||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
U_00=coalescedRead(ref()(0,0),lane); \
|
||||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
U_10=coalescedRead(ref()(1,0),lane); \
|
||||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
U_20=coalescedRead(ref()(2,0),lane); \
|
||||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
U_01=coalescedRead(ref()(0,1),lane); \
|
||||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
U_11=coalescedRead(ref()(1,1),lane); \
|
||||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
U_21=coalescedRead(ref()(2,1),lane); \
|
||||||
Impl::loadLinkElement(U_02,ref()(0,2)); \
|
U_02=coalescedRead(ref()(0,2),lane); \
|
||||||
Impl::loadLinkElement(U_12,ref()(1,2)); \
|
U_12=coalescedRead(ref()(1,2),lane); \
|
||||||
Impl::loadLinkElement(U_22,ref()(2,2)); \
|
U_22=coalescedRead(ref()(2,2),lane); \
|
||||||
UChi ## _0 += U_00*Chi_0; \
|
UChi ## _0 += U_00*Chi_0; \
|
||||||
UChi ## _1 += U_10*Chi_0;\
|
UChi ## _1 += U_10*Chi_0;\
|
||||||
UChi ## _2 += U_20*Chi_0;\
|
UChi ## _2 += U_20*Chi_0;\
|
||||||
@ -83,24 +108,18 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
UChi ## _2 += U_22*Chi_2;
|
UChi ## _2 += U_22*Chi_2;
|
||||||
|
|
||||||
|
|
||||||
#define PERMUTE_DIR(dir) \
|
|
||||||
permute##dir(Chi_0,Chi_0); \
|
|
||||||
permute##dir(Chi_1,Chi_1); \
|
|
||||||
permute##dir(Chi_2,Chi_2);
|
|
||||||
|
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||||
offset = SE->_offset; \
|
offset = SE->_offset; \
|
||||||
local = SE->_is_local; \
|
local = SE->_is_local; \
|
||||||
perm = SE->_permute; \
|
perm = SE->_permute; \
|
||||||
if ( local ) { \
|
if ( local ) { \
|
||||||
LOAD_CHI(in); \
|
LOAD_CHI(Perm,in); \
|
||||||
if ( perm) { \
|
if ( perm) { \
|
||||||
PERMUTE_DIR(Perm); \
|
PERMUTE_DIR(Perm); \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
LOAD_CHI(buf); \
|
LOAD_CHI_COMMS(buf); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
|
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
|
||||||
@ -116,19 +135,18 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
|
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
|
||||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||||
offset = SE->_offset; \
|
offset = SE->_offset; \
|
||||||
local = SE->_is_local; \
|
local = SE->_is_local; \
|
||||||
perm = SE->_permute; \
|
perm = SE->_permute; \
|
||||||
if ( local ) { \
|
if ( local ) { \
|
||||||
LOAD_CHI(in); \
|
LOAD_CHI(Perm,in); \
|
||||||
if ( perm) { \
|
if ( perm) { \
|
||||||
PERMUTE_DIR(Perm); \
|
PERMUTE_DIR(Perm); \
|
||||||
} \
|
} \
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
LOAD_CHI(buf); \
|
LOAD_CHI_COMMS(buf); \
|
||||||
} \
|
} \
|
||||||
if (local || st.same_node[Dir] ) { \
|
if (local || st.same_node[Dir] ) { \
|
||||||
MULT_ADD(U,Dir,even); \
|
MULT_ADD(U,Dir,even); \
|
||||||
@ -140,10 +158,32 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
local = SE->_is_local; \
|
local = SE->_is_local; \
|
||||||
if ((!local) && (!st.same_node[Dir]) ) { \
|
if ((!local) && (!st.same_node[Dir]) ) { \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
{ LOAD_CHI(buf); } \
|
{ LOAD_CHI_COMMS(buf); } \
|
||||||
{ MULT_ADD(U,Dir,even); } \
|
{ MULT_ADD(U,Dir,even); } \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define HAND_DECLARATIONS(Simd) \
|
||||||
|
Simd even_0; \
|
||||||
|
Simd even_1; \
|
||||||
|
Simd even_2; \
|
||||||
|
Simd odd_0; \
|
||||||
|
Simd odd_1; \
|
||||||
|
Simd odd_2; \
|
||||||
|
\
|
||||||
|
Simd Chi_0; \
|
||||||
|
Simd Chi_1; \
|
||||||
|
Simd Chi_2; \
|
||||||
|
\
|
||||||
|
Simd U_00; \
|
||||||
|
Simd U_10; \
|
||||||
|
Simd U_20; \
|
||||||
|
Simd U_01; \
|
||||||
|
Simd U_11; \
|
||||||
|
Simd U_21; \
|
||||||
|
Simd U_02; \
|
||||||
|
Simd U_12; \
|
||||||
|
Simd U_22;
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
template <int Naik> accelerator_inline
|
template <int Naik> accelerator_inline
|
||||||
@ -155,28 +195,14 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
|||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
Simd even_0; // 12 regs on knc
|
|
||||||
Simd even_1;
|
|
||||||
Simd even_2;
|
|
||||||
Simd odd_0; // 12 regs on knc
|
|
||||||
Simd odd_1;
|
|
||||||
Simd odd_2;
|
|
||||||
|
|
||||||
Simd Chi_0; // two spinor; 6 regs
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
Simd Chi_1;
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
Simd Chi_2;
|
typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
|
||||||
|
HAND_DECLARATIONS(Simt);
|
||||||
Simd U_00; // two rows of U matrix
|
|
||||||
Simd U_10;
|
|
||||||
Simd U_20;
|
|
||||||
Simd U_01;
|
|
||||||
Simd U_11;
|
|
||||||
Simd U_21; // 2 reg left.
|
|
||||||
Simd U_02;
|
|
||||||
Simd U_12;
|
|
||||||
Simd U_22;
|
|
||||||
|
|
||||||
SiteSpinor result;
|
typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
|
||||||
|
calcSiteSpinor result;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
@ -215,7 +241,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
|
|||||||
result()()(1) = even_1 + odd_1;
|
result()()(1) = even_1 + odd_1;
|
||||||
result()()(2) = even_2 + odd_2;
|
result()()(2) = even_2 + odd_2;
|
||||||
}
|
}
|
||||||
vstream(out[sF],result);
|
coalescedWrite(out[sF],result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,28 +256,13 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
|||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
Simd even_0; // 12 regs on knc
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
Simd even_1;
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
Simd even_2;
|
typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
|
||||||
Simd odd_0; // 12 regs on knc
|
HAND_DECLARATIONS(Simt);
|
||||||
Simd odd_1;
|
|
||||||
Simd odd_2;
|
|
||||||
|
|
||||||
Simd Chi_0; // two spinor; 6 regs
|
typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
|
||||||
Simd Chi_1;
|
calcSiteSpinor result;
|
||||||
Simd Chi_2;
|
|
||||||
|
|
||||||
Simd U_00; // two rows of U matrix
|
|
||||||
Simd U_10;
|
|
||||||
Simd U_20;
|
|
||||||
Simd U_01;
|
|
||||||
Simd U_11;
|
|
||||||
Simd U_21; // 2 reg left.
|
|
||||||
Simd U_02;
|
|
||||||
Simd U_12;
|
|
||||||
Simd U_22;
|
|
||||||
|
|
||||||
SiteSpinor result;
|
|
||||||
int offset, ptype, local, perm;
|
int offset, ptype, local, perm;
|
||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
@ -261,8 +272,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
|||||||
// int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
{
|
{
|
||||||
|
|
||||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
zeroit(even_0); zeroit(even_1); zeroit(even_2);
|
||||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
|
||||||
|
|
||||||
skew = 0;
|
skew = 0;
|
||||||
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
|
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
|
||||||
@ -294,7 +305,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
|
|||||||
result()()(1) = even_1 + odd_1;
|
result()()(1) = even_1 + odd_1;
|
||||||
result()()(2) = even_2 + odd_2;
|
result()()(2) = even_2 + odd_2;
|
||||||
}
|
}
|
||||||
vstream(out[sF],result);
|
coalescedWrite(out[sF],result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -309,28 +320,13 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
|||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
Simd even_0; // 12 regs on knc
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
Simd even_1;
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
Simd even_2;
|
typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
|
||||||
Simd odd_0; // 12 regs on knc
|
HAND_DECLARATIONS(Simt);
|
||||||
Simd odd_1;
|
|
||||||
Simd odd_2;
|
|
||||||
|
|
||||||
Simd Chi_0; // two spinor; 6 regs
|
typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
|
||||||
Simd Chi_1;
|
calcSiteSpinor result;
|
||||||
Simd Chi_2;
|
|
||||||
|
|
||||||
Simd U_00; // two rows of U matrix
|
|
||||||
Simd U_10;
|
|
||||||
Simd U_20;
|
|
||||||
Simd U_01;
|
|
||||||
Simd U_11;
|
|
||||||
Simd U_21; // 2 reg left.
|
|
||||||
Simd U_02;
|
|
||||||
Simd U_12;
|
|
||||||
Simd U_22;
|
|
||||||
|
|
||||||
SiteSpinor result;
|
|
||||||
int offset, ptype, local;
|
int offset, ptype, local;
|
||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
@ -340,8 +336,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
|||||||
// int sF=s+LLs*sU;
|
// int sF=s+LLs*sU;
|
||||||
{
|
{
|
||||||
|
|
||||||
even_0 = Zero(); even_1 = Zero(); even_2 = Zero();
|
zeroit(even_0); zeroit(even_1); zeroit(even_2);
|
||||||
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero();
|
zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
skew = 0;
|
skew = 0;
|
||||||
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
|
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
|
||||||
@ -374,7 +370,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
|||||||
result()()(1) = even_1 + odd_1;
|
result()()(1) = even_1 + odd_1;
|
||||||
result()()(2) = even_2 + odd_2;
|
result()()(2) = even_2 + odd_2;
|
||||||
}
|
}
|
||||||
out[sF] = out[sF] + result;
|
coalescedWrite(out[sF] , out(sF)+ result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -397,6 +393,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
|
|||||||
const FermionFieldView &in, FermionFieldView &out, int dag); \
|
const FermionFieldView &in, FermionFieldView &out, int dag); \
|
||||||
*/
|
*/
|
||||||
#undef LOAD_CHI
|
#undef LOAD_CHI
|
||||||
|
#undef HAND_DECLARATIONS
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
int perm= SE->_permute; \
|
||||||
chi_p = χ \
|
chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
|
||||||
} else { \
|
|
||||||
chi_p = &in[SE->_offset]; \
|
|
||||||
} \
|
|
||||||
} else { \
|
} else { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir);
|
acceleratorSynchronise(); \
|
||||||
|
multLink(Uchi, U[sU], chi, Dir);
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
if (SE->_permute) { \
|
int perm= SE->_permute; \
|
||||||
chi_p = χ \
|
chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
|
||||||
permute(chi, in[SE->_offset], ptype); \
|
|
||||||
} else { \
|
|
||||||
chi_p = &in[SE->_offset]; \
|
|
||||||
} \
|
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], chi, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
||||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
chi_p = &buf[SE->_offset]; \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
multLink(Uchi, U[sU], *chi_p, Dir); \
|
multLink(Uchi, U[sU], chi, Dir); \
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
|||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out, int dag)
|
const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||||
{
|
{
|
||||||
const SiteSpinor *chi_p;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor chi;
|
calcSpinor chi;
|
||||||
SiteSpinor Uchi;
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int skew;
|
int skew;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
//
|
//
|
||||||
@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
|
|||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
vstream(out[sF], Uchi);
|
coalescedWrite(out[sF], Uchi,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -130,13 +125,16 @@ template <int Naik> accelerator_inline
|
|||||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
const SiteSpinor *chi_p;
|
{
|
||||||
SiteSpinor chi;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor Uchi;
|
calcSpinor chi;
|
||||||
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
// int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
|
|||||||
if ( dag ) {
|
if ( dag ) {
|
||||||
Uchi = - Uchi;
|
Uchi = - Uchi;
|
||||||
}
|
}
|
||||||
vstream(out[sF], Uchi);
|
coalescedWrite(out[sF], Uchi,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -178,14 +176,17 @@ template <int Naik> accelerator_inline
|
|||||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
||||||
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int sF, int sU,
|
SiteSpinor *buf, int sF, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag)
|
||||||
const SiteSpinor *chi_p;
|
{
|
||||||
// SiteSpinor chi;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
SiteSpinor Uchi;
|
calcSpinor chi;
|
||||||
|
calcSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
int skew ;
|
int skew ;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
|
|
||||||
// for(int s=0;s<LLs;s++){
|
// for(int s=0;s<LLs;s++){
|
||||||
// int sF=LLs*sU+s;
|
// int sF=LLs*sU+s;
|
||||||
@ -211,11 +212,12 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
|
|||||||
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||||
}
|
}
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
if ( dag ) {
|
auto _out = coalescedRead(out[sF],lane);
|
||||||
out[sF] = out[sF] - Uchi;
|
if ( dag ) {
|
||||||
|
coalescedWrite(out[sF], _out-Uchi,lane);
|
||||||
} else {
|
} else {
|
||||||
out[sF] = out[sF] + Uchi;
|
coalescedWrite(out[sF], _out+Uchi,lane);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GridBase *FGrid=in.Grid();
|
GridBase *FGrid=in.Grid();
|
||||||
GridBase *UGrid=U.Grid();
|
GridBase *UGrid=U.Grid();
|
||||||
typedef StaggeredKernels<Impl> ThisKernel;
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
autoView( UUU_v , UUU, AcceleratorRead);
|
autoView( UUU_v , UUU, AcceleratorRead);
|
||||||
autoView( U_v , U, AcceleratorRead);
|
autoView( U_v , U, AcceleratorRead);
|
||||||
autoView( in_v , in, AcceleratorRead);
|
autoView( in_v , in, AcceleratorRead);
|
||||||
@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
GridBase *FGrid=in.Grid();
|
GridBase *FGrid=in.Grid();
|
||||||
GridBase *UGrid=U.Grid();
|
GridBase *UGrid=U.Grid();
|
||||||
typedef StaggeredKernels<Impl> ThisKernel;
|
typedef StaggeredKernels<Impl> ThisKernel;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=acceleratorSIMTlane(Nsimd);
|
||||||
autoView( UUU_v , U, AcceleratorRead);
|
autoView( UUU_v , U, AcceleratorRead);
|
||||||
autoView( U_v , U, AcceleratorRead);
|
autoView( U_v , U, AcceleratorRead);
|
||||||
autoView( in_v , in, AcceleratorRead);
|
autoView( in_v , in, AcceleratorRead);
|
||||||
|
@ -85,21 +85,18 @@ public:
|
|||||||
|
|
||||||
std::cout << GridLogDebug << "Stout smearing started\n";
|
std::cout << GridLogDebug << "Stout smearing started\n";
|
||||||
|
|
||||||
// Smear the configurations
|
// C contains the staples multiplied by some rho
|
||||||
|
u_smr = U ; // set the smeared field to the current gauge field
|
||||||
SmearBase->smear(C, U);
|
SmearBase->smear(C, U);
|
||||||
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
if( mu == OrthogDim )
|
if( mu == OrthogDim ) continue ;
|
||||||
tmp = 1.0; // Don't smear in the orthogonal direction
|
// u_smr = exp(iQ_mu)*U_mu apart from Orthogdim
|
||||||
else {
|
Umu = peekLorentz(U, mu);
|
||||||
tmp = peekLorentz(C, mu);
|
tmp = peekLorentz(C, mu);
|
||||||
Umu = peekLorentz(U, mu);
|
iq_mu = Ta( tmp * adj(Umu));
|
||||||
iq_mu = Ta(
|
exponentiate_iQ(tmp, iq_mu);
|
||||||
tmp *
|
pokeLorentz(u_smr, tmp * Umu, mu);
|
||||||
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
|
|
||||||
exponentiate_iQ(tmp, iq_mu);
|
|
||||||
}
|
|
||||||
pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
|
|
||||||
}
|
}
|
||||||
std::cout << GridLogDebug << "Stout smearing completed\n";
|
std::cout << GridLogDebug << "Stout smearing completed\n";
|
||||||
};
|
};
|
||||||
|
@ -6,7 +6,8 @@ Source file: ./lib/qcd/modules/plaquette.h
|
|||||||
|
|
||||||
Copyright (C) 2017
|
Copyright (C) 2017
|
||||||
|
|
||||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -35,7 +36,7 @@ template <class Gimpl>
|
|||||||
class WilsonFlow: public Smear<Gimpl>{
|
class WilsonFlow: public Smear<Gimpl>{
|
||||||
unsigned int Nstep;
|
unsigned int Nstep;
|
||||||
unsigned int measure_interval;
|
unsigned int measure_interval;
|
||||||
mutable RealD epsilon, taus;
|
mutable RealD epsilon, taus,tolerance;
|
||||||
|
|
||||||
|
|
||||||
mutable WilsonGaugeAction<Gimpl> SG;
|
mutable WilsonGaugeAction<Gimpl> SG;
|
||||||
@ -47,13 +48,15 @@ class WilsonFlow: public Smear<Gimpl>{
|
|||||||
public:
|
public:
|
||||||
INHERIT_GIMPL_TYPES(Gimpl)
|
INHERIT_GIMPL_TYPES(Gimpl)
|
||||||
|
|
||||||
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
|
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1, RealD tol = 1e-3):
|
||||||
Nstep(Nstep),
|
Nstep(Nstep),
|
||||||
epsilon(epsilon),
|
epsilon(epsilon),
|
||||||
|
tolerance(tol),
|
||||||
measure_interval(interval),
|
measure_interval(interval),
|
||||||
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
||||||
// WilsonGaugeAction with beta 3.0
|
// WilsonGaugeAction with beta 3.0
|
||||||
assert(epsilon > 0.0);
|
assert(epsilon > 0.0);
|
||||||
|
assert(tolerance > 0.0);
|
||||||
LogMessage();
|
LogMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,6 +67,8 @@ public:
|
|||||||
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
||||||
std::cout << GridLogMessage
|
std::cout << GridLogMessage
|
||||||
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "[WilsonFlow] tolerance : " << tolerance << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void smear(GaugeField&, const GaugeField&) const;
|
virtual void smear(GaugeField&, const GaugeField&) const;
|
||||||
@ -106,11 +111,14 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
|
|||||||
if (maxTau - taus < epsilon){
|
if (maxTau - taus < epsilon){
|
||||||
epsilon = maxTau-taus;
|
epsilon = maxTau-taus;
|
||||||
}
|
}
|
||||||
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
||||||
GaugeField Z(U.Grid());
|
GaugeField Z(U.Grid());
|
||||||
GaugeField Zprime(U.Grid());
|
GaugeField Zprime(U.Grid());
|
||||||
GaugeField tmp(U.Grid()), Uprime(U.Grid());
|
GaugeField tmp(U.Grid()), Uprime(U.Grid()),Usave(U.Grid());
|
||||||
|
|
||||||
Uprime = U;
|
Uprime = U;
|
||||||
|
Usave = U;
|
||||||
|
|
||||||
SG.deriv(U, Z);
|
SG.deriv(U, Z);
|
||||||
Zprime = -Z;
|
Zprime = -Z;
|
||||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||||
@ -128,18 +136,33 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
|
|||||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||||
|
|
||||||
// Ramos
|
// Ramos arXiv:1301.4388
|
||||||
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
|
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
|
||||||
// Compute distance as norm^2 of the difference
|
// Compute distance as norm^2 of the difference
|
||||||
GaugeField diffU = U - Uprime;
|
GaugeField diffU = U - Uprime;
|
||||||
RealD diff = norm2(diffU);
|
// Wrong
|
||||||
// adjust integration step
|
// RealD diff = norm2(diffU);
|
||||||
|
// std::cout << GridLogMessage << "norm2: " << diff << std::endl;
|
||||||
|
|
||||||
|
// RealD tol=1e-3;
|
||||||
|
|
||||||
|
RealD diff = real(rankInnerMax(diffU,diffU));
|
||||||
|
diff = sqrt(diff)/18.; // distance defined in Ramos
|
||||||
|
|
||||||
|
GridBase *grid = diffU.Grid();
|
||||||
|
std::cout << GridLogMessage << "max: " << diff << std::endl;
|
||||||
|
grid->GlobalMax(diff);
|
||||||
|
std::cout << GridLogMessage << "max: " << diff << std::endl;
|
||||||
|
|
||||||
|
if(diff < tolerance) {
|
||||||
taus += epsilon;
|
taus += epsilon;
|
||||||
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
|
// std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
|
||||||
|
} else {
|
||||||
|
U = Usave;
|
||||||
|
}
|
||||||
|
|
||||||
epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
|
epsilon = epsilon*0.95*std::pow(tolerance/diff,1./3.);
|
||||||
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
|
std::cout << GridLogMessage << "Distance : "<<diff<<"New epsilon : " << epsilon << std::endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,8 +207,11 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
|
|||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
|
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
|
||||||
out = in;
|
out = in;
|
||||||
taus = epsilon;
|
// taus = epsilon;
|
||||||
|
taus = 0.;
|
||||||
unsigned int step = 0;
|
unsigned int step = 0;
|
||||||
|
double measTau = epsilon*measure_interval;
|
||||||
|
std::cout << GridLogMessage << "measTau :"<< measTau << std::endl;
|
||||||
do{
|
do{
|
||||||
step++;
|
step++;
|
||||||
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
||||||
@ -193,10 +219,12 @@ void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, Re
|
|||||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
|
||||||
<< step << " " << taus << " "
|
<< step << " " << taus << " "
|
||||||
<< energyDensityPlaquette(out) << std::endl;
|
<< energyDensityPlaquette(out) << std::endl;
|
||||||
if( step % measure_interval == 0){
|
// if( step % measure_interval == 0){
|
||||||
|
if( taus > measTau ) {
|
||||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
|
||||||
<< step << " "
|
<< step << " "
|
||||||
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
|
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
|
||||||
|
measTau += epsilon*measure_interval;
|
||||||
}
|
}
|
||||||
} while (taus < maxTau);
|
} while (taus < maxTau);
|
||||||
|
|
||||||
|
@ -65,7 +65,8 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
||||||
#ifndef GRID_SYCL
|
//#ifndef GRID_SYCL
|
||||||
|
#if 1
|
||||||
// Use the scalar as our own complex on GPU ... thrust::complex or std::complex
|
// Use the scalar as our own complex on GPU ... thrust::complex or std::complex
|
||||||
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
template<class vsimd,IfSimd<vsimd> = 0> accelerator_inline
|
||||||
typename vsimd::scalar_type
|
typename vsimd::scalar_type
|
||||||
|
@ -34,6 +34,16 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// outerProduct Scalar x Scalar -> Scalar
|
// outerProduct Scalar x Scalar -> Scalar
|
||||||
// Vector x Vector -> Matrix
|
// Vector x Vector -> Matrix
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<class CC,IfComplex<CC> = 0>
|
||||||
|
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
||||||
|
{
|
||||||
|
return l*conj(r);
|
||||||
|
}
|
||||||
|
template<class RR,IfReal<RR> = 0>
|
||||||
|
accelerator_inline RR outerProduct(const RR &l, const RR& r)
|
||||||
|
{
|
||||||
|
return l*r;
|
||||||
|
}
|
||||||
|
|
||||||
template<class l,class r,int N> accelerator_inline
|
template<class l,class r,int N> accelerator_inline
|
||||||
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
|
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
|
||||||
@ -57,17 +67,6 @@ auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<declt
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class CC,IfComplex<CC> = 0>
|
|
||||||
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
|
||||||
{
|
|
||||||
return l*conj(r);
|
|
||||||
}
|
|
||||||
template<class RR,IfReal<RR> = 0>
|
|
||||||
accelerator_inline RR outerProduct(const RR &l, const RR& r)
|
|
||||||
{
|
|
||||||
return l*r;
|
|
||||||
}
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -457,7 +457,7 @@ accelerator_inline void acceleratorSynchronise(void)
|
|||||||
__syncwarp();
|
__syncwarp();
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
cl::sycl::detail::workGroupBarrier();
|
//cl::sycl::detail::workGroupBarrier();
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) [](https://travis-ci.org/paboyle/Grid)
|
# Grid [),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview)
|
||||||
|
|
||||||
**Data parallel C++ mathematical object library.**
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
@ -66,7 +66,9 @@ int main(int argc, char** argv)
|
|||||||
// Set up RNGs
|
// Set up RNGs
|
||||||
std::vector<int> seeds4({1, 2, 3, 4});
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
std::vector<int> seeds5({5, 6, 7, 8});
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
|
GridSerialRNG sRNG;
|
||||||
GridParallelRNG RNG5(FGrid);
|
GridParallelRNG RNG5(FGrid);
|
||||||
|
sRNG.SeedFixedIntegers(seeds5);
|
||||||
RNG5.SeedFixedIntegers(seeds5);
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid);
|
GridParallelRNG RNG4(UGrid);
|
||||||
RNG4.SeedFixedIntegers(seeds4);
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
@ -84,7 +86,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
|
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu,sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +96,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
|
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu,sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,6 +74,9 @@ int main(int argc, char** argv)
|
|||||||
RNG5.SeedFixedIntegers(seeds5);
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid);
|
GridParallelRNG RNG4(UGrid);
|
||||||
RNG4.SeedFixedIntegers(seeds4);
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
sRNG.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
// Random gauge field
|
// Random gauge field
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
@ -90,7 +93,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu,sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,7 +103,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu,sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,8 +68,10 @@ int main(int argc, char** argv)
|
|||||||
// Set up RNGs
|
// Set up RNGs
|
||||||
std::vector<int> seeds4({1, 2, 3, 4});
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
std::vector<int> seeds5({5, 6, 7, 8});
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
|
GridSerialRNG sRNG;
|
||||||
GridParallelRNG RNG5(FGrid);
|
GridParallelRNG RNG5(FGrid);
|
||||||
RNG5.SeedFixedIntegers(seeds5);
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
sRNG.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid);
|
GridParallelRNG RNG4(UGrid);
|
||||||
RNG4.SeedFixedIntegers(seeds4);
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
@ -86,7 +88,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
|
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, false);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu, sRNG,RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,7 +98,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
ConjugateGradient<LatticeFermion> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
|
ExactOneFlavourRatioPseudoFermionAction<WilsonImplR> Meofa(Lop, Rop, CG, Params, true);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu, sRNG,RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,7 +73,9 @@ int main(int argc, char** argv)
|
|||||||
std::vector<int> seeds4({1, 2, 3, 4});
|
std::vector<int> seeds4({1, 2, 3, 4});
|
||||||
std::vector<int> seeds5({5, 6, 7, 8});
|
std::vector<int> seeds5({5, 6, 7, 8});
|
||||||
GridParallelRNG RNG5(FGrid);
|
GridParallelRNG RNG5(FGrid);
|
||||||
|
GridSerialRNG sRNG;
|
||||||
RNG5.SeedFixedIntegers(seeds5);
|
RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
sRNG.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid);
|
GridParallelRNG RNG4(UGrid);
|
||||||
RNG4.SeedFixedIntegers(seeds4);
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
@ -91,7 +93,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, false);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu, sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,7 +103,7 @@ int main(int argc, char** argv)
|
|||||||
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
ConjugateGradient<FermionField> CG(1.0e-12, 5000);
|
||||||
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
|
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> Meofa(Lop, Rop, CG, Params, true);
|
||||||
|
|
||||||
Meofa.refresh(Umu, RNG5);
|
Meofa.refresh(Umu, sRNG, RNG5);
|
||||||
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -59,6 +58,10 @@ int main (int argc, char ** argv)
|
|||||||
double beta = 1.0;
|
double beta = 1.0;
|
||||||
double c1 = 0.331;
|
double c1 = 0.331;
|
||||||
|
|
||||||
|
const int nu = 1;
|
||||||
|
std::vector<int> twists(Nd,0);
|
||||||
|
twists[nu] = 1;
|
||||||
|
ConjugateGimplD::setDirections(twists);
|
||||||
ConjugatePlaqPlusRectangleActionR Action(beta,c1);
|
ConjugatePlaqPlusRectangleActionR Action(beta,c1);
|
||||||
//ConjugateWilsonGaugeActionR Action(beta);
|
//ConjugateWilsonGaugeActionR Action(beta);
|
||||||
//WilsonGaugeActionR Action(beta);
|
//WilsonGaugeActionR Action(beta);
|
||||||
|
@ -61,7 +61,9 @@ int main (int argc, char ** argv)
|
|||||||
std::vector<int> seeds({1,2,3,4});
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
|
||||||
GridParallelRNG pRNG(&Grid);
|
GridParallelRNG pRNG(&Grid);
|
||||||
|
GridSerialRNG sRNG;
|
||||||
pRNG.SeedFixedIntegers(seeds);
|
pRNG.SeedFixedIntegers(seeds);
|
||||||
|
sRNG.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
typedef PeriodicGimplR Gimpl;
|
typedef PeriodicGimplR Gimpl;
|
||||||
typedef WilsonGaugeAction<Gimpl> GaugeAction;
|
typedef WilsonGaugeAction<Gimpl> GaugeAction;
|
||||||
@ -115,7 +117,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
integrator.setMomentumFilter(filter);
|
integrator.setMomentumFilter(filter);
|
||||||
|
|
||||||
integrator.refresh(U, pRNG); //doesn't actually change the gauge field
|
integrator.refresh(U, sRNG, pRNG); //doesn't actually change the gauge field
|
||||||
|
|
||||||
//Check the momentum is zero on the boundary
|
//Check the momentum is zero on the boundary
|
||||||
const auto &P = integrator.getMomentum();
|
const auto &P = integrator.getMomentum();
|
||||||
|
@ -33,6 +33,7 @@ namespace Grid{
|
|||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters,
|
||||||
int, steps,
|
int, steps,
|
||||||
double, step_size,
|
double, step_size,
|
||||||
|
double, tol,
|
||||||
int, meas_interval,
|
int, meas_interval,
|
||||||
double, maxTau); // for the adaptive algorithm
|
double, maxTau); // for the adaptive algorithm
|
||||||
|
|
||||||
@ -82,13 +83,27 @@ int main(int argc, char **argv) {
|
|||||||
SU<Nc>::HotConfiguration(pRNG, Umu);
|
SU<Nc>::HotConfiguration(pRNG, Umu);
|
||||||
|
|
||||||
typedef Grid::XmlReader Serialiser;
|
typedef Grid::XmlReader Serialiser;
|
||||||
Serialiser Reader("input.xml");
|
// Serialiser Reader("input.xml");
|
||||||
WFParameters WFPar(Reader);
|
// WFParameters WFPar(Reader);
|
||||||
ConfParameters CPar(Reader);
|
// ConfParameters CPar(Reader);
|
||||||
CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix);
|
// WFParameters WFPar;
|
||||||
|
int steps = 800;
|
||||||
|
double step_size=0.02;
|
||||||
|
double tol=1e-4;
|
||||||
|
int meas_interval=50;
|
||||||
|
double maxTau = 16;
|
||||||
|
// ConfParameters CPar;
|
||||||
|
// CPar. conf_prefix="configurations/ckpoint_lat";
|
||||||
|
// CPar. rng_prefix="rngs/ckpoint_rng";
|
||||||
|
// CPar. StartConfiguration=100,
|
||||||
|
// CPar. EndConfiguration=110,
|
||||||
|
// CPar. Skip=1;
|
||||||
|
// CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix);
|
||||||
|
CheckpointerParameters CPPar("configurations/ckpoint_lat","rngs/ckpoint_rng");
|
||||||
BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);
|
BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);
|
||||||
|
|
||||||
for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
|
// for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
|
||||||
|
for (int conf = 100; conf <= 110; conf+= 1){
|
||||||
|
|
||||||
CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG);
|
CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG);
|
||||||
|
|
||||||
@ -96,9 +111,10 @@ int main(int argc, char **argv) {
|
|||||||
std::cout << GridLogMessage << "Initial plaquette: "
|
std::cout << GridLogMessage << "Initial plaquette: "
|
||||||
<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
|
<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
|
||||||
|
|
||||||
WilsonFlow<PeriodicGimplR> WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval);
|
WilsonFlow<PeriodicGimplR> WF(steps, step_size, meas_interval);
|
||||||
|
|
||||||
WF.smear_adaptive(Uflow, Umu, WFPar.maxTau);
|
// WF.smear_adaptive(Uflow, Umu, maxTau);
|
||||||
|
WF.smear(Uflow, Umu);
|
||||||
|
|
||||||
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
|
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
|
||||||
RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
|
RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
|
||||||
|
Reference in New Issue
Block a user