Mirror of https://github.com/paboyle/Grid.git (synced 2024-11-09 23:45:36 +00:00)

Merge branch 'develop' into feature/gpu-port
This commit is contained in commit b57a4d32aa
19  .gitignore (vendored)
@@ -83,6 +83,7 @@ ltmain.sh
.Trashes
ehthumbs.db
Thumbs.db
.dirstamp

# build directory #
###################
@@ -97,11 +98,8 @@ build.sh

# Eigen source #
################
lib/Eigen/*

# FFTW source #
################
lib/fftw/*
Grid/Eigen
Eigen/*

# libtool macros #
##################
@@ -112,14 +110,7 @@ m4/libtool.m4
################
gh-pages/

# Buck files #
##############
.buck*
buck-out
BUCK
make-bin-BUCK.sh

# generated sources #
#####################
lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc
Grid/qcd/spin/gamma-gen/*.h
Grid/qcd/spin/gamma-gen/*.cc
25  .travis.yml
@@ -9,6 +9,11 @@ matrix:
- os: osx
  osx_image: xcode8.3
  compiler: clang
  env: PREC=single
- os: osx
  osx_image: xcode8.3
  compiler: clang
  env: PREC=double

before_install:
- export GRIDDIR=`pwd`
@@ -16,9 +21,11 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi

install:
- export CWD=`pwd`
- echo $CWD
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
@@ -31,16 +38,24 @@ install:
- which $CXX
- $CXX --version
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi

script:
- ./bootstrap.sh
- mkdir build
- cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- mkdir lime
- cd lime
- mkdir build
- cd build
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
- tar xf lime-1.3.2.tar.gz
- cd lime-1.3.2
- ./configure --prefix=$CWD/build/lime/install
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make install
- cd $CWD/build
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check
@@ -42,7 +42,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h>
#include <Grid/qcd/utils/GaugeFix.h>
NAMESPACE_CHECK(GaugeFix);
#include <Grid/qcd/smearing/Smearing.h>
NAMESPACE_CHECK(Smearing);
#include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/hmc/HMC_aggregate.h>
@@ -38,18 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_BASE_H
#define GRID_BASE_H


#include <Grid/DisableWarnings.h>
#include <Grid/Namespace.h>
#include <Grid/GridStd.h>
#include <Grid/threads/Pragmas.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/threads/Threads.h>
#include <Grid/serialisation/Serialisation.h>
#include <Grid/util/Util.h>
#include <Grid/util/Sha.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>
@@ -58,5 +61,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/stencil/Stencil.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/algorithms/Algorithms.h>
NAMESPACE_CHECK(GridCore)

#endif
@@ -38,5 +38,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/qcd/spin/Spin.h>
#include <Grid/qcd/utils/Utils.h>
#include <Grid/qcd/representations/Representations.h>
NAMESPACE_CHECK(GridQCDCore);

#endif
@@ -1,5 +1,10 @@
#include <Grid/GridCore.h>
#pragma once
// Force Eigen to use MKL if Grid has been configured with --enable-mkl
#ifdef USE_MKL
#define EIGEN_USE_MKL_ALL
#endif

#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -21,6 +21,32 @@ if BUILD_HDF5
extra_headers+=serialisation/Hdf5Type.h
endif

all: version-cache

version-cache:
	@if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\
		a="uncommited changes";\
	else\
		a="clean";\
	fi;\
	echo "`git log -n 1 --format=format:"#define GITHASH \\"%H:%d $$a\\"%n" HEAD`" > vertmp;\
	if [ -e version-cache ]; then\
		d=`diff vertmp version-cache`;\
		if [ "$${d}" != "" ]; then\
			mv vertmp version-cache;\
			rm -f Version.h;\
		fi;\
	else\
		mv vertmp version-cache;\
		rm -f Version.h;\
	fi;\
	rm -f vertmp

Version.h:
	cp version-cache Version.h

.PHONY: version-cache
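# A note on the rule above: it records the current git revision, ref decoration and a
# clean/"uncommited changes" flag in version-cache, and only removes Version.h when that string
# actually changes, so an unchanged working tree does not force a rebuild on every make.
# The generated Version.h then contains a single macro, schematically (hypothetical values):
#   #define GITHASH "<full-sha>:(HEAD -> develop) clean"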

#
# Libraries
#
@@ -30,8 +56,8 @@ include Eigen.inc
lib_LIBRARIES = libGrid.a

CCFILES += $(extra_sources)
HFILES += $(extra_headers)
HFILES += $(extra_headers) Config.h Version.h

libGrid_a_SOURCES = $(CCFILES)
libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
libGrid_adir = $(includedir)/Grid
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files)
@@ -28,7 +28,11 @@ directory
/* END LEGAL */
#pragma once

#include <type_traits>
#include <cassert>

#define NAMESPACE_BEGIN(A) namespace A {
#define NAMESPACE_END(A) }
#define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid)
#define GRID_NAMESPACE_END NAMESPACE_END(Grid)
#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same<namespaceTEST##x, ::namespaceTEST##x>::value,"Not in :: at" );
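The NAMESPACE_CHECK macro introduced here is a compile-time guard: the struct it declares only coincides with ::namespaceTESTx when the macro is expanded at global scope, so a header accidentally included inside an open namespace fails to compile at the offending check. A minimal illustration (not taken from the diff):

    #include <type_traits>
    #define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same<namespaceTEST##x, ::namespaceTEST##x>::value,"Not in :: at" );

    NAMESPACE_CHECK(TopLevel);     // fine: expanded at global scope
    namespace Oops {
      // NAMESPACE_CHECK(Inside);  // would not compile: the test struct is not ::namespaceTESTInside
    }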
@@ -39,6 +39,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h>

#include <Grid/algorithms/iterative/Deflation.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h>
@ -182,12 +182,15 @@ class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual RealD Mpc (const Field &in, Field &out) =0;
|
||||
virtual RealD MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no)
|
||||
{
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
@ -217,11 +220,13 @@ public:
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
// std::cout <<"grid pointers: in.Grid()="<< in.Grid() << " out.Grid()=" << out.Grid() << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
//std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
@ -305,36 +310,69 @@ template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
GridLogIterative.TimingMode(1);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
std::cout << GridLogIterative << " HermOp "<<std::endl;
|
||||
Mpc(in,out);
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
}
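  // For the staggered red-black system the even-even block is trivially proportional to the mass
  // (hence the isTrivialEE()/Mass() assertions in the constructor), so schematically the operator
  // applied here is
  //   HermOp(in) = m^2 in - Meooe(Meooe(in)),
  // which is what axpby(out,-1.0,mass*mass,tmp,in) evaluates, with tmp = Meooe(Meooe(in)).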
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
virtual RealD Mpc (const Field &in, Field &out)
|
||||
{
|
||||
|
||||
Field tmp(in.Grid());
|
||||
Field tmp2(in.Grid());
|
||||
|
||||
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
// std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
// std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp2);
|
||||
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
|
||||
|
||||
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
@ -353,6 +391,12 @@ template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOpera
|
||||
template<class Field> class OperatorFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
|
||||
assert(in.size()==out.size());
|
||||
for(int k=0;k<in.size();k++){
|
||||
(*this)(Linop,in[k],out[k]);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class Field> class LinearFunction {
|
@ -55,6 +55,14 @@ public:
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
virtual GridBase *RedBlackGrid(void)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Query the even even properties to make algorithmic decisions
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
virtual RealD Mass(void) { return 0.0; };
|
||||
virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden
|
||||
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
694  Grid/algorithms/iterative/BlockConjugateGradient.h  (normal file)
@@ -0,0 +1,694 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class Field>
|
||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
|
||||
typedef typename Field::scalar_type scomplex;
|
||||
|
||||
int blockDim ;
|
||||
int Nblock;
|
||||
|
||||
BlockCGtype CGtype;
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer PrintInterval; //GridLogMessages or Iterative
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
|
||||
{};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Thin QR factorisation (google it)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Dimensions
|
||||
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
||||
//
|
||||
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
|
||||
//
|
||||
// Q C = R => Q = R C^{-1}
|
||||
//
|
||||
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
|
||||
//
|
||||
// Set C = L^{dag}, and then Q^dag Q = ident
|
||||
//
|
||||
// Checks:
|
||||
// Cdag C = Rdag R ; passes.
|
||||
// QdagQ = 1 ; passes
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
// Force manifest hermitian to avoid rounding related
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Q = R C^{-1}
|
||||
//
|
||||
// Q_j = R_i Cinv(i,j)
|
||||
//
|
||||
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceMulMatrix(Q,Cinv,R,Orthog);
|
||||
}
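  // Standalone illustration (plain Eigen, not Grid code) of the thin-QR-via-Cholesky step used
  // above; the Grid version distributes R over the lattice and replaces the dense products with
  // sliceInnerProductMatrix / sliceMulMatrix, but the small (Nblock x Nblock) algebra is the same:
  //
  //   void thinQR(const Eigen::MatrixXcd &R, Eigen::MatrixXcd &Q, Eigen::MatrixXcd &C) {
  //     Eigen::MatrixXcd rr = R.adjoint() * R;     // Hermitian, positive definite for full-rank R
  //     rr = 0.5 * (rr + rr.adjoint());            // force exact Hermiticity against rounding
  //     Eigen::MatrixXcd L = rr.llt().matrixL();   // Cholesky: rr = L L^dag
  //     C = L.adjoint();                           // C = L^dag, so C^dag C = R^dag R
  //     Q = R * C.inverse();                       // Q = R C^{-1}  =>  Q^dag Q = 1
  //   }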
|
||||
// see comments above
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
std::vector<Field> & Q,
|
||||
const std::vector<Field> & R)
|
||||
{
|
||||
InnerProductMatrix(m_rr,R,R);
|
||||
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
|
||||
MulMatrix(Q,Cinv,R);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Call one of several implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQ ) {
|
||||
BlockCGrQsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == CGmultiRHS ) {
|
||||
CGmultiRHSsolve(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQVec ) {
|
||||
BlockCGrQsolveVec(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQ implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = B.Grid()->_fdimensions[Orthog];
|
||||
/* FAKE */
|
||||
Nblock=8;
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
conformable(X, B);
|
||||
|
||||
Field tmp(B);
|
||||
Field Q(B);
|
||||
Field D(B);
|
||||
Field Z(B);
|
||||
Field AD(B);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,B,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,B,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,X,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
AD = AD-B;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
||||
// Use this for spread out across nodes
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim
|
||||
Nblock = Src.Grid()->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
std::vector<ComplexD> v_pAp(Nblock);
|
||||
std::vector<RealD> v_rr (Nblock);
|
||||
std::vector<RealD> v_rr_inv(Nblock);
|
||||
std::vector<RealD> v_alpha(Nblock);
|
||||
std::vector<RealD> v_beta(Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch sliceNormTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductVector(v_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
|
||||
}
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_rr_inv[b] = 1.0/v_rr[b];
|
||||
}
|
||||
sliceNormTimer.Start();
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
sliceNormTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_beta[b] = v_rr_inv[b] *v_rr[b];
|
||||
}
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(P,v_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
RealD rr = v_rr[b]/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
|
||||
for(int b=0;b<Nblock;b++){
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
m(b,bp) = innerProduct(X[b],Y[bp]);
|
||||
}}
|
||||
}
|
||||
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
// Deal with case AP aliases with either Y or X
|
||||
std::vector<Field> tmp(Nblock,X[0]);
|
||||
for(int b=0;b<Nblock;b++){
|
||||
tmp[b] = Y[b];
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = tmp[b];
|
||||
}
|
||||
}
|
||||
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = Zero();
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
AP[b] += (m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
}
|
||||
double normv(const std::vector<Field> &P){
|
||||
double nn = 0.0;
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
nn+=norm2(P[b]);
|
||||
}
|
||||
return nn;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQvec implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
|
||||
{
|
||||
Nblock = B.size();
|
||||
assert(Nblock == X.size());
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
X[b].checkerboard = B[b].checkerboard;
|
||||
conformable(X[b], B[b]);
|
||||
conformable(X[b], X[0]);
|
||||
}
|
||||
|
||||
Field Fake(B[0]);
|
||||
|
||||
std::vector<Field> tmp(Nblock,Fake);
|
||||
std::vector<Field> Q(Nblock,Fake);
|
||||
std::vector<Field> D(Nblock,Fake);
|
||||
std::vector<Field> Z(Nblock,Fake);
|
||||
std::vector<Field> AD(Nblock,Fake);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
Linop.HermOp(X[b], AD[b]);
|
||||
tmp[b] = B[b] - AD[b];
|
||||
}
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
|
||||
for(int b=0;b<Nblock;b++) D[b]=Q[b];
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
InnerProductMatrix(m_DZ,D,Z);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(X,m_tmp, D,X);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(tmp,m_M,Z,Q,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(D,m_tmp,D,Q);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
|
||||
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -55,6 +55,7 @@ public:
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
@ -70,7 +71,6 @@ public:
|
||||
|
||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
@ -96,38 +96,47 @@ public:
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
for (k = 1; k <= MaxIterations*1000; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||
Linop.HermOp(p, mmp);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
// RealD qqck = norm2(mmp);
|
||||
// ComplexD dck = innerProduct(p,mmp);
|
||||
|
||||
InnerTimer.Start();
|
||||
ComplexD dc = innerProduct(p,mmp);
|
||||
InnerTimer.Stop();
|
||||
d = dc.real();
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
AxpyNormTimer.Start();
|
||||
cp = axpy_norm(r, -a, mmp, r);
|
||||
AxpyNormTimer.Stop();
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi = a * p + psi;
|
||||
p = p * b + r;
|
||||
|
||||
LinearCombTimer.Start();
|
||||
auto psi_v = psi.View();
|
||||
auto p_v = p.View();
|
||||
auto r_v = r.View();
|
||||
parallel_for(int ss=0;ss<src.Grid()->oSites();ss++){
|
||||
vstream(psi_v[ss], a * p_v[ss] + psi_v[ss]);
|
||||
vstream(p_v [ss], b * p_v[ss] + r_v[ss]);
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
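      // In the notation of this loop the CG recurrence being evaluated is
      //   c = r.r (previous iteration),  d = Re<p, A p>,  a = c/d,
      //   r <- r - a A p  with  cp = r.r  (the axpy_norm call),  b = cp/c,
      //   psi <- psi + a p,   p <- b p + r   (the fused vstream loop above).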
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
@ -148,6 +157,9 @@ public:
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
@ -43,6 +43,7 @@ class ConjugateGradientMultiShift : public OperatorMultiFunction<Field>,
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
|
||||
@ -164,6 +165,15 @@ public:
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
@ -171,7 +181,9 @@ public:
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
@ -180,6 +192,7 @@ public:
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
@ -190,22 +203,34 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
@ -215,6 +240,7 @@ public:
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
@ -257,6 +283,9 @@ public:
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
@ -269,8 +298,19 @@ public:
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
104  Grid/algorithms/iterative/Deflation.h  (normal file)
@@ -0,0 +1,104 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = Zero();
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
}
|
||||
};
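  // The guess assembled above is the source projected onto the stored eigenpairs with the inverse
  // eigenvalues applied:
  //   guess = sum_i evec_i <evec_i, src> / eval_i ,
  // i.e. (assuming the pairs are eigenpairs of the operator being solved) the exact solution on
  // span{evec_i} and zero on its complement, leaving the outer solver only the undeflated modes.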
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0].Grid());
|
||||
CoarseField guess_coarse(evec_coarse[0].Grid()); guess_coarse = Zero();
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
};
|
||||
};
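  // The local-coherence variant performs the same deflation in the blocked (coarse) space:
  // blockProject the source onto the subspace, deflate there with the coarse eigenpairs, and
  // blockPromote the result back to the fine grid, schematically
  //   guess = P^dag ( sum_i w_i <w_i, P src> / lambda_i ),  with P = blockProject, P^dag = blockPromote.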
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
@ -53,7 +53,8 @@ template<class Field>
|
||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].View()) View;
|
||||
std::vector<View> basis_v(basis.size());
|
||||
auto tmp_v = basis[0].View();
|
||||
std::vector<View> basis_v(basis.size(),tmp_v);
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
@ -63,7 +64,7 @@ void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, i
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector < vobj > B(Nm); // Thread private
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
thread_loop_in_region( (int ss=0;ss < grid->oSites();ss++),{
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
@ -188,6 +189,7 @@ enum IRLdiagonalisation {
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
@ -250,6 +252,7 @@ class ImplicitlyRestartedLanczos {
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
@ -497,15 +500,13 @@ until convergence
|
||||
Field B(grid); B.Checkerboard() = evec[0].Checkerboard();
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
if ( j > Nconv ) {
|
||||
Nconv=j+1;
|
||||
jj=Nstop; // Terminate the scan
|
||||
}
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
@ -513,8 +514,10 @@ until convergence
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
@ -47,6 +47,7 @@ public:
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
@ -72,21 +73,24 @@ public:
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0].Grid();
|
||||
int checkerboard = subspace[0].Checkerboard();
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
FineField fin(FineGrid);
|
||||
FineField fout(FineGrid);
|
||||
FineField fin (FineGrid); fin.Checkerboard()= checkerboard;
|
||||
FineField fout(FineGrid); fout.Checkerboard() = checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -101,24 +105,27 @@ public:
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,LinearOperatorBase<FineField>& linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &aggregate) :
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
_Aggregate(aggregate) { };
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
GridBase *FineGrid = subspace[0].Grid();
|
||||
int checkerboard = subspace[0].Checkerboard();
|
||||
|
||||
FineField fin(FineGrid) ;fin.Checkerboard() =_Aggregate.Checkerboard();
|
||||
FineField fout(FineGrid);fout.Checkerboard() =_Aggregate.Checkerboard();
|
||||
FineField fin (FineGrid); fin.Checkerboard() =checkerboard;
|
||||
FineField fout(FineGrid);fout.Checkerboard() =checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(in,fin); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
_Aggregate.ProjectToSubspace(out,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
@ -134,19 +141,23 @@ public:
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
Aggregation<Fobj,CComplex,nbasis> &_Aggregate;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> &Aggregate,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop),_Aggregate(Aggregate), _Poly(Poly), _coarse_relax_tol(coarse_relax_tol) { };
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
@ -170,14 +181,13 @@ public:
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _Aggregate.FineGrid;
|
||||
|
||||
int checkerboard = _Aggregate.Checkerboard();
|
||||
|
||||
GridBase *FineGrid = _subspace[0].Grid();
|
||||
int checkerboard = _subspace[0].Checkerboard();
|
||||
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
|
||||
FineField fv(FineGrid);fv.Checkerboard() =checkerboard;
|
||||
|
||||
_Aggregate.PromoteFromSubspace(B,fv);
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
@ -219,27 +229,67 @@ protected:
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
// FIXME replace Aggregation with vector of fine; the code reuse is too small for
|
||||
// the hassle and complexity of cross coupling.
|
||||
Aggregation<Fobj,CComplex,nbasis> _Aggregate;
|
||||
std::vector<RealD> evals_fine;
|
||||
std::vector<RealD> evals_coarse;
|
||||
std::vector<CoarseField> evec_coarse;
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_Aggregate(CoarseGrid,FineGrid,checkerboard),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard)
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
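The refactor above replaces the owned eigenvector storage with public references (subspace, evals_fine, evals_coarse, evec_coarse) that bind either to the private _-prefixed members (this constructor) or to caller-owned vectors (the Hadrons constructor below). A minimal sketch of that idiom, with invented names that are not Grid API:

#include <vector>

// Illustration only: the working handle is a reference that can point either at
// the object's own private storage or at storage supplied by the caller.
class Holder {
  std::vector<double> _data;          // private storage, used only when we own the data
public:
  std::vector<double> &data;          // what member functions actually use
  Holder()                           : data(_data) {}   // own the storage
  Holder(std::vector<double> &ext)   : data(ext)   {}   // borrow external (module-owned) storage
};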
void Orthogonalise(void ) { _Aggregate.Orthogonalise(); }
//////////////////////////////////////////////////////////////////////////
// Alternate constructor, external storage for use by Hadrons module
//////////////////////////////////////////////////////////////////////////
LocalCoherenceLanczos(GridBase *FineGrid,
GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard,
std::vector<FineField> &ext_subspace,
std::vector<CoarseField> &ext_coarse,
std::vector<RealD> &ext_eval_fine,
std::vector<RealD> &ext_eval_coarse
) :
_CoarseGrid(CoarseGrid),
_FineGrid(FineGrid),
_FineOp(FineOp),
_checkerboard(checkerboard),
evals_fine (ext_eval_fine),
evals_coarse(ext_eval_coarse),
subspace (ext_subspace),
evec_coarse (ext_coarse)
{
evals_fine.resize(0);
evals_coarse.resize(0);
};

void Orthogonalise(void ) {
CoarseScalar InnerProd(_CoarseGrid);
std::cout << GridLogMessage <<" Gram-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gram-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
};

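Because the constructor above only stores references, the caller keeps the eigenpacks alive itself. A hedged usage sketch: the grids (FrbGrid, CoarseGrid), the Hermitian fine operator HermOp, the Fobj/CComplex instantiation and all Lanczos parameters are placeholders for whatever the driver defines, and the calcFine argument list is assumed to mirror the calcCoarse signature shown further down.

// Sketch only -- not part of the commit.
static const int nbasis = 60;
std::vector<FineField>   subspace;                 // filled by calcFine
std::vector<CoarseField> evec_coarse;              // filled by calcCoarse
std::vector<RealD>       evals_fine, evals_coarse;

LocalCoherenceLanczos<Fobj,CComplex,nbasis>
  LCL(FrbGrid, CoarseGrid, HermOp, Odd,
      subspace, evec_coarse, evals_fine, evals_coarse);
LCL.calcFine  (cheby_fine, Nstop, Nk, Nm, resid, MaxIt, betastp, MinRes);
LCL.Orthogonalise();
LCL.calcCoarse(cheby_op, cheby_smooth, relax, Nstop, Nk, Nm, resid, MaxIt, betastp, MinRes);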
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
@ -248,43 +298,44 @@ public:
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
_Aggregate.subspace.resize(Nk,_FineGrid);
|
||||
_Aggregate.subspace[0]=1.0;
|
||||
_Aggregate.subspace[0].Checkerboard()=_checkerboard;
|
||||
normalise(_Aggregate.subspace[0]);
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].Checkerboard()=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
_Aggregate.subspace[k].Checkerboard()=_checkerboard;
|
||||
Op(_Aggregate.subspace[k-1],_Aggregate.subspace[k]);
|
||||
normalise(_Aggregate.subspace[k]);
|
||||
subspace[k].Checkerboard()=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,_Aggregate.subspace[k],evals_fine[k],1.0)==1);
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(_Aggregate.subspace.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,_Aggregate);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
@ -304,34 +355,34 @@ public:
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
_Aggregate.subspace.resize(Nm,_FineGrid);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.Checkerboard() = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,_Aggregate.subspace,src,Nconv,false);
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
_Aggregate.subspace.resize(nbasis,_FineGrid);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,_Aggregate);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,_Aggregate);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,_Aggregate,relax);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
473
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
@ -0,0 +1,473 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
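Restating the two solve steps from the comment block above in conventional notation (editorial summary only):

% Odd checkerboard: Schur-complement (normal) equation, Wilson-type case
\hat D_{oo} = M_{oo} - M_{oe} M_{ee}^{-1} M_{eo} ,
\qquad
\hat D_{oo}^{\dagger} \hat D_{oo}\, \psi_o
   = \hat D_{oo}^{\dagger}\bigl(\eta_o - M_{oe} M_{ee}^{-1} \eta_e\bigr)

% Even checkerboard: back substitution
\psi_e = M_{ee}^{-1}\bigl(\eta_e - M_{eo}\, \psi_o\bigr)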
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Use base class to share code
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackBase {
|
||||
protected:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Shared code
|
||||
/////////////////////////////////////////////////////////////
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
|
||||
template<class Guesser>
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
int nblock = in.size();
|
||||
|
||||
std::vector<Field> src_o(nblock,grid);
|
||||
std::vector<Field> sol_o(nblock,grid);
|
||||
|
||||
std::vector<Field> guess_save;
|
||||
|
||||
Field resid(fgrid);
|
||||
Field tmp(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Prepare RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++){
|
||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Make the guesses
|
||||
////////////////////////////////////////////////
|
||||
if ( subGuess ) guess_save.resize(nblock,grid);
|
||||
|
||||
for(int b=0;b<nblock;b++){
|
||||
guess(src_o[b],sol_o[b]);
|
||||
|
||||
if ( subGuess ) {
|
||||
guess_save[b] = sol_o[b];
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the block solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// A2A boolean behavioural control & reconstruct other checkerboard
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++) {
|
||||
|
||||
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
|
||||
|
||||
///////// Needs even source //////////////
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Check unprec residual if possible
|
||||
/////////////////////////////////////////////////
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out[b],resid);
|
||||
resid = resid-in[b];
|
||||
RealD ns = norm2(in[b]);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
template<class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field resid(fgrid);
|
||||
Field src_o(grid);
|
||||
Field src_e(grid);
|
||||
Field sol_o(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_e,src_o);
|
||||
|
||||
////////////////////////////////
|
||||
// Construct the guess
|
||||
////////////////////////////////
|
||||
Field tmp(grid);
|
||||
guess(src_o,sol_o);
|
||||
|
||||
Field guess_save(grid);
|
||||
guess_save = sol_o;
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Fionn A2A boolean behavioural control
|
||||
////////////////////////////////////////////////
|
||||
if (subGuess) sol_o= sol_o-guess_save;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// RedBlack solution needs the even source
|
||||
///////////////////////////////////////////////////
|
||||
RedBlackSolution(_Matrix,sol_o,src_e,out);
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Override in derived. Not virtual as template methods
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
|
||||
|
||||
};
|
||||
|
||||
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
|
||||
{
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e(grid);
|
||||
|
||||
src_e = src_e_c; // Const correctness
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal has Mooee on it.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e_i = src_e-tmp; assert( src_e_i.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
//=> psi = MeeInv phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
// MooeeInv due to precond
////////////////////////////////////////////////
_Matrix.MooeeInv(sol_o,tmp);
sol_o_i = tmp;

///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o_i); assert( sol_o_i.Checkerboard() ==Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
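The solver classes keep the familiar call pattern; a minimal usage sketch, in which the operator name Ddwf, the grid FGrid and the tolerances are illustrative assumptions rather than anything defined in this file:

// Sketch only: Ddwf is assumed to derive from CheckerBoardedSparseMatrixBase<LatticeFermion>.
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000);
SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);

LatticeFermion src(FGrid);   // full-grid source, prepared elsewhere
LatticeFermion sol(FGrid);   // full-grid solution
SchurSolver(Ddwf, src, sol);                   // single right-hand side

// std::vector<LatticeFermion> overloads handle multiple right-hand sides, and
// constructing with (CG, true) subtracts the initial guess after the solve (the A2A case).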
@ -219,7 +219,6 @@ public:
|
||||
if ( __freeme ) free((void *)__freeme);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
@ -60,6 +60,7 @@ public:
|
||||
|
||||
virtual ~GridBase() = default;
|
||||
|
||||
|
||||
// Physics Grid information.
|
||||
Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||
Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal
|
||||
@ -79,6 +80,8 @@ public:
|
||||
Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
|
||||
Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
|
||||
|
||||
bool _isCheckerBoarded;
|
||||
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
@ -96,6 +96,7 @@ public:
|
||||
///////////////////////
|
||||
// Grid information
|
||||
///////////////////////
|
||||
_isCheckerBoarded = false;
|
||||
_ndimension = dimensions.size();
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
@ -121,6 +122,7 @@ public:
|
||||
|
||||
// Use a reduced simd grid
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
||||
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
||||
@ -165,6 +167,7 @@ public:
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
@ -140,9 +140,8 @@ public:
|
||||
const Coordinate &checker_dim_mask,
|
||||
int checker_dim)
|
||||
{
|
||||
///////////////////////
|
||||
// Grid information
|
||||
///////////////////////
|
||||
|
||||
_isCheckerBoarded = true;
|
||||
_checker_dim = checker_dim;
|
||||
assert(checker_dim_mask[checker_dim] == 1);
|
||||
_ndimension = dimensions.size();
|
@ -83,6 +83,7 @@ private:
|
||||
|
||||
public:
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
@ -44,11 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||
//assert (provided == MPI_THREAD_MULTIPLE);
|
||||
// assert (provided == MPI_THREAD_MULTIPLE);
|
||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
|
||||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
// Never clean up as done once.
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
@ -84,9 +88,17 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor)
|
||||
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
|
||||
{
|
||||
MPI_Comm optimal_comm;
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine
|
||||
////////////////////////////////////////////////////
|
||||
// Remap using the shared memory optimising routine
|
||||
// The remap creates a comm which must be freed
|
||||
////////////////////////////////////////////////////
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
|
||||
InitFromMPICommunicator(processors,optimal_comm);
|
||||
SetCommunicator(optimal_comm);
|
||||
///////////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////////
|
||||
MPI_Comm_free(&optimal_comm);
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
@ -111,10 +123,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
||||
// split the communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// int Nparent = parent._processors ;
|
||||
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
|
||||
int Nparent;
|
||||
MPI_Comm_size(parent.communicator,&Nparent);
|
||||
// std::cout << " Parent size "<<Nparent <<std::endl;
|
||||
|
||||
int childsize=1;
|
||||
for(int d=0;d<processors.size();d++) {
|
||||
@ -123,8 +133,6 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
||||
int Nchild = Nparent/childsize;
|
||||
assert (childsize * Nchild == Nparent);
|
||||
|
||||
// std::cout << " child size "<<childsize <<std::endl;
|
||||
|
||||
Coordinate ccoor(_ndimension); // coor within subcommunicator
|
||||
Coordinate scoor(_ndimension); // coor of split within parent
|
||||
Coordinate ssize(_ndimension); // coor of split within parent
|
||||
@ -152,8 +160,8 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
||||
|
||||
} else {
|
||||
srank = 0;
|
||||
comm_split = parent.communicator;
|
||||
// std::cout << " Inherited communicator " <<comm_split <<std::endl;
|
||||
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -166,6 +174,11 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
SetCommunicator(comm_split);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////
|
||||
MPI_Comm_free(&comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
for(int d=0;d<processors.size();d++){
|
||||
@ -179,6 +192,9 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
||||
|
||||
void CartesianCommunicator::InitFromMPICommunicator(const Coordinate &processors, MPI_Comm communicator_base)
|
||||
{
|
||||
////////////////////////////////////////////////////
|
||||
// Creates communicator, and the communicator_halo
|
||||
////////////////////////////////////////////////////
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
@ -123,6 +123,7 @@ protected:
|
||||
|
||||
public:
|
||||
SharedMemory() {};
|
||||
~SharedMemory();
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// set the buffers & sizes
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
@ -27,6 +27,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <pwd.h>
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
#include <cuda_runtime_api.h>
|
||||
@ -120,19 +121,150 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
|
||||
// Gray encode support
|
||||
int BinaryToGray (int binary) {
|
||||
int gray = (binary>>1)^binary;
|
||||
return gray;
|
||||
}
|
||||
int Log2Size(int TwoToPower,int MAXLOG2)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = -1;
|
||||
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
|
||||
if ( (0x1<<i) == WorldShmSize ) {
|
||||
for(int i=0;i<=MAXLOG2;i++){
|
||||
if ( (0x1<<i) == TwoToPower ) {
|
||||
log2size = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return log2size;
|
||||
}
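For reference, the behaviour of the two helpers above; a standalone sketch, not part of the commit:

#include <cstdio>

int main(void) {
  // gray = (binary>>1)^binary : consecutive integers map to codes differing in one bit,
  // which is what lets neighbouring lattice slabs sit on neighbouring hypercube nodes.
  for (int b = 0; b < 8; b++) printf("%d -> %d\n", b, (b >> 1) ^ b);   // 0 1 3 2 6 7 5 4
  // Log2Size(x,MAXLOG2) returns log2(x) when x is a power of two, and -1 otherwise,
  // e.g. Log2Size(8,16) == 3 and Log2Size(6,16) == -1.
  return 0;
}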
void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
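A worked instance of the packing just above, with an invented hostname:

// hostname "r1i2n16"  ->  R = 1, I = 2, N = 16
// nlo = 16 % 9 = 7 ,  nhi = 16 / 9 = 1
// hypercoor = (1<<8) | (2<<5) | (1<<3) | 7 = 0x14f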
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
@ -181,7 +313,69 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
@ -272,6 +466,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -281,7 +476,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
@ -308,6 +503,49 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHM_NONE
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
assert(WorldShmSize == 1);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
int fd=-1;
|
||||
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
@ -322,6 +560,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
@ -333,7 +572,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
shm_unlink(shm_name);
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
@ -349,7 +589,11 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
if ( ptr == (void * )MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
// std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
|
||||
if ( ptr == (void * )MAP_FAILED ) {
|
||||
perror("failed mmap");
|
||||
assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
@ -364,7 +608,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
|
||||
size_t size = bytes ;
|
||||
|
||||
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
int fd=shm_open(shm_name,O_RDWR,0666);
|
||||
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
|
||||
@ -430,7 +675,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
|
||||
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,comm);
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
|
||||
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
|
||||
}
|
||||
@ -504,6 +749,13 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{
|
||||
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
|
||||
if ( !MPI_is_finalised ) {
|
||||
MPI_Comm_free(&ShmComm);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -122,6 +122,8 @@ void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -23,10 +23,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_COMMON_H_
|
||||
#define _GRID_CSHIFT_COMMON_H_
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
@ -45,40 +44,44 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
|
||||
int so=plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
static Vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
auto rhs_v = rhs.View();
|
||||
if ( cbmask == 0x3 ) {
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++) ,
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
buffer[off+bo+b]=rhs_v[so+o+b];
|
||||
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
);
|
||||
} else {
|
||||
int bo=0;
|
||||
std::vector<std::pair<int,int> > table;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
thread_loop( (int i=0;i<table.size();i++),{
|
||||
buffer[off+table[i].first]=rhs_v[so+table[i].second];
|
||||
}
|
||||
thread_loop( (int i=0;i<ent;i++),{
|
||||
buffer[table[i].first]=rhs_v[table[i].second];
|
||||
});
|
||||
}
|
||||
}
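The reorganisation above (and the matching changes to Scatter_plane_simple, Copy_plane and Copy_plane_permute further down) splits each routine into a serial pass that records (destination, source) offset pairs and a flat parallel copy over that table. In outline, with placeholder helpers that are not Grid API:

// Sketch of the two-pass structure; selected(), dst_offset(), src_offset() are placeholders.
std::vector<std::pair<int,int>> table;        // (destination, source) offsets, built serially
for (int n = 0; n < e1; n++)
  for (int b = 0; b < e2; b++)
    if (selected(n, b, cbmask))               // always true for cbmask == 0x3
      table.emplace_back(dst_offset(n, b), src_offset(n, b));

// The data movement is then one dense loop, which Grid wraps in thread_loop.
for (std::size_t i = 0; i < table.size(); i++)
  buffer[table[i].first] = rhs_v[table[i].second];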
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj> void
|
||||
Gather_plane_extract(const Lattice<vobj> &rhs,ExtractPointerArray<typename vobj::scalar_object> pointers,int dimension,int plane,int cbmask)
|
||||
Gather_plane_extract(const Lattice<vobj> &rhs,
|
||||
ExtractPointerArray<typename vobj::scalar_object> pointers,
|
||||
int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
@ -102,7 +105,6 @@ Gather_plane_extract(const Lattice<vobj> &rhs,ExtractPointerArray<typename vobj:
|
||||
|
||||
vobj temp =rhs_v[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
|
||||
}
|
||||
});
|
||||
} else {
|
||||
@ -142,31 +144,37 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int stride=rhs.Grid()->_slice_stride[dimension];
|
||||
auto rhs_v = rhs.View();
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs.Grid()->_slice_stride[dimension];
|
||||
int bo =n*rhs.Grid()->_slice_block[dimension];
|
||||
rhs_v[so+o+b]=buffer[bo+b];
|
||||
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
} else {
|
||||
std::vector<std::pair<int,int> > table;
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs.Grid()->_slice_stride[dimension];
|
||||
int ocb=1<<rhs.Grid()->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
||||
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
thread_loop( (int i=0;i<table.size();i++),{
|
||||
}
|
||||
|
||||
auto rhs_v = rhs.View();
|
||||
thread_loop( (int i=0;i<ent;i++), {
|
||||
rhs_v[table[i].first]=buffer[table[i].second];
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
@ -185,8 +193,8 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension];
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
|
||||
auto rhs_v = rhs.View();
|
||||
if(cbmask ==0x3 ) {
|
||||
auto rhs_v = rhs.View();
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs.Grid()->_slice_stride[dimension];
|
||||
@ -198,9 +206,9 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA
|
||||
|
||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||
// Test_cshift_red_black code.
|
||||
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;
|
||||
// think this is buggy FIXME
|
||||
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
|
||||
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
||||
auto rhs_v = rhs.View();
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs.Grid()->_slice_stride[dimension];
|
||||
@ -225,40 +233,44 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
|
||||
int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs.Grid()->_slice_block[dimension];
|
||||
int stride = rhs.Grid()->_slice_stride[dimension];
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
thread_loop_collapse2((int n=0;n<e1;n++),{
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
vstream(lhs_v[lo+o],rhs_v[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
vstream(lhs_v[lo+o],rhs_v[ro+o]);
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
thread_loop( (int i=0;i<ent;i++),{
|
||||
lhs_v[table[i].first]=rhs_v[table[i].second];
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
{
|
||||
auto lhs_v = lhs.View();
|
||||
auto rhs_v = rhs.View();
|
||||
|
||||
int rd = rhs.Grid()->_rdimensions[dimension];
|
||||
|
||||
@ -273,15 +285,29 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
||||
int e2=rhs.Grid()->_slice_block [dimension];
|
||||
int stride = rhs.Grid()->_slice_stride[dimension];
|
||||
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++),{
|
||||
for(int b=0;b<e2;b++){
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
double t_tab,t_perm;
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
permute(lhs_v[lo+o+b],rhs_v[ro+o+b],permute_type);
|
||||
}
|
||||
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
auto rhs_v = rhs.View();
|
||||
auto lhs_v = lhs.View();
|
||||
thread_loop( (int i=0;i<ent;i++),{
|
||||
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
|
||||
});
|
||||
}
|
||||
|
||||
@ -295,6 +321,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
||||
sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
|
||||
sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd);
|
||||
|
||||
double t_local;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
@ -323,17 +351,13 @@ template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &r
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// int o = 0;
|
||||
int o = 0;
|
||||
int bo = x * grid->_ostride[dimension];
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// FIXME : This must change where we have a
|
||||
// Rotate slice.
|
||||
|
||||
// Document how this works ; why didn't I do this when I first wrote it...
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
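  // Worked illustration (not part of the patch), using the definitions above:
  // with rd = 4 and a checkerboarded shift giving sshift = 6,
  //   wrap = 1   (sshift exceeds rd, so the copied slice wraps around the local volume)
  //   num  = 2   (sshift mod rd, the point at which the wrap happens)
  // and sshift < rd would give wrap = 0, i.e. a single unwrapped plane copy.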
|
||||
@ -370,10 +394,7 @@ template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &r
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
@ -54,13 +54,13 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension

  if ( !comm_dim ) {
    //  std::cout << "Cshift_local" <<std::endl;
    //std::cout << "CSHIFT: Cshift_local" <<std::endl;
    Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
  } else if ( splice_dim ) {
    //  std::cout << "Cshift_comms_simd" <<std::endl;
    //std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
    Cshift_comms_simd(ret,rhs,dimension,shift);
  } else {
    //  std::cout << "Cshift_comms" <<std::endl;
    //std::cout << "CSHIFT: Cshift_comms" <<std::endl;
    Cshift_comms(ret,rhs,dimension,shift);
  }
  return ret;
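The three branches above are chosen per dimension; user code only ever calls the top-level Cshift. A minimal usage sketch, assuming the standard grid helpers (SpaceTimeGrid::makeFourDimGrid, GridDefaultSimd, GridDefaultMpi); the 8^4 lattice, the field and the pRNG are example choices, not part of the diff:

  // Circularly shift a field by one site in the x direction (illustrative only).
  GridCartesian *grid = SpaceTimeGrid::makeFourDimGrid(Coordinate({8,8,8,8}),
                                                       GridDefaultSimd(4,vComplex::Nsimd()),
                                                       GridDefaultMpi());
  LatticeComplex U(grid);
  gaussian(pRNG,U);                        // pRNG: an already-seeded GridParallelRNG (assumed)
  LatticeComplex Ushift = Cshift(U,0,+1);  // dimension 0, shift +1, comms handled internally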
|
||||
@ -91,9 +91,12 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vob
|
||||
sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even);
|
||||
sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
@ -175,6 +178,10 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
File diff suppressed because it is too large
@ -300,20 +300,14 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
}

template<class sobj,class vobj> inline
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
  ret.Checkerboard() = x.Checkerboard();
  conformable(ret,x);
  conformable(x,y);
  axpy(ret,a,x,y);
  return norm2(ret);
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
  return axpy_norm_fast(ret,a,x,y);
}
template<class sobj,class vobj> inline
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
  ret.Checkerboard() = x.Checkerboard();
  conformable(ret,x);
  conformable(x,y);
  axpby(ret,a,b,x,y);
  return norm2(ret); // FIXME implement parallel norm in ss loop
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
{
  return axpby_norm_fast(ret,a,b,x,y);
}

NAMESPACE_END(Grid);
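The hunk above retires the two-pass implementations (axpy/axpby followed by a separate norm2) in favour of the fused axpy_norm_fast/axpby_norm_fast from Lattice_reduction.h, which form the result and accumulate its norm in a single sweep. A hedged equivalence sketch; the fields, grid and coefficient are placeholders:

  LatticeFermion x(grid), y(grid), z(grid);   // example fields, not from the diff
  RealD a = 0.5;
  // Old behaviour: two passes over the lattice.
  axpy(z,a,x,y);   RealD n_slow = norm2(z);
  // New behaviour: one fused pass, same result up to rounding.
  RealD n_fast = axpy_norm(z,a,x,y);          // now forwards to axpy_norm_fast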
|
@ -362,6 +362,16 @@ public:
|
||||
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
||||
this->checkerboard=0;
|
||||
}
|
||||
|
||||
// virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
if (this->_grid != grid) {
|
||||
this->_grid = grid;
|
||||
this->_odata.resize(grid->oSites());
|
||||
this->checkerboard = 0;
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////
|
||||
// copy constructor
|
||||
///////////////////////////////////////////
|
||||
@ -396,6 +406,7 @@ public:
|
||||
});
|
||||
return *this;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Copy assignment
|
||||
///////////////////////////////////////////
|
||||
@ -451,7 +462,6 @@ public:
|
||||
tmp = *lp; *lp=*rp; *rp=tmp;
|
||||
}
|
||||
|
||||
|
||||
}; // class Lattice
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
@ -179,7 +179,7 @@ accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0> \
|
||||
accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs) \
|
||||
{ \
|
||||
@ -212,14 +212,15 @@ accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar
|
||||
accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) DECLARE_RELATIONAL_EQ(op,functor)
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL(==,seq);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
@ -25,8 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LATTICE_COORDINATE_H
#define GRID_LATTICE_COORDINATE_H
#pragma once

NAMESPACE_BEGIN(Grid);

@ -72,4 +71,4 @@ template<class vobj> void lex_sites(Lattice<vobj> &l){
}

NAMESPACE_END(Grid);
#endif
|
||||
|
138
Grid/lattice/Lattice_overload.h
Normal file
@ -0,0 +1,138 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_overload.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_OVERLOAD_H
|
||||
#define GRID_LATTICE_OVERLOAD_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// unary negation
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
vstream(ret._odata[ss], -r._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice BinOp Lattice,
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs+rhs._odata[0]) tmp =lhs+rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
738
Grid/lattice/Lattice_reduction.h
Normal file
@ -0,0 +1,738 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
ComplexD nrm = innerProduct(arg,arg);
|
||||
return real(nrm);
|
||||
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
scalar_type nrm;
|
||||
|
||||
GridBase *grid = left.Grid();
|
||||
|
||||
Vector<vector_type> sumarray(grid->SumArraySize());
|
||||
|
||||
auto left_v = left.View();
|
||||
auto right_v=right.View();
|
||||
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int mywork, myoff;
|
||||
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left_v[0],right_v[0])) vnrm=Zero(); // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProductD(left_v[ss],right_v[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
});
|
||||
|
||||
vector_type vvnrm; vvnrm=Zero(); // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
}
|
||||
nrm = Reduce(vvnrm);// sum across simd
|
||||
right.Grid()->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
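Both reductions above end with a GlobalSum, so every MPI rank receives the same scalar. Typical calls, as a sketch with placeholder fields:

  LatticeFermion phi(grid), chi(grid);     // example fields
  ComplexD ip = innerProduct(phi,chi);     // <phi|chi>, summed over the whole machine
  RealD    nn = norm2(phi);                // == real(innerProduct(phi,phi))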
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.Checkerboard() = x.Checkerboard();
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x.Grid();
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
auto x_v=x.View();
|
||||
auto y_v=y.View();
|
||||
auto z_v=z.View();
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),
|
||||
{
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x.Grid()->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z_v[0],z_v[0])) vnrm=Zero();
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x_v[ss]+b*y_v[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z_v[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
});
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z.Grid()->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),eval(0,expr.arg2)))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.op.func(eval(0,expr.arg1),
|
||||
eval(0,expr.arg2),
|
||||
eval(0,expr.arg3)
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
{
|
||||
GridBase *grid=arg.Grid();
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
Vector<vobj> sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=Zero();
|
||||
}
|
||||
|
||||
auto arg_v=arg.View();
|
||||
thread_loop( (int thr=0;thr<grid->SumArraySize();thr++),{
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=Zero();
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg_v[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
});
|
||||
|
||||
vobj vsum=Zero(); // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ssum=Zero();
|
||||
|
||||
ExtractBuffer<sobj> buf(Nsimd);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
arg.Grid()->GlobalSum(ssum);
|
||||
|
||||
return ssum;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// FIXME precision promoted summation
|
||||
// may be important for correlation functions
|
||||
// But easily avoided by using double precision fields
|
||||
///////////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Data.Grid();
|
||||
assert(grid!=NULL);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
Vector<vobj> lvSum(rd); // will locally sum vectors first
|
||||
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
|
||||
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
// Parallel over orthog direction
|
||||
auto Data_v=Data.View();
|
||||
thread_loop( (int r=0;r<rd;r++), {
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data_v[ss];
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
extract(lvSum[rt],extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
sobj gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=Zero();
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
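sliceSum reduces a field over everything orthogonal to orthogdim and returns one scalar object per global slice on every node, the usual building block for timeslice sums. A usage sketch with placeholder names; LatticeComplex and TComplex are the standard Grid typedefs:

  LatticeComplex mesonfield(grid);            // example field
  std::vector<TComplex> ct;                   // one entry per global timeslice
  sliceSum(mesonfield,ct,3);                  // 3 = time direction here
  for(int t=0;t<ct.size();t++)
    std::cout << t << " " << TensorRemove(ct[t]) << std::endl;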
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs.Grid());
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
Vector<vector_type> lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
auto l_v=lhs.View();
|
||||
auto r_v=rhs.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(l_v[ss], r_v[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
});
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs.Grid();
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs.Grid());
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
Vector<vector_type> lvSum(rd); // will locally sum vectors first
|
||||
Vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=Zero();
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
auto lhv=lhs.View();
|
||||
auto rhv=rhs.View();
|
||||
thread_loop( (int r=0;r<rd;r++),{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhv[ss],rhv[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
Coordinate icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
scalar_type gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=scalar_type(0.0);
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs.Grid()->GlobalDimensions()[Orthog];
|
||||
Vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int orthogdim,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced tensor_reduced;
|
||||
|
||||
scalar_type zscale(scale);
|
||||
|
||||
GridBase *grid = X.Grid();
|
||||
|
||||
int Nsimd =grid->Nsimd();
|
||||
int Nblock =grid->GlobalDimensions()[orthogdim];
|
||||
|
||||
int fd =grid->_fdimensions[orthogdim];
|
||||
int ld =grid->_ldimensions[orthogdim];
|
||||
int rd =grid->_rdimensions[orthogdim];
|
||||
|
||||
int e1 =grid->_slice_nblock[orthogdim];
|
||||
int e2 =grid->_slice_block [orthogdim];
|
||||
int stride =grid->_slice_stride[orthogdim];
|
||||
|
||||
Coordinate icoor;
|
||||
for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
vector_type av;
|
||||
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
grid->iCoorFromIindex(icoor,l);
|
||||
int ldx =r+icoor[orthogdim]*rd;
|
||||
scalar_type *as =(scalar_type *)&av;
|
||||
as[l] = scalar_type(a[ldx])*zscale;
|
||||
}
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
auto Rv=R.View();
|
||||
auto Xv=X.View();
|
||||
auto Yv=Y.View();
|
||||
thread_loop_collapse2( (int n=0;n<e1;n++) , {
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
Rv[ss] = at*Xv[ss]+Yv[ss];
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
int nsimd = BlockSolverGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt_phys(0);
|
||||
std::vector<int> simd_phys(0);
|
||||
std::vector<int> mpi_phys(0);
|
||||
|
||||
for(int d=0;d<NN;d++){
|
||||
if( d!=Orthog ) {
|
||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
||||
}
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
thread_region
|
||||
{
|
||||
Vector<vobj> s_x(Nblock);
|
||||
|
||||
thread_loop_collapse_in_region(2 ,(int n=0;n<nblock;n++), {
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X.Grid()->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
|
||||
thread_loop_collapse_in_region( 2 , (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}});
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs.Grid();
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
thread_loop_collapse_in_region( 2, (int n=0;n<nblock;n++),{
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
mat_thread(i,j) += Reduce(rtmp);
|
||||
}}
|
||||
}});
|
||||
thread_critical
|
||||
{
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
||||
|
@ -158,10 +158,19 @@ public:
    // tens of seconds per trajectory so this is clean in all reasonable cases,
    // and margin of safety is orders of magnitude.
    // We could hack Sitmo to skip in the higher order words of state if necessary
    //
    // Replace with 2^30 ; avoid problem on large volumes
    //
    /////////////////////////////////////////////////////////////////////////////////////
    // uint64_t skip = site+1;  // Old init Skipped then drew.  Checked compat with faster init
    const int shift = 30;

    uint64_t skip = site;
    skip = skip<<40;

    skip = skip<<shift;

    assert((skip >> shift)==site); // check for overflow

    eng.discard(skip);
    //  std::cout << " Engine " <<site << " state " <<eng<<std::endl;
  }
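The hunk replaces the fixed 40-bit skip by a 30-bit one so the shifted site index no longer overflows 64 bits on large volumes; the assert is the overflow guard. Illustrative arithmetic only; the site value is an example:

  const int shift = 30;
  uint64_t  site  = 12345;              // example site index
  uint64_t  skip  = site << shift;      // each site owns a block of 2^30 draws
  assert((skip >> shift) == site);      // fails only if site >= 2^(64-30) = 2^34
  // eng.discard(skip) then fast-forwards this site's engine to the start of its block.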
|
||||
@ -308,6 +317,19 @@ public:
    std::seed_seq src(seeds.begin(),seeds.end());
    Seed(src,0);
  }

  void SeedUniqueString(const std::string &s){
    std::vector<int> seeds;
    std::stringstream sha;
    seeds = GridChecksum::sha256_seeds(s);
    for(int i=0;i<seeds.size();i++) {
      sha << std::hex << seeds[i];
    }
    std::cout << GridLogMessage << "Initialising serial RNG with unique string '"
              << s << "'" << std::endl;
    std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
    SeedFixedIntegers(seeds);
  }
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
@ -370,6 +392,14 @@ public:
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
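Both RNG classes gain SeedUniqueString, which hashes an arbitrary string with SHA-256 and passes the digest words to SeedFixedIntegers, giving reproducible seeding from, e.g., a run label. A usage sketch; the label text and the grid pointer are examples:

  GridSerialRNG   sRNG;
  GridParallelRNG pRNG(grid);                          // grid: an example GridCartesian*
  sRNG.SeedUniqueString("HMC trajectory 1234 serial");
  pRNG.SeedUniqueString("HMC trajectory 1234 parallel");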
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
@ -25,8 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRANSFER_H
|
||||
#define GRID_LATTICE_TRANSFER_H
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
@ -476,9 +475,11 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
if ( d!=orthog ) {
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
thread_loop( (int idx=0;idx<lg->lSites();idx++),{
|
||||
@ -497,7 +498,7 @@ void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
@ -511,9 +512,11 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slic
|
||||
assert(orthog>=0);
|
||||
|
||||
for(int d=0;d<nh;d++){
|
||||
if ( d!=orthog ) {
|
||||
assert(lg->_processors[d] == hg->_processors[d]);
|
||||
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||
}
|
||||
}
|
||||
|
||||
// the above should guarantee that the operations are local
|
||||
thread_loop( (int idx=0;idx<lg->lSites();idx++),{
|
||||
@ -616,6 +619,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
extract(in_vobj, out_ptrs, 0);
|
||||
});
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
|
||||
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* in_grid = in._grid;
|
||||
out.resize(in_grid->lSites());
|
||||
|
||||
int ndim = in_grid->Nd();
|
||||
int in_nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<Coordinate > in_icoor(in_nsimd);
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
in_icoor[lane].resize(ndim);
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
Coordinate in_ocoor(ndim);
|
||||
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||
|
||||
Coordinate lcoor(in_grid->Nd());
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
for(int mu=0;mu<ndim;mu++)
|
||||
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
|
||||
out_ptrs[lane] = &out[lex];
|
||||
}
|
||||
|
||||
//Unpack into those ptrs
|
||||
const vobj & in_vobj = in._odata[in_oidx];
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
@ -664,11 +712,62 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
});
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
&& !isSIMDvectorized<sobj>::value, void>::type
|
||||
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* grid = out._grid;
|
||||
assert(in.size()==grid->lSites());
|
||||
|
||||
int ndim = grid->Nd();
|
||||
int nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<Coordinate > icoor(nsimd);
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
icoor[lane].resize(ndim);
|
||||
grid->iCoorFromIindex(icoor[lane],lane);
|
||||
}
|
||||
|
||||
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> ptrs(nsimd);
|
||||
|
||||
Coordinate ocoor(ndim);
|
||||
grid->oCoorFromOindex(ocoor, oidx);
|
||||
|
||||
Coordinate lcoor(grid->Nd());
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
|
||||
for(int mu=0;mu<ndim;mu++){
|
||||
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
|
||||
}
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
|
||||
ptrs[lane] = &in[lex];
|
||||
}
|
||||
|
||||
//pack from those ptrs
|
||||
vobj vecobj;
|
||||
merge1(vecobj, ptrs, 0);
|
||||
out._odata[oidx] = vecobj;
|
||||
}
|
||||
}
|
||||
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
||||
{
|
||||
assert(out.Grid()->Nd() == in.Grid()->Nd());
|
||||
for(int d=0;d<out.Grid()->Nd();d++){
|
||||
assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]);
|
||||
}
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
GridBase *in_grid=in.Grid();
|
||||
GridBase *out_grid = out.Grid();
|
||||
@ -1006,4 +1105,4 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
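precisionChange, whose declaration is reworked above, copies a field between precisions after checking that both grids cover the same global volume, and carries the checkerboard across. A hedged usage sketch; grid_d and grid_f are assumed to be matching double- and single-precision grids:

  LatticeFermionD psi_d(grid_d);
  LatticeFermionF psi_f(grid_f);
  precisionChange(psi_f,psi_d);   // demote to single precision
  precisionChange(psi_d,psi_f);   // promote back; differences are at the single-precision rounding level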
|
||||
|
86
Grid/lattice/Lattice_where.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_where.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_WHERE_H
|
||||
#define GRID_LATTICE_WHERE_H
|
||||
namespace Grid {
|
||||
// Must implement the predicate gating the
|
||||
// Must be able to reduce the predicate down to a single vInteger per site.
|
||||
// Must be able to require the type be iScalar x iScalar x ....
|
||||
// give a GetVtype method in iScalar
|
||||
// and blow away the tensor structures.
|
||||
//
|
||||
template<class vobj,class iobj>
|
||||
inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
conformable(iftrue,ret);
|
||||
|
||||
GridBase *grid=iftrue._grid;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename iobj::vector_type mask_type;
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals (Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
|
||||
extract(iftrue._odata[ss] ,truevals);
|
||||
extract(iffalse._odata[ss] ,falsevals);
|
||||
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
if (mask[s]) falsevals[s]=truevals[s];
|
||||
}
|
||||
|
||||
merge(ret._odata[ss],falsevals);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class iobj>
|
||||
inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
|
||||
Lattice<vobj> ret(iftrue._grid);
|
||||
|
||||
where(ret,predicate,iftrue,iffalse);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
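whereWolf (and the where it feeds) is a per-site select: sites where the predicate is nonzero take the iftrue value, the rest take iffalse. A sketch of a typical call, assuming the comparison operators of Lattice_comparison.h and the LatticeCoordinate helper; the field names are placeholders:

  LatticeInteger tcoor(grid);   LatticeCoordinate(tcoor,3);   // t coordinate at each site
  LatticeComplex src(grid), zilch(grid);  zilch = Zero();
  // Keep only timeslice t==2 of src, zero everywhere else.
  LatticeComplex slice = where(tcoor==Integer(2), src, zilch);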
|
@ -85,6 +85,7 @@ protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
int timing_mode;
|
||||
int topWidth{-1}, chanWidth{-1};
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
@ -123,18 +124,32 @@ public:
|
||||
Reset();
|
||||
}
|
||||
}
|
||||
void setTopWidth(const int w) {topWidth = w;}
|
||||
void setChanWidth(const int w) {chanWidth = w;}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
stream << log.background()<< std::left << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left << log.name << log.background() << " : ";
|
||||
stream << log.background()<< std::left;
|
||||
if (log.topWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.topWidth);
|
||||
}
|
||||
stream << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left;
|
||||
if (log.chanWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.chanWidth);
|
||||
}
|
||||
stream << log.name << log.background() << " : ";
|
||||
if ( log.timestamp ) {
|
||||
log.StopWatch->Stop();
|
||||
GridTime now = log.StopWatch->Elapsed();
|
||||
|
||||
if ( log.timing_mode==1 ) log.StopWatch->Reset();
|
||||
log.StopWatch->Start();
|
||||
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
|
||||
stream << log.evidence()
|
||||
<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
return stream;
|
3
Grid/parallelIO/BinaryIO.cc
Normal file
@ -0,0 +1,3 @@
#include <Grid/GridCore.h>

int Grid::BinaryIO::latticeWriteMaxRetry = -1;
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
@ -24,10 +24,9 @@
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BINARY_IO_H
|
||||
#define GRID_BINARY_IO_H
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
|
||||
#define USE_MPI_IO
|
||||
@ -79,7 +78,8 @@ inline void removeWhitespace(std::string &key)
|
||||
// Could just use a namespace
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BinaryIO {
|
||||
public:
|
||||
public:
|
||||
static int latticeWriteMaxRetry;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// more byte manipulation helpers
|
||||
@ -90,7 +90,7 @@ public:
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = lat.Grid();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
unvectorizeToLexOrdArray(scalardata,lat);
|
||||
@ -104,22 +104,26 @@ public:
|
||||
const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
|
||||
|
||||
uint64_t lsites = grid->lSites();
|
||||
if (fbuf.size() == 1) {
|
||||
if (fbuf.size() == 1)
|
||||
{
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
thread_region {
|
||||
|
||||
thread_region
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
thread_loop( (uint64_t local_site = 0; local_site < lsites; local_site++), {
|
||||
thread_loop_in_region( (uint64_t local_site = 0; local_site < lsites; local_site++),
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
for (uint64_t j = 0; j < size32; j++) {
|
||||
for (uint64_t j = 0; j < size32; j++)
|
||||
{
|
||||
nersc_csum_thr = nersc_csum_thr + site_buf[j];
|
||||
}
|
||||
});
|
||||
|
||||
thread_critical {
|
||||
thread_critical
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
}
|
||||
@ -137,20 +141,22 @@ public:
Coordinate local_start =grid->LocalStarts();
Coordinate global_vol =grid->FullDimensions();

thread_region {

thread_region
{
Coordinate coor(nd);
uint32_t scidac_csuma_thr=0;
uint32_t scidac_csumb_thr=0;
uint32_t site_crc=0;

thread_loop( (uint64_t local_site=0;local_site<lsites;local_site++),{
thread_loop_in_region( (uint64_t local_site=0;local_site<lsites;local_site++),{

uint32_t * site_buf = (uint32_t *)&fbuf[local_site];

/*
* Scidac csum is rather more heavyweight
* FIXME -- 128^3 x 256 x 16 will overflow.
*/

int global_site;

Lexicographic::CoorFromIndex(coor,local_site,local_vol);
@ -169,10 +175,10 @@ public:
// std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);

});

thread_critical {
thread_critical
{
scidac_csuma^= scidac_csuma_thr;
scidac_csumb^= scidac_csumb_thr;
}
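As an aside, the per-site rotation pattern used in this hunk can be isolated into a small helper. The sketch below is not Grid code: it assumes gsite29 and gsite31 are the global site index reduced mod 29 and mod 31 (that reduction happens in lines not shown here), and it takes the per-site CRC32 values as given rather than computing them from the munged buffer.

#include <cstdint>
#include <vector>

// rotate-left that stays well defined for n == 0 (a shift by 32 would be UB)
static inline uint32_t rotl32(uint32_t x, uint32_t n) {
  return n ? (x << n) | (x >> (32u - n)) : x;
}

// Combine per-site CRC32 values into the two SciDAC running checksums.
void scidacCombine(const std::vector<uint32_t> &siteCRC,   // one CRC32 per global site (assumed input)
                   uint32_t &scidac_csuma, uint32_t &scidac_csumb)
{
  scidac_csuma = 0;
  scidac_csumb = 0;
  for (uint64_t gsite = 0; gsite < siteCRC.size(); gsite++) {
    uint32_t gsite29 = gsite % 29;     // assumed site-dependent rotation counts
    uint32_t gsite31 = gsite % 31;
    scidac_csuma ^= rotl32(siteCRC[gsite], gsite29);
    scidac_csumb ^= rotl32(siteCRC[gsite], gsite31);
  }
}

Because XOR is associative and commutative, each thread can accumulate its own partial checksum and the thread_critical section above simply XORs the partials together, independent of visit order.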
@ -189,7 +195,7 @@ public:
|
||||
{
|
||||
uint32_t * f = (uint32_t *)file_object;
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
thread_loop( (uint64_t i=0;i<count;i++), {
|
||||
thread_loop( (uint64_t i=0;i<count;i++),{
|
||||
f[i] = ntohl(f[i]);
|
||||
});
|
||||
}
|
||||
@ -200,7 +206,7 @@ public:
|
||||
uint32_t f;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
thread_loop( (uint64_t i=0;i<count;i++), {
|
||||
thread_loop( (uint64_t i=0;i<count;i++),{
|
||||
f = fp[i];
|
||||
// data arrives in network byte order; convert network to host
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
@ -225,7 +231,7 @@ public:
|
||||
uint64_t f,g;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint64_t);
|
||||
thread_loop( (uint64_t i=0;i<count;i++), {
|
||||
thread_loop( (uint64_t i=0;i<count;i++),{
|
||||
f = fp[i];
|
||||
// data arrives in network byte order; convert network to host
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
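For reference, the byte reversal applied in these loops can be written as small standalone helpers. This is a sketch of the generic pattern only, not the routine above (the 64-bit case in the hunk continues in lines that are not shown):

#include <cstdint>

static inline uint32_t byteswap32(uint32_t f) {
  return ((f & 0x000000FFu) << 24) | ((f & 0x0000FF00u) << 8) |
         ((f & 0x00FF0000u) >> 8)  | ((f & 0xFF000000u) >> 24);
}

static inline uint64_t byteswap64(uint64_t f) {
  // swap each 32-bit half, then exchange the halves
  uint64_t lo = byteswap32((uint32_t)(f & 0xFFFFFFFFu));
  uint64_t hi = byteswap32((uint32_t)(f >> 32));
  return (lo << 32) | hi;
}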
@ -251,7 +257,7 @@ public:
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -358,7 +364,7 @@ public:
|
||||
#endif
|
||||
} else {
|
||||
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
std::ifstream fin;
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
if (control & BINARYIO_MASTER_APPEND)
|
||||
@ -419,14 +425,20 @@ public:
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O set view " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI read I/O write all " << file << std::endl;
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
@ -436,16 +448,20 @@ public:
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
try {
|
||||
if (offset) { // Must already exist and contain data
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
} else { // Allow create
|
||||
fout.open(file,std::ios::binary|std::ios::out);
|
||||
}
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
|
||||
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
|
||||
std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
@ -479,6 +495,7 @@ public:
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
@ -513,7 +530,7 @@ public:
|
||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -523,7 +540,7 @@ public:
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@ -534,7 +551,7 @@ public:
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
thread_loop( (int x=0;x<lsites;x++), { munge(iodata[x], scalardata[x]); });
|
||||
thread_loop( (uint64_t x=0;x<lsites;x++), { munge(iodata[x], scalardata[x]); });
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||
grid->Barrier();
|
||||
@ -550,7 +567,7 @@ public:
|
||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
@ -559,7 +576,9 @@ public:
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
GridBase *grid = Umu.Grid();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t lsites = grid->lSites(), offsetCopy = offset;
|
||||
int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry);
|
||||
bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0);
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
@ -570,13 +589,39 @@ public:
|
||||
GridStopWatch timer; timer.Start();
|
||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||
|
||||
thread_loop( (int x=0;x<lsites;x++), { munge(scalardata[x],iodata[x]);});
|
||||
thread_loop( (uint64_t x=0;x<lsites;x++), { munge(scalardata[x],iodata[x]); });
|
||||
|
||||
grid->Barrier();
|
||||
timer.Stop();
|
||||
|
||||
while (attemptsLeft >= 0)
|
||||
{
|
||||
grid->Barrier();
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
if (checkWrite)
|
||||
{
|
||||
std::vector<fobj> ckiodata(lsites);
|
||||
uint32_t cknersc_csum, ckscidac_csuma, ckscidac_csumb;
|
||||
uint64_t ckoffset = offsetCopy;
|
||||
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read back object" << std::endl;
|
||||
grid->Barrier();
|
||||
IOobject(w,grid,ckiodata,file,ckoffset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
cknersc_csum,ckscidac_csuma,ckscidac_csumb);
|
||||
if ((cknersc_csum != nersc_csum) or (ckscidac_csuma != scidac_csuma) or (ckscidac_csumb != scidac_csumb))
|
||||
{
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
|
||||
offset = offsetCopy;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << GridLogMessage << "writeLatticeObject: read test checksum correct" << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
attemptsLeft--;
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
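The retry logic introduced above generalises to any write path that can recompute a checksum on read-back. A minimal sketch of the same pattern, with caller-supplied writeOnce/readBack callbacks standing in for the IOobject calls (names are placeholders, not Grid API):

#include <cstdint>
#include <algorithm>
#include <functional>
#include <iostream>
#include <string>

bool writeWithRetry(const std::string &file, int maxRetry,
                    const std::function<uint32_t(const std::string &)> &writeOnce,
                    const std::function<uint32_t(const std::string &)> &readBack)
{
  int  attemptsLeft = std::max(0, maxRetry);
  bool checkWrite   = (maxRetry >= 0);            // a negative maxRetry disables verification
  while (attemptsLeft >= 0) {
    uint32_t csum = writeOnce(file);              // write and remember the checksum
    if (!checkWrite)            return true;      // fire and forget
    if (readBack(file) == csum) return true;      // read-back checksum matches: done
    std::cout << "checksum mismatch, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
    attemptsLeft--;
  }
  return false;                                   // all attempts exhausted
}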
@ -587,7 +632,7 @@ public:
|
||||
static inline void readRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@ -600,8 +645,8 @@ public:
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
GridBase *grid = parallel.Grid();
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp = 0;
|
||||
uint32_t scidac_csuma_tmp = 0;
|
||||
@ -616,7 +661,7 @@ public:
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
timer.Start();
|
||||
thread_loop( (int lidx=0;lidx<lsites;lidx++),{
|
||||
thread_loop( (uint64_t lidx=0;lidx<lsites;lidx++),{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||
parallel.SetState(tmp,lidx);
|
||||
@ -649,7 +694,7 @@ public:
|
||||
static inline void writeRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
Integer offset,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
@ -660,8 +705,8 @@ public:
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
|
||||
GridBase *grid = parallel.Grid();
|
||||
int gsites = grid->gSites();
|
||||
int lsites = grid->lSites();
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp;
|
||||
uint32_t scidac_csuma_tmp;
|
||||
@ -674,7 +719,7 @@ public:
|
||||
|
||||
timer.Start();
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
thread_loop( (int lidx=0;lidx<lsites;lidx++),{
|
||||
thread_loop( (uint64_t lidx=0;lidx<lsites;lidx++),{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
parallel.GetState(tmp,lidx);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||
@ -683,7 +728,6 @@ public:
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
@ -703,5 +747,5 @@ public:
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
@ -23,9 +23,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ILDG_IO_H
|
||||
#define GRID_ILDG_IO_H
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
#include <algorithm>
|
||||
@ -38,28 +37,28 @@ directory
|
||||
#include <sys/utsname.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//C-Lime is a must have for this functionality
|
||||
//C-Lime is a must have for this functionality
|
||||
extern "C" {
|
||||
#include "lime.h"
|
||||
}
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////
|
||||
// Encode word types as strings
|
||||
/////////////////////////////////
|
||||
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
|
||||
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
|
||||
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
|
||||
/////////////////////////////////
|
||||
// Encode word types as strings
|
||||
/////////////////////////////////
|
||||
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
|
||||
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
|
||||
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Encode a generic tensor as a string
|
||||
/////////////////////////////////////////
|
||||
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
|
||||
/////////////////////////////////////////
|
||||
// Encode a generic tensor as a string
|
||||
/////////////////////////////////////////
|
||||
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
|
||||
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
@ -108,21 +107,21 @@ template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins,
|
||||
datacount = _LorentzN;
|
||||
|
||||
return stream.str();
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
|
||||
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
|
||||
return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Helper to fill out metadata
|
||||
////////////////////////////////////////////////////////////
|
||||
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||
////////////////////////////////////////////////////////////
|
||||
// Helper to fill out metadata
|
||||
////////////////////////////////////////////////////////////
|
||||
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||
FieldMetaData &header,
|
||||
scidacRecord & _scidacRecord,
|
||||
scidacFile & _scidacFile)
|
||||
{
|
||||
{
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
/////////////////////////////////////
|
||||
@ -147,25 +146,25 @@ template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||
_scidacRecord = sr;
|
||||
|
||||
// std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Scidac checksum
|
||||
///////////////////////////////////////////////////////
|
||||
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// Scidac checksum
|
||||
///////////////////////////////////////////////////////
|
||||
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
|
||||
{
|
||||
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
|
||||
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
|
||||
if ( scidac_csuma !=scidac_checksuma) return 0;
|
||||
if ( scidac_csumb !=scidac_checksumb) return 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lime, ILDG and Scidac I/O classes
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
class GridLimeReader : public BinaryIO {
|
||||
public:
|
||||
public:
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
///////////////////////////////////////////////////
|
||||
@ -181,6 +180,11 @@ public:
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
@ -227,7 +231,8 @@ public:
|
||||
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout << GridLogMessage << "SciDAC checksum A " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "SciDAC checksum B " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
/////////////////////////////////////////////
|
||||
// Insist checksum is next record
|
||||
/////////////////////////////////////////////
|
||||
@ -244,10 +249,8 @@ public:
|
||||
////////////////////////////////////////////
|
||||
// Read a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void readLimeObject(std::string &xmlstring,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
@ -261,18 +264,29 @@ public:
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
read(RD,object_name,object);
|
||||
xmlstring = std::string(&xmlc[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
|
||||
readLimeObject(xmlstring, record_name);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
}
|
||||
};
|
||||
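A usage sketch for the reader interface above; the file name, record label and the MyRecord type are placeholders, not names defined in Grid:

#include <string>

void readExample(void)
{
  GridLimeReader reader;
  reader.open("ckpoint_lat.lime");                 // placeholder file name

  MyRecord rec;                                    // hypothetical serialisable class
  reader.readLimeObject(rec, "myRecord", "my-record-xml");
  // equivalently, the raw XML of the record could be pulled first:
  //   std::string xml; reader.readLimeObject(xml, "my-record-xml");
}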
|
||||
class GridLimeWriter : public BinaryIO {
|
||||
public:
|
||||
class GridLimeWriter : public BinaryIO
|
||||
{
|
||||
public:
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
// FIXME: collective calls or not ?
|
||||
@ -281,17 +295,24 @@ public:
|
||||
FILE *File;
|
||||
LimeWriter *LimeW;
|
||||
std::string filename;
|
||||
|
||||
bool boss_node;
|
||||
GridLimeWriter( bool isboss = true) {
|
||||
boss_node = isboss;
|
||||
}
|
||||
void open(const std::string &_filename) {
|
||||
filename= _filename;
|
||||
if ( boss_node ) {
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void) {
|
||||
if ( boss_node ) {
|
||||
fclose(File);
|
||||
}
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
///////////////////////////////////////////////////////
|
||||
@ -299,24 +320,22 @@ public:
|
||||
///////////////////////////////////////////////////////
|
||||
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
return LIME_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Write a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
|
||||
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
|
||||
{
|
||||
std::string xmlstring;
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
write(WR,object_name,object);
|
||||
xmlstring = WR.XmlString();
|
||||
}
|
||||
if ( boss_node ) {
|
||||
std::string xmlstring = writer.docString();
|
||||
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
@ -328,48 +347,95 @@ public:
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
// std::cout << " File offset is now"<<ftello(File) << std::endl;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
|
||||
if (scientificPrec)
|
||||
{
|
||||
WR.scientificFormat(true);
|
||||
WR.setPrecision(scientificPrec);
|
||||
}
|
||||
write(WR,object_name,object);
|
||||
writeLimeObject(MB, ME, WR, object_name, record_name);
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
// Write a generic lattice field and csum
|
||||
////////////////////////////////////////////
|
||||
// This routine is Collectively called by all nodes
|
||||
// in communicator used by the field.Grid()
|
||||
////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * field.Grid()->_gsites;
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field.Grid()->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||
// same file through different file handles (integer units).
|
||||
//
|
||||
// These are both buffered, so why I think this code is right is as follows.
|
||||
//
|
||||
// i) write record header to FILE *File, telegraphing the size.
|
||||
// i) write record header to FILE *File, telegraphing the size; flush
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||
// Closes iostream and flushes.
|
||||
// iv) fseek on FILE * to end of this disjoint section.
|
||||
// v) Continue writing scidac record.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
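// Illustrative aside (not part of the patch): under the assumptions spelled out
// above, the handshake between the buffered FILE* handle and the out-of-band
// binary writer boils down to three file-position operations:
//
//   fflush(File);                    // (i)  header bytes are on disk, size telegraphed
//   uint64_t start = ftello(File);   // (ii) payload of this record begins here
//   /* (iii) MPI-IO / iostream writes PayloadSize bytes at 'start' via its own handle */
//   fseek(File, 0, SEEK_END);        // (iv) rejoin the FILE* stream after the payload
//   assert((uint64_t)ftello(File) - start == PayloadSize);  // sanity check, as done below
//
// The assert is the cheap consistency check that the two handles wrote disjoint,
// contiguous byte ranges before (v) the next LIME record is appended.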
uint64_t offset = ftello(File);
|
||||
// std::cout << " Writing to offset "<<offset << std::endl;
|
||||
|
||||
GridBase *grid = field.Grid();
|
||||
assert(boss_node == field.Grid()->IsBoss() );
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
|
||||
if ( boss_node ) {
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
fflush(File);
|
||||
}
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field.Grid()->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Check all nodes agree on file position
|
||||
////////////////////////////////////////////////
|
||||
uint64_t offset1;
|
||||
if ( boss_node ) {
|
||||
offset1 = ftello(File);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
|
||||
|
||||
///////////////////////////////////////////
|
||||
// The above is collective. Write by other means into the binary record
|
||||
///////////////////////////////////////////
|
||||
std::string format = getFormatString<vobj>();
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
// fseek(File,0,SEEK_END); offset = ftello(File);std::cout << " offset now "<<offset << std::endl;
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Wind forward and close the record
|
||||
///////////////////////////////////////////
|
||||
if ( boss_node ) {
|
||||
fseek(File,0,SEEK_END);
|
||||
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||
assert( (offset2-offset1) == PayloadSize);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Check MPI-2 I/O did what we expect to file
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
if ( boss_node ) {
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Write checksum element, propagating forward from the BinaryIO
|
||||
// Always pair a checksum with a binary object, and close message
|
||||
@ -379,26 +445,32 @@ public:
|
||||
std::stringstream streamb; streamb << std::hex << scidac_csumb;
|
||||
checksum.suma= streama.str();
|
||||
checksum.sumb= streamb.str();
|
||||
// std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
|
||||
if ( boss_node ) {
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ScidacWriter : public GridLimeWriter {
|
||||
public:
|
||||
public:
|
||||
|
||||
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
|
||||
const unsigned int recordScientificPrec = 0)
|
||||
{
|
||||
GridBase * grid = field.Grid();
|
||||
|
||||
@ -414,16 +486,19 @@ public:
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
}
|
||||
// Collective call
|
||||
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ScidacReader : public GridLimeReader {
|
||||
public:
|
||||
public:
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
@ -482,7 +557,9 @@ public:
|
||||
|
||||
|
||||
class IldgWriter : public ScidacWriter {
|
||||
public:
|
||||
public:
|
||||
|
||||
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
|
||||
|
||||
///////////////////////////////////
|
||||
// A little helper
|
||||
@ -567,12 +644,11 @@ public:
|
||||
writeLimeIldgLFN(header.ildg_lfn); // rec
|
||||
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
// limeDestroyWriter(LimeW);
|
||||
fclose(File);
|
||||
}
|
||||
};
|
||||
|
||||
class IldgReader : public GridLimeReader {
|
||||
public:
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Read either Grid/SciDAC/ILDG configuration
|
||||
@ -593,7 +669,7 @@ public:
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
||||
auto dims = Umu.Grid()->FullDimensions();
|
||||
Coordinate dims = Umu.Grid()->FullDimensions();
|
||||
|
||||
assert(dims.size()==4);
|
||||
|
||||
@ -643,9 +719,11 @@ public:
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
@ -660,13 +738,13 @@ public:
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
@ -680,18 +758,17 @@ public:
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
std::string xmls(&xmlc[0]);
|
||||
// is it a USQCD info field
|
||||
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(&xmlc[0],"");
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
||||
@ -722,7 +799,6 @@ public:
|
||||
assert(found_ildgBinary);
|
||||
assert(found_ildgFormat);
|
||||
assert(found_scidacChecksum);
|
||||
assert(found_ildgLFN==0);
|
||||
|
||||
// Must find something with the lattice dimensions
|
||||
assert(found_FieldMetaData||found_ildgFormat);
|
||||
@ -788,11 +864,11 @@ public:
|
||||
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
//HAVE_LIME
|
||||
#endif
|
||||
|
||||
#endif
|
@ -136,8 +136,9 @@ public:
|
||||
int, typesize,
|
||||
int, datacount);
|
||||
|
||||
scidacRecord() { version =1.0; }
|
||||
|
||||
scidacRecord()
|
||||
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
|
||||
{}
|
||||
};
|
||||
|
||||
////////////////////////
|
@ -81,22 +81,19 @@ public:
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
FieldMetaData(void) {
|
||||
nd=4;
|
||||
dimension.resize(4);
|
||||
boundary.resize(4);
|
||||
scidac_checksuma=0;
|
||||
scidac_checksumb=0;
|
||||
checksum=0;
|
||||
}
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initialise to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
// PB disable using namespace - this is a header and forces namespace visibility for all
|
||||
// including files
|
||||
//using namespace Grid;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
@ -56,6 +56,7 @@ public:
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
uint64_t offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
@ -137,7 +138,7 @@ public:
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
int offset = readHeader(file,grid,header);
|
||||
uint64_t offset = readHeader(file,Umu.Grid(),header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
@ -232,21 +233,25 @@ public:
|
||||
GaugeStatistics(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
|
||||
truncate(file);
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
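The header/offset handshake used here (and again for the RNG state below) follows one pattern: only the boss rank touches the header, the resulting data offset is broadcast so every rank writes the payload at the same position, and the header is rewritten once the checksum is known. A condensed sketch, not the actual member function; truncate and writeHeader stand for the helpers already used above, and the collective payload write is elided:

static void writeWithHeader(GridBase *grid, FieldMetaData &header, std::string file)
{
  uint64_t offset = 0;
  if ( grid->IsBoss() ) {                 // rank 0 creates the file and writes the header
    truncate(file);
    offset = writeHeader(header, file);
  }
  grid->Broadcast(0, (void *)&offset, sizeof(offset));  // all ranks agree where data starts

  // ... collective binary write of the payload at 'offset', producing nersc_csum ...

  if ( grid->IsBoss() ) {                 // header is rewritten once the checksum is known
    writeHeader(header, file);
  }
}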
@ -274,7 +279,7 @@ public:
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
int offset;
|
||||
uint64_t offset;
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
@ -289,12 +294,18 @@ public:
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
@ -309,7 +320,7 @@ public:
|
||||
|
||||
GridBase *grid = parallel.Grid();
|
||||
|
||||
int offset = readHeader(file,grid,header);
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
@ -47,14 +47,38 @@ inline double usecond(void) {

typedef std::chrono::system_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::milliseconds GridTime;
typedef std::chrono::microseconds GridUsecs;

inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
typedef std::chrono::seconds GridSecs;
typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;

inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" ms";
stream << time.count()<<" s";
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridMillisecs & now)
{
GridSecs second(1);
auto secs = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << secs<<"."<<std::setw(3)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}
inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now)
{
GridSecs second(1);
auto seconds = now/second ;
auto subseconds = now%second ;
auto fill = stream.fill();
stream << seconds<<"."<<std::setw(6)<<std::setfill('0')<<subseconds.count()<<" s";
stream.fill(fill);
return stream;
}


class GridStopWatch {
private:
@ -94,6 +118,9 @@ public:
assert(running == false);
return (uint64_t) accumulator.count();
}
bool isRunning(void){
return running;
}
};

NAMESPACE_END(Grid)
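A short usage sketch of the stopwatch with the new time formatting; it assumes only the GridStopWatch members visible in this diff (Start, Stop, Elapsed and Reset from the logger hunk earlier, plus the isRunning added here):

#include <iostream>

using namespace Grid;

void timeSomething(void)
{
  GridStopWatch watch;
  watch.Start();
  // ... work being timed ...
  watch.Stop();
  // GridTime is now microseconds, so Elapsed() prints as "<seconds>.<microseconds> s"
  std::cout << "elapsed " << watch.Elapsed() << std::endl;
  if (!watch.isRunning()) watch.Reset();
}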
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.6
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -17,6 +17,9 @@
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to enable compact mode
|
||||
// #define PUGIXML_COMPACT
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
@ -46,7 +49,7 @@
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
File diff suppressed because it is too large
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* pugixml parser - version 1.8
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
#ifndef PUGIXML_VERSION
|
||||
// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons
|
||||
# define PUGIXML_VERSION 180
|
||||
# define PUGIXML_VERSION 190
|
||||
#endif
|
||||
|
||||
// Include user configuration file (this can define various configuration macros)
|
||||
@ -81,10 +81,30 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'noexcept' specifiers
|
||||
#ifndef PUGIXML_NOEXCEPT
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1900
|
||||
# define PUGIXML_NOEXCEPT noexcept
|
||||
# else
|
||||
# define PUGIXML_NOEXCEPT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Some functions can not be noexcept in compact mode
|
||||
#ifdef PUGIXML_COMPACT
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT
|
||||
#else
|
||||
# define PUGIXML_NOEXCEPT_IF_NOT_COMPACT PUGIXML_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// If C++ is 2011 or higher, add 'override' qualifiers
|
||||
#ifndef PUGIXML_OVERRIDE
|
||||
# if __cplusplus >= 201103
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# elif defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
# define PUGIXML_OVERRIDE override
|
||||
# else
|
||||
# define PUGIXML_OVERRIDE
|
||||
# endif
|
||||
@ -631,8 +651,8 @@ namespace pugi
|
||||
xpath_node_set select_nodes(const xpath_query& query) const;
|
||||
|
||||
// (deprecated: use select_node instead) Select single node by evaluating XPath query.
|
||||
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
xpath_node select_single_node(const xpath_query& query) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
|
||||
PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const;
|
||||
|
||||
#endif
|
||||
|
||||
@ -983,6 +1003,7 @@ namespace pugi
|
||||
|
||||
void _create();
|
||||
void _destroy();
|
||||
void _move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
|
||||
public:
|
||||
// Default constructor, makes empty document
|
||||
@ -991,6 +1012,12 @@ namespace pugi
|
||||
// Destructor, invalidates all node/attribute handles to this document
|
||||
~xml_document();
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
xml_document& operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT;
|
||||
#endif
|
||||
|
||||
// Removes all nodes, leaving the empty document
|
||||
void reset();
|
||||
|
||||
@ -1004,7 +1031,7 @@ namespace pugi
|
||||
#endif
|
||||
|
||||
// (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
PUGIXML_DEPRECATED xml_parse_result load(const char_t* contents, unsigned int options = parse_default);
|
||||
|
||||
// Load document from zero-terminated string. No encoding conversions are applied.
|
||||
xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default);
|
||||
@ -1131,8 +1158,8 @@ namespace pugi
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_variable_set(xpath_variable_set&& rhs);
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs);
|
||||
xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_variable_set& operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Add a new variable or get the existing one, if the types match
|
||||
@ -1175,8 +1202,8 @@ namespace pugi
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_query(xpath_query&& rhs);
|
||||
xpath_query& operator=(xpath_query&& rhs);
|
||||
xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_query& operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get query expression return type
|
||||
@ -1316,8 +1343,8 @@ namespace pugi
|
||||
|
||||
#ifdef PUGIXML_HAS_MOVE
|
||||
// Move semantics support
|
||||
xpath_node_set(xpath_node_set&& rhs);
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs);
|
||||
xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
xpath_node_set& operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT;
|
||||
#endif
|
||||
|
||||
// Get collection type
|
||||
@ -1351,7 +1378,7 @@ namespace pugi
|
||||
xpath_node* _end;
|
||||
|
||||
void _assign(const_iterator begin, const_iterator end, type_t type);
|
||||
void _move(xpath_node_set& rhs);
|
||||
void _move(xpath_node_set& rhs) PUGIXML_NOEXCEPT;
|
||||
};
|
||||
#endif
|
||||
|
||||
@ -1409,7 +1436,7 @@ namespace std
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2016 Arseny Kapoulkine
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
@ -1,6 +1,6 @@
|
||||
pugixml 1.6 - an XML processing library
|
||||
pugixml 1.9 - an XML processing library
|
||||
|
||||
Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Report bugs and download new versions at http://pugixml.org/
|
||||
|
||||
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||
@ -28,7 +28,7 @@ The distribution contains the following folders:
|
||||
|
||||
This library is distributed under the MIT License:
|
||||
|
||||
Copyright (c) 2006-2015 Arseny Kapoulkine
|
||||
Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
Some files were not shown because too many files have changed in this diff