mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-14 22:07:05 +01:00
Hadrons: moving Hadrons to root directory, build system improvements
This commit is contained in:
37
Grid/DisableWarnings.h
Normal file
37
Grid/DisableWarnings.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/DisableWarnings.h
|
||||
|
||||
Copyright (C) 2016
|
||||
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef DISABLE_WARNINGS_H
|
||||
#define DISABLE_WARNINGS_H
|
||||
|
||||
//disables and intel compiler specific warning (in json.hpp)
|
||||
#pragma warning disable 488
|
||||
|
||||
|
||||
#endif
|
49
Grid/Grid.h
Normal file
49
Grid/Grid.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Grid.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
//
|
||||
// Grid.h
|
||||
// simd
|
||||
//
|
||||
// Created by Peter Boyle on 09/05/2014.
|
||||
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef GRID_H
|
||||
#define GRID_H
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/GridQCDcore.h>
|
||||
#include <Grid/qcd/action/Action.h>
|
||||
#include <Grid/qcd/utils/GaugeFix.h>
|
||||
#include <Grid/qcd/smearing/Smearing.h>
|
||||
#include <Grid/parallelIO/MetaData.h>
|
||||
#include <Grid/qcd/hmc/HMC_aggregate.h>
|
||||
|
||||
#endif
|
61
Grid/GridCore.h
Normal file
61
Grid/GridCore.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Grid.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
//
|
||||
// Grid.h
|
||||
// simd
|
||||
//
|
||||
// Created by Peter Boyle on 09/05/2014.
|
||||
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef GRID_BASE_H
|
||||
#define GRID_BASE_H
|
||||
|
||||
#include <Grid/GridStd.h>
|
||||
|
||||
#include <Grid/perfmon/Timer.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/log/Log.h>
|
||||
#include <Grid/allocator/AlignedAllocator.h>
|
||||
#include <Grid/simd/Simd.h>
|
||||
#include <Grid/serialisation/Serialisation.h>
|
||||
#include <Grid/threads/Threads.h>
|
||||
#include <Grid/util/Util.h>
|
||||
#include <Grid/util/Sha.h>
|
||||
#include <Grid/communicator/Communicator.h>
|
||||
#include <Grid/cartesian/Cartesian.h>
|
||||
#include <Grid/tensors/Tensors.h>
|
||||
#include <Grid/lattice/Lattice.h>
|
||||
#include <Grid/cshift/Cshift.h>
|
||||
#include <Grid/stencil/Stencil.h>
|
||||
#include <Grid/parallelIO/BinaryIO.h>
|
||||
#include <Grid/algorithms/Algorithms.h>
|
||||
|
||||
#endif
|
42
Grid/GridQCDcore.h
Normal file
42
Grid/GridQCDcore.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Grid.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_CORE_H
|
||||
#define GRID_QCD_CORE_H
|
||||
|
||||
/////////////////////////
|
||||
// Core Grid QCD headers
|
||||
/////////////////////////
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/qcd/QCD.h>
|
||||
#include <Grid/qcd/spin/Spin.h>
|
||||
#include <Grid/qcd/utils/Utils.h>
|
||||
#include <Grid/qcd/representations/Representations.h>
|
||||
|
||||
#endif
|
29
Grid/GridStd.h
Normal file
29
Grid/GridStd.h
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef GRID_STD_H
|
||||
#define GRID_STD_H
|
||||
|
||||
///////////////////
|
||||
// Std C++ dependencies
|
||||
///////////////////
|
||||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <random>
|
||||
#include <functional>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <ctime>
|
||||
#include <sys/time.h>
|
||||
#include <chrono>
|
||||
#include <zlib.h>
|
||||
|
||||
///////////////////
|
||||
// Grid config
|
||||
///////////////////
|
||||
#include "Config.h"
|
||||
|
||||
#endif /* GRID_STD_H */
|
9
Grid/Grid_Eigen_Dense.h
Normal file
9
Grid/Grid_Eigen_Dense.h
Normal file
@ -0,0 +1,9 @@
|
||||
#pragma once
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
#include <Grid/Eigen/Dense>
|
||||
#if defined __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
37
Grid/Makefile.am
Normal file
37
Grid/Makefile.am
Normal file
@ -0,0 +1,37 @@
|
||||
extra_sources=
|
||||
extra_headers=
|
||||
|
||||
if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_mpi3.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryMPI.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_NONE
|
||||
extra_sources+=communicator/Communicator_none.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
extra_sources+=communicator/SharedMemoryNone.cc
|
||||
extra_sources+=communicator/SharedMemory.cc
|
||||
endif
|
||||
|
||||
if BUILD_HDF5
|
||||
extra_sources+=serialisation/Hdf5IO.cc
|
||||
extra_headers+=serialisation/Hdf5IO.h
|
||||
extra_headers+=serialisation/Hdf5Type.h
|
||||
endif
|
||||
|
||||
#
|
||||
# Libraries
|
||||
#
|
||||
include Make.inc
|
||||
include Eigen.inc
|
||||
|
||||
lib_LIBRARIES = libGrid.a
|
||||
|
||||
CCFILES += $(extra_sources)
|
||||
HFILES += $(extra_headers)
|
||||
|
||||
libGrid_a_SOURCES = $(CCFILES)
|
||||
libGrid_adir = $(includedir)/Grid
|
||||
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) $(eigen_unsupp_files) Config.h
|
61
Grid/algorithms/Algorithms.h
Normal file
61
Grid/algorithms/Algorithms.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Algorithms.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHMS_H
|
||||
#define GRID_ALGORITHMS_H
|
||||
|
||||
#include <Grid/algorithms/SparseMatrix.h>
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
#include <Grid/algorithms/Preconditioner.h>
|
||||
|
||||
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||
#include <Grid/algorithms/approx/Remez.h>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/Deflation.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||
#include <Grid/algorithms/FFT.h>
|
||||
|
||||
// EigCg
|
||||
// Pcg
|
||||
// Hdcg
|
||||
// GCR
|
||||
// etc..
|
||||
|
||||
#endif
|
480
Grid/algorithms/CoarsenedMatrix.h
Normal file
480
Grid/algorithms/CoarsenedMatrix.h
Normal file
@ -0,0 +1,480 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/CoarsenedMatrix.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class Geometry {
|
||||
// int dimension;
|
||||
public:
|
||||
int npoint;
|
||||
std::vector<int> directions ;
|
||||
std::vector<int> displacements;
|
||||
|
||||
Geometry(int _d) {
|
||||
|
||||
int base = (_d==5) ? 1:0;
|
||||
|
||||
// make coarse grid stencil for 4d , not 5d
|
||||
if ( _d==5 ) _d=4;
|
||||
|
||||
npoint = 2*_d+1;
|
||||
directions.resize(npoint);
|
||||
displacements.resize(npoint);
|
||||
for(int d=0;d<_d;d++){
|
||||
directions[2*d ] = d+base;
|
||||
directions[2*d+1] = d+base;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*_d]=0;
|
||||
displacements[2*_d]=0;
|
||||
|
||||
//// report back
|
||||
std::cout<<GridLogMessage<<"directions :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
||||
std::cout <<std::endl;
|
||||
std::cout<<GridLogMessage<<"displacements :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
|
||||
/*
|
||||
// Original cleaner code
|
||||
Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) {
|
||||
for(int d=0;d<dimension;d++){
|
||||
directions[2*d ] = d;
|
||||
directions[2*d+1] = d;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*dimension]=0;
|
||||
displacements[2*dimension]=0;
|
||||
}
|
||||
std::vector<int> GetDelta(int point) {
|
||||
std::vector<int> delta(dimension,0);
|
||||
delta[directions[point]] = displacements[point];
|
||||
return delta;
|
||||
};
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class Aggregation {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
|
||||
GridBase *CoarseGrid;
|
||||
GridBase *FineGrid;
|
||||
std::vector<Lattice<Fobj> > subspace;
|
||||
int checkerboard;
|
||||
|
||||
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
|
||||
CoarseGrid(_CoarseGrid),
|
||||
FineGrid(_FineGrid),
|
||||
subspace(nbasis,_FineGrid),
|
||||
checkerboard(_checkerboard)
|
||||
{
|
||||
};
|
||||
|
||||
void Orthogonalise(void){
|
||||
CoarseScalar InnerProd(CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
|
||||
// CheckOrthogonal();
|
||||
}
|
||||
void CheckOrthogonal(void){
|
||||
CoarseVector iProj(CoarseGrid);
|
||||
CoarseVector eProj(CoarseGrid);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
blockProject(iProj,subspace[i],subspace);
|
||||
eProj=zero;
|
||||
parallel_for(int ss=0;ss<CoarseGrid->oSites();ss++){
|
||||
eProj._odata[ss](i)=CComplex(1.0);
|
||||
}
|
||||
eProj=eProj - iProj;
|
||||
std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage <<"CheckOrthog done"<<std::endl;
|
||||
}
|
||||
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
|
||||
blockProject(CoarseVec,FineVec,subspace);
|
||||
}
|
||||
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
|
||||
FineVec.checkerboard = subspace[0].checkerboard;
|
||||
blockPromote(CoarseVec,FineVec,subspace);
|
||||
}
|
||||
void CreateSubspaceRandom(GridParallelRNG &RNG){
|
||||
for(int i=0;i<nbasis;i++){
|
||||
random(RNG,subspace[i]);
|
||||
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
|
||||
}
|
||||
Orthogonalise();
|
||||
}
|
||||
|
||||
/*
|
||||
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
|
||||
{
|
||||
// Run a Lanczos with sloppy convergence
|
||||
const int Nstop = nn;
|
||||
const int Nk = nn+20;
|
||||
const int Np = nn+20;
|
||||
const int Nm = Nk+Np;
|
||||
const int MaxIt= 10000;
|
||||
RealD resid = 1.0e-3;
|
||||
|
||||
Chebyshev<FineField> Cheb(0.5,64.0,21);
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
|
||||
// IRL.lock = 1;
|
||||
|
||||
FineField noise(FineGrid); gaussian(RNG,noise);
|
||||
FineField tmp(FineGrid);
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<FineField> evec(Nm,FineGrid);
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(eval,evec,
|
||||
noise,
|
||||
Nconv);
|
||||
|
||||
// pull back nn vectors
|
||||
for(int b=0;b<nn;b++){
|
||||
|
||||
subspace[b] = evec[b];
|
||||
|
||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
||||
|
||||
hermop.Op(subspace[b],tmp);
|
||||
std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
|
||||
|
||||
noise = tmp - sqrt(eval[b])*subspace[b] ;
|
||||
|
||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
||||
|
||||
noise = tmp + eval[b]*subspace[b] ;
|
||||
|
||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
||||
|
||||
}
|
||||
Orthogonalise();
|
||||
for(int b=0;b<nn;b++){
|
||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
||||
|
||||
RealD scale;
|
||||
|
||||
ConjugateGradient<FineField> CG(1.0e-2,10000);
|
||||
FineField noise(FineGrid);
|
||||
FineField Mn(FineGrid);
|
||||
|
||||
for(int b=0;b<nn;b++){
|
||||
|
||||
gaussian(RNG,noise);
|
||||
scale = std::pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||
|
||||
for(int i=0;i<1;i++){
|
||||
|
||||
CG(hermop,noise,subspace[b]);
|
||||
|
||||
noise = subspace[b];
|
||||
scale = std::pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
}
|
||||
|
||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
|
||||
subspace[b] = noise;
|
||||
|
||||
}
|
||||
|
||||
Orthogonalise();
|
||||
|
||||
}
|
||||
};
|
||||
// Fine Object == (per site) type of fine field
|
||||
// nbasis == number of deflation vectors
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
|
||||
////////////////////
|
||||
// Data members
|
||||
////////////////////
|
||||
Geometry geom;
|
||||
GridBase * _grid;
|
||||
CartesianStencil<siteVector,siteVector> Stencil;
|
||||
|
||||
std::vector<CoarseMatrix> A;
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
|
||||
RealD M (const CoarseVector &in, CoarseVector &out){
|
||||
|
||||
conformable(_grid,in._grid);
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
|
||||
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
siteVector res = zero;
|
||||
siteVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
for(int point=0;point<geom.npoint;point++){
|
||||
|
||||
SE=Stencil.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local&&SE->_permute) {
|
||||
permute(nbr,in._odata[SE->_offset],ptype);
|
||||
} else if(SE->_is_local) {
|
||||
nbr = in._odata[SE->_offset];
|
||||
} else {
|
||||
nbr = Stencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
res = res + A[point]._odata[ss]*nbr;
|
||||
}
|
||||
vstream(out._odata[ss],res);
|
||||
}
|
||||
return norm2(out);
|
||||
};
|
||||
|
||||
RealD Mdag (const CoarseVector &in, CoarseVector &out){
|
||||
return M(in,out);
|
||||
};
|
||||
|
||||
// Defer support for further coarsening for now
|
||||
void Mdiag (const CoarseVector &in, CoarseVector &out){};
|
||||
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){};
|
||||
|
||||
CoarsenedMatrix(GridCartesian &CoarseGrid) :
|
||||
|
||||
_grid(&CoarseGrid),
|
||||
geom(CoarseGrid._ndimension),
|
||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
{
|
||||
};
|
||||
|
||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> & Subspace){
|
||||
|
||||
FineField iblock(FineGrid); // contributions from within this block
|
||||
FineField oblock(FineGrid); // contributions from outwith this block
|
||||
|
||||
FineField phi(FineGrid);
|
||||
FineField tmp(FineGrid);
|
||||
FineField zz(FineGrid); zz=zero;
|
||||
FineField Mphi(FineGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(FineGrid);
|
||||
|
||||
CoarseVector iProj(Grid());
|
||||
CoarseVector oProj(Grid());
|
||||
CoarseScalar InnerProd(Grid());
|
||||
|
||||
// Orthogonalise the subblocks over the basis
|
||||
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||
|
||||
// Compute the matrix elements of linop between this orthonormal
|
||||
// set of vectors.
|
||||
int self_stencil=-1;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
A[p]=zero;
|
||||
if( geom.displacements[p]==0){
|
||||
self_stencil=p;
|
||||
}
|
||||
}
|
||||
assert(self_stencil!=-1);
|
||||
|
||||
for(int i=0;i<nbasis;i++){
|
||||
phi=Subspace.subspace[i];
|
||||
|
||||
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
|
||||
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
|
||||
int dir = geom.directions[p];
|
||||
int disp = geom.displacements[p];
|
||||
|
||||
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
|
||||
|
||||
LatticeCoordinate(coor,dir);
|
||||
|
||||
if ( disp==0 ){
|
||||
linop.OpDiag(phi,Mphi);
|
||||
}
|
||||
else {
|
||||
linop.OpDir(phi,Mphi,dir,disp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Pick out contributions coming from this cell and neighbour cell
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
if ( disp==0 ) {
|
||||
iblock = Mphi;
|
||||
oblock = zero;
|
||||
} else if ( disp==1 ) {
|
||||
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
|
||||
} else if ( disp==-1 ) {
|
||||
oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Subspace.ProjectToSubspace(iProj,iblock);
|
||||
Subspace.ProjectToSubspace(oProj,oblock);
|
||||
// blockProject(iProj,iblock,Subspace.subspace);
|
||||
// blockProject(oProj,oblock,Subspace.subspace);
|
||||
parallel_for(int ss=0;ss<Grid()->oSites();ss++){
|
||||
for(int j=0;j<nbasis;j++){
|
||||
if( disp!= 0 ) {
|
||||
A[p]._odata[ss](j,i) = oProj._odata[ss](j);
|
||||
}
|
||||
A[self_stencil]._odata[ss](j,i) = A[self_stencil]._odata[ss](j,i) + iProj._odata[ss](j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
///////////////////////////
|
||||
// test code worth preserving in if block
|
||||
///////////////////////////
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
|
||||
std::cout<<GridLogMessage<< A[p] << std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
|
||||
|
||||
phi=Subspace.subspace[0];
|
||||
std::vector<int> bc(FineGrid->_ndimension,0);
|
||||
|
||||
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
||||
linop.Op(tmp,Mphi); // Apply big dop
|
||||
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
|
||||
std::cout<<GridLogMessage<< iProj <<std::endl;
|
||||
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
||||
#endif
|
||||
// ForceHermitian();
|
||||
AssertHermitian();
|
||||
// ForceDiagonal();
|
||||
}
|
||||
void ForceDiagonal(void) {
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||
for(int p=0;p<8;p++){
|
||||
A[p]=zero;
|
||||
}
|
||||
|
||||
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
|
||||
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
|
||||
|
||||
Complex one(1.0);
|
||||
|
||||
iMatrix<CComplex,nbasis> ident; ident=one;
|
||||
|
||||
val = val*adj(val);
|
||||
val = val + 1.0;
|
||||
|
||||
A[8] = val*ident;
|
||||
|
||||
// for(int s=0;s<Grid()->oSites();s++) {
|
||||
// A[8]._odata[s]=val._odata[s];
|
||||
// }
|
||||
}
|
||||
void ForceHermitian(void) {
|
||||
for(int d=0;d<4;d++){
|
||||
int dd=d+1;
|
||||
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
||||
}
|
||||
// A[8] = 0.5*(A[8] + adj(A[8]));
|
||||
}
|
||||
void AssertHermitian(void) {
|
||||
CoarseMatrix AA (Grid());
|
||||
CoarseMatrix AAc (Grid());
|
||||
CoarseMatrix Diff (Grid());
|
||||
for(int d=0;d<4;d++){
|
||||
|
||||
int dd=d+1;
|
||||
AAc = Cshift(A[2*d+1],dd,1);
|
||||
AA = A[2*d];
|
||||
|
||||
Diff = AA - adj(AAc);
|
||||
|
||||
std::cout<<GridLogMessage<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
|
||||
|
||||
}
|
||||
Diff = A[8] - adj(A[8]);
|
||||
std::cout<<GridLogMessage<<"Norm diff local "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Norm local "<< norm2(A[8])<<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
306
Grid/algorithms/FFT.h
Normal file
306
Grid/algorithms/FFT.h
Normal file
@ -0,0 +1,306 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cshift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_FFT_H_
|
||||
#define _GRID_FFT_H_
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
#ifdef USE_MKL
|
||||
#include <fftw/fftw3.h>
|
||||
#else
|
||||
#include <fftw3.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class scalar> struct FFTW { };
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
template<> struct FFTW<ComplexD> {
|
||||
public:
|
||||
|
||||
typedef fftw_complex FFTW_scalar;
|
||||
typedef fftw_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftw_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftw_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct FFTW<ComplexF> {
|
||||
public:
|
||||
|
||||
typedef fftwf_complex FFTW_scalar;
|
||||
typedef fftwf_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftwf_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftwf_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftwf_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef FFTW_FORWARD
|
||||
#define FFTW_FORWARD (-1)
|
||||
#define FFTW_BACKWARD (+1)
|
||||
#endif
|
||||
|
||||
class FFT {
|
||||
private:
|
||||
|
||||
GridCartesian *vgrid;
|
||||
GridCartesian *sgrid;
|
||||
|
||||
int Nd;
|
||||
double flops;
|
||||
double flops_call;
|
||||
uint64_t usec;
|
||||
|
||||
std::vector<int> dimensions;
|
||||
std::vector<int> processors;
|
||||
std::vector<int> processor_coor;
|
||||
|
||||
public:
|
||||
|
||||
static const int forward=FFTW_FORWARD;
|
||||
static const int backward=FFTW_BACKWARD;
|
||||
|
||||
double Flops(void) {return flops;}
|
||||
double MFlops(void) {return flops/usec;}
|
||||
double USec(void) {return (double)usec;}
|
||||
|
||||
FFT ( GridCartesian * grid ) :
|
||||
vgrid(grid),
|
||||
Nd(grid->_ndimension),
|
||||
dimensions(grid->_fdimensions),
|
||||
processors(grid->_processors),
|
||||
processor_coor(grid->_processor_coor)
|
||||
{
|
||||
flops=0;
|
||||
usec =0;
|
||||
std::vector<int> layout(Nd,1);
|
||||
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||
};
|
||||
|
||||
~FFT ( void) {
|
||||
delete sgrid;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim_mask(Lattice<vobj> &result,const Lattice<vobj> &source,std::vector<int> mask,int sign){
|
||||
|
||||
conformable(result._grid,vgrid);
|
||||
conformable(source._grid,vgrid);
|
||||
Lattice<vobj> tmp(vgrid);
|
||||
tmp = source;
|
||||
for(int d=0;d<Nd;d++){
|
||||
if( mask[d] ) {
|
||||
FFT_dim(result,tmp,d,sign);
|
||||
tmp=result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_all_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int sign){
|
||||
std::vector<int> mask(Nd,1);
|
||||
FFT_dim_mask(result,source,mask,sign);
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int sign){
|
||||
#ifndef HAVE_FFTW
|
||||
assert(0);
|
||||
#else
|
||||
conformable(result._grid,vgrid);
|
||||
conformable(source._grid,vgrid);
|
||||
|
||||
int L = vgrid->_ldimensions[dim];
|
||||
int G = vgrid->_fdimensions[dim];
|
||||
|
||||
std::vector<int> layout(Nd,1);
|
||||
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||
|
||||
pencil_gd[dim] = G*processors[dim];
|
||||
|
||||
// Pencil global vol LxLxGxLxL per node
|
||||
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||
|
||||
// Construct pencils
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename sobj::scalar_type scalar;
|
||||
|
||||
Lattice<sobj> pgbuf(&pencil_g);
|
||||
|
||||
|
||||
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||
|
||||
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||
int Nlow = 1;
|
||||
for(int d=0;d<dim;d++){
|
||||
Nlow*=vgrid->_ldimensions[d];
|
||||
}
|
||||
|
||||
int rank = 1; /* 1d transforms */
|
||||
int n[] = {G}; /* 1d transforms of length G */
|
||||
int howmany = Ncomp;
|
||||
int odist,idist,istride,ostride;
|
||||
idist = odist = 1; /* Distance between consecutive FT's */
|
||||
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||
int *inembed = n, *onembed = n;
|
||||
|
||||
scalar div;
|
||||
if ( sign == backward ) div = 1.0/G;
|
||||
else if ( sign == forward ) div = 1.0;
|
||||
else assert(0);
|
||||
|
||||
FFTW_plan p;
|
||||
{
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[0];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[0];
|
||||
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||
in,inembed,
|
||||
istride,idist,
|
||||
out,onembed,
|
||||
ostride, odist,
|
||||
sign,FFTW_ESTIMATE);
|
||||
}
|
||||
|
||||
// Barrel shift and collect global pencil
|
||||
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||
result = source;
|
||||
int pc = processor_coor[dim];
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> cbuf(Nd);
|
||||
sobj s;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||
peekLocalSite(s,result,cbuf);
|
||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||
// cbuf[dim]+=p*L;
|
||||
pokeLocalSite(s,pgbuf,cbuf);
|
||||
}
|
||||
}
|
||||
if (p != processors[dim] - 1)
|
||||
{
|
||||
result = Cshift(result,dim,L);
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over orthog coords
|
||||
int NN=pencil_g.lSites();
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> cbuf(Nd);
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<NN;idx++) {
|
||||
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
// performance counting
|
||||
double add,mul,fma;
|
||||
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||
flops_call = add+mul+2.0*fma;
|
||||
usec += timer.useconds();
|
||||
flops+= flops_call*NN;
|
||||
|
||||
// writing out result
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||
sobj s;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||
cgbuf = clbuf;
|
||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||
peekLocalSite(s,pgbuf,cgbuf);
|
||||
pokeLocalSite(s,result,clbuf);
|
||||
}
|
||||
}
|
||||
result = result*div;
|
||||
|
||||
// destroying plan
|
||||
FFTW<scalar>::fftw_destroy_plan(p);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
481
Grid/algorithms/LinearOperator.h
Normal file
481
Grid/algorithms/LinearOperator.h
Normal file
@ -0,0 +1,481 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/LinearOperator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_LINEAR_OP_H
|
||||
#define GRID_ALGORITHM_LINEAR_OP_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LinearOperators Take a something and return a something.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real):
|
||||
//SBase
|
||||
// i) F(a x + b y) = aF(x) + b F(y).
|
||||
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
|
||||
//
|
||||
// Would be fun to have a test linearity & Herm Conj function!
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class LinearOperatorBase {
|
||||
public:
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code
|
||||
// between RB and non-RB variants. Sparse matrix is like the fermion action def, and then
|
||||
// the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising
|
||||
// replication of code.
|
||||
//
|
||||
// I'm not entirely happy with implementation; to share the Schur code between herm and non-herm
|
||||
// while still having a "OpAndNorm" in the abstract base I had to implement it in both cases
|
||||
// with an assert trap in the non-herm. This isn't right; there must be a better C++ way to
|
||||
// do it, but I fear it required multiple inheritance and mixed in abstract base classes
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op from non-herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class MdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op and shift it for mgrid smoother
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
RealD _shift;
|
||||
public:
|
||||
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
assert(0);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
out = out + _shift*in;
|
||||
|
||||
ComplexD dot;
|
||||
dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Wrap an already herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.M(in,out);
|
||||
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Even Odd Schur decomp operators; there are several
|
||||
// ways to introduce the even odd checkerboarding
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual RealD Mpc (const Field &in, Field &out) =0;
|
||||
virtual RealD MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp(in._grid);
|
||||
tmp.checkerboard = in.checkerboard;
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
out.checkerboard = in.checkerboard;
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
tmp.checkerboard = !in.checkerboard;
|
||||
//std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
//std::cout << "cb in " << in.checkerboard << " cb out " << out.checkerboard << std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
_Mat.MooeeDag(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
_Mat.Meooe(tmp,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.MooeeInvDag(in,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.MooeeInv(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in._grid);
|
||||
|
||||
_Mat.MeooeDag(in,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
_Mat.MeooeDag(tmp,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
return Mpc(in,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
assert(0);// Never need with staggered
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for functions of operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
};
|
||||
|
||||
template<class Field> class LinearFunction {
|
||||
public:
|
||||
virtual void operator() (const Field &in, Field &out) = 0;
|
||||
};
|
||||
|
||||
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
|
||||
public:
|
||||
void operator() (const Field &in, Field &out){
|
||||
out = in;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for Multishift solvers for operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorMultiFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, std::vector<Field> &out) = 0;
|
||||
};
|
||||
|
||||
// FIXME : To think about
|
||||
|
||||
// Chroma functionality list defining LinearOperator
|
||||
/*
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign) const = 0;
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign, Real epsilon) const
|
||||
virtual const Subset& subset() const = 0;
|
||||
virtual unsigned long nFlops() const { return 0; }
|
||||
virtual void deriv(P& ds_u, const T& chi, const T& psi, enum PlusMinus isign) const
|
||||
class UnprecLinearOperator : public DiffLinearOperator<T,P,Q>
|
||||
const Subset& subset() const {return all;}
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hermitian operator Linear function and operator function
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class HermOpOperatorFunction : public OperatorFunction<Field> {
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
Linop.HermOp(in,out);
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class PlainHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
|
||||
{}
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_Linop.HermOp(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class FunctionHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
OperatorFunction<Field> & _poly;
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
|
||||
: _poly(poly), _Linop(linop) {};
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_poly(_Linop,in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class Polynomial : public OperatorFunction<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
public:
|
||||
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
Field AtoN(in._grid);
|
||||
Field Mtmp(in._grid);
|
||||
AtoN = in;
|
||||
out = AtoN*Coeffs[0];
|
||||
for(int n=1;n<Coeffs.size();n++){
|
||||
Mtmp = AtoN;
|
||||
Linop.HermOp(Mtmp,AtoN);
|
||||
out=out+AtoN*Coeffs[n];
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
46
Grid/algorithms/Preconditioner.h
Normal file
46
Grid/algorithms/Preconditioner.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/Preconditioner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PRECONDITIONER_H
|
||||
#define GRID_PRECONDITIONER_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field> class Preconditioner : public LinearFunction<Field> {
|
||||
virtual void operator()(const Field &src, Field & psi)=0;
|
||||
};
|
||||
|
||||
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
|
||||
public:
|
||||
void operator()(const Field &src, Field & psi){
|
||||
psi = src;
|
||||
}
|
||||
TrivialPrecon(void){};
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
71
Grid/algorithms/SparseMatrix.h
Normal file
71
Grid/algorithms/SparseMatrix.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/SparseMatrix.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface defining what I expect of a general sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SparseMatrixBase {
|
||||
public:
|
||||
virtual GridBase *Grid(void) =0;
|
||||
// Full checkerboar operations
|
||||
virtual RealD M (const Field &in, Field &out)=0;
|
||||
virtual RealD Mdag (const Field &in, Field &out)=0;
|
||||
virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp (in._grid);
|
||||
ni=M(in,tmp);
|
||||
no=Mdag(tmp,out);
|
||||
}
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface augmented by a red black sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
virtual GridBase *RedBlackGrid(void)=0;
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInv (const Field &in, Field &out)=0;
|
||||
|
||||
virtual void MeooeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInvDag (const Field &in, Field &out)=0;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
377
Grid/algorithms/approx/Chebyshev.h
Normal file
377
Grid/algorithms/approx/Chebyshev.h
Normal file
@ -0,0 +1,377 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Chebyshev.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CHEBYSHEV_H
|
||||
#define GRID_CHEBYSHEV_H
|
||||
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
struct ChebyParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
|
||||
RealD, alpha,
|
||||
RealD, beta,
|
||||
int, Npoly);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Chebyshev approximations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class Chebyshev : public OperatorFunction<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD hi;
|
||||
RealD lo;
|
||||
|
||||
public:
|
||||
void csv(std::ostream &out){
|
||||
RealD diff = hi-lo;
|
||||
RealD delta = (hi-lo)*1.0e-9;
|
||||
for (RealD x=lo; x<hi; x+=delta) {
|
||||
delta*=1.1;
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Convenience for plotting the approximation
|
||||
void PlotApprox(std::ostream &out) {
|
||||
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||
for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
|
||||
out <<x<<"\t"<<approx(x)<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
Chebyshev(){};
|
||||
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// CJ: the one we need for Lanczos
|
||||
void Init(RealD _lo,RealD _hi,int _order)
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
Coeffs.assign(0.,order);
|
||||
Coeffs[order-1] = 1.;
|
||||
};
|
||||
|
||||
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
for(int j=0;j<order;j++){
|
||||
RealD s=0;
|
||||
for(int k=0;k<order;k++){
|
||||
RealD y=std::cos(M_PI*(k+0.5)/order);
|
||||
RealD x=0.5*(y*(hi-lo)+(hi+lo));
|
||||
RealD f=func(x);
|
||||
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
||||
}
|
||||
Coeffs[j] = s * 2.0/order;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void JacksonSmooth(void){
|
||||
RealD M=order;
|
||||
RealD alpha = M_PI/(M+2);
|
||||
RealD lmax = std::cos(alpha);
|
||||
RealD sumUsq =0;
|
||||
std::vector<RealD> U(M);
|
||||
std::vector<RealD> a(M);
|
||||
std::vector<RealD> g(M);
|
||||
for(int n=0;n<=M;n++){
|
||||
U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
|
||||
sumUsq += U[n]*U[n];
|
||||
}
|
||||
sumUsq = std::sqrt(sumUsq);
|
||||
|
||||
for(int i=1;i<=M;i++){
|
||||
a[i] = U[i]/sumUsq;
|
||||
}
|
||||
g[0] = 1.0;
|
||||
for(int m=1;m<=M;m++){
|
||||
g[m] = 0;
|
||||
for(int i=0;i<=M-m;i++){
|
||||
g[m]+= a[i]*a[m+i];
|
||||
}
|
||||
}
|
||||
for(int m=1;m<=M;m++){
|
||||
Coeffs[m]*=g[m];
|
||||
}
|
||||
}
|
||||
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
RealD approxD(RealD x)
|
||||
{
|
||||
RealD Un;
|
||||
RealD Unm;
|
||||
RealD Unp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD U0=1;
|
||||
RealD U1=2*y;
|
||||
|
||||
RealD sum;
|
||||
sum = Coeffs[1]*U0;
|
||||
sum+= Coeffs[2]*U1*2.0;
|
||||
|
||||
Un =U1;
|
||||
Unm=U0;
|
||||
for(int i=2;i<order-1;i++){
|
||||
Unp=2*y*Un-Unm;
|
||||
Unm=Un;
|
||||
Un =Unp;
|
||||
sum+= Un*Coeffs[i+1]*(i+1.0);
|
||||
}
|
||||
return sum/(0.5*(hi-lo));
|
||||
};
|
||||
|
||||
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
|
||||
RealD x = x0;
|
||||
RealD eps;
|
||||
|
||||
int i;
|
||||
for (i=0;i<maxiter;i++) {
|
||||
eps = approx(x) - z;
|
||||
if (fabs(eps / z) < resid)
|
||||
return x;
|
||||
x = x - eps / approxD(x);
|
||||
}
|
||||
|
||||
return std::numeric_limits<double>::quiet_NaN();
|
||||
}
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in._grid;
|
||||
|
||||
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
|
||||
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M + mscale)in
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
T1=y*xscale+in*mscale;
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
Linop.HermOp(*Tn,y);
|
||||
|
||||
y=xscale*y+mscale*(*Tn);
|
||||
|
||||
*Tnp=2.0*y-(*Tnm);
|
||||
|
||||
out=out+Coeffs[n]* (*Tnp);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Field>
|
||||
class ChebyshevLanczos : public Chebyshev<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD alpha;
|
||||
RealD beta;
|
||||
RealD mu;
|
||||
|
||||
public:
|
||||
ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
|
||||
alpha(_alpha),
|
||||
beta(_beta),
|
||||
mu(_mu)
|
||||
{
|
||||
order=_order;
|
||||
Coeffs.resize(order);
|
||||
for(int i=0;i<_order;i++){
|
||||
Coeffs[i] = 0.0;
|
||||
}
|
||||
Coeffs[order-1]=1.0;
|
||||
};
|
||||
|
||||
void csv(std::ostream &out){
|
||||
for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
RealD approx(RealD xx) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
Real aa = alpha * alpha;
|
||||
Real bb = beta * beta;
|
||||
|
||||
RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
|
||||
|
||||
RealD y= x;
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
// shift_Multiply in Rudy's code
|
||||
void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out)
|
||||
{
|
||||
GridBase *grid=in._grid;
|
||||
Field tmp(grid);
|
||||
|
||||
RealD aa= alpha*alpha;
|
||||
RealD bb= beta * beta;
|
||||
|
||||
Linop.HermOp(in,out);
|
||||
out = out - mu*in;
|
||||
|
||||
Linop.HermOp(out,tmp);
|
||||
tmp = tmp - mu * out;
|
||||
|
||||
out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in;
|
||||
};
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in._grid;
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M )*in
|
||||
AminusMuSq(Linop,T0,T1);
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
AminusMuSq(Linop,*Tn,y);
|
||||
|
||||
*Tnp=2.0*y-(*Tnm);
|
||||
|
||||
out=out+Coeffs[n]* (*Tnp);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
152
Grid/algorithms/approx/Forecast.h
Normal file
152
Grid/algorithms/approx/Forecast.h
Normal file
@ -0,0 +1,152 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Forecast.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef INCLUDED_FORECAST_H
|
||||
#define INCLUDED_FORECAST_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Abstract base class.
|
||||
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
|
||||
// and returns a forecasted solution to the system D*psi = phi (psi).
|
||||
template<class Matrix, class Field>
|
||||
class Forecast
|
||||
{
|
||||
public:
|
||||
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
|
||||
};
|
||||
|
||||
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
|
||||
// used to forecast solutions across poles of the EOFA heatbath.
|
||||
//
|
||||
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
|
||||
template<class Matrix, class Field>
|
||||
class ChronoForecast : public Forecast<Matrix,Field>
|
||||
{
|
||||
public:
|
||||
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
|
||||
{
|
||||
int degree = prev_solns.size();
|
||||
Field chi(phi); // forecasted solution
|
||||
|
||||
// Trivial cases
|
||||
if(degree == 0){ chi = zero; return chi; }
|
||||
else if(degree == 1){ return prev_solns[0]; }
|
||||
|
||||
RealD dot;
|
||||
ComplexD xp;
|
||||
Field r(phi); // residual
|
||||
Field Mv(phi);
|
||||
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
|
||||
std::vector<Field> MdagMv(degree,phi);
|
||||
|
||||
// Array to hold the matrix elements
|
||||
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
|
||||
|
||||
// Solution and source vectors
|
||||
std::vector<ComplexD> a(degree);
|
||||
std::vector<ComplexD> b(degree);
|
||||
|
||||
// Orthonormalize the vector basis
|
||||
for(int i=0; i<degree; i++){
|
||||
v[i] *= 1.0/std::sqrt(norm2(v[i]));
|
||||
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
|
||||
}
|
||||
|
||||
// Perform sparse matrix multiplication and construct rhs
|
||||
for(int i=0; i<degree; i++){
|
||||
b[i] = innerProduct(v[i],phi);
|
||||
Mat.M(v[i],Mv);
|
||||
Mat.Mdag(Mv,MdagMv[i]);
|
||||
G[i][i] = innerProduct(v[i],MdagMv[i]);
|
||||
}
|
||||
|
||||
// Construct the matrix
|
||||
for(int j=0; j<degree; j++){
|
||||
for(int k=j+1; k<degree; k++){
|
||||
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
||||
G[k][j] = std::conj(G[j][k]);
|
||||
}}
|
||||
|
||||
// Gauss-Jordan elimination with partial pivoting
|
||||
for(int i=0; i<degree; i++){
|
||||
|
||||
// Perform partial pivoting
|
||||
int k = i;
|
||||
for(int j=i+1; j<degree; j++){ if(std::abs(G[j][j]) > std::abs(G[k][k])){ k = j; } }
|
||||
if(k != i){
|
||||
xp = b[k];
|
||||
b[k] = b[i];
|
||||
b[i] = xp;
|
||||
for(int j=0; j<degree; j++){
|
||||
xp = G[k][j];
|
||||
G[k][j] = G[i][j];
|
||||
G[i][j] = xp;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert matrix to upper triangular form
|
||||
for(int j=i+1; j<degree; j++){
|
||||
xp = G[j][i]/G[i][i];
|
||||
b[j] -= xp * b[i];
|
||||
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
|
||||
}
|
||||
}
|
||||
|
||||
// Use Gaussian elimination to solve equations and calculate initial guess
|
||||
chi = zero;
|
||||
r = phi;
|
||||
for(int i=degree-1; i>=0; i--){
|
||||
a[i] = 0.0;
|
||||
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
|
||||
a[i] = (b[i]-a[i])/G[i][i];
|
||||
chi += a[i]*v[i];
|
||||
r -= a[i]*MdagMv[i];
|
||||
}
|
||||
|
||||
RealD true_r(0.0);
|
||||
ComplexD tmp;
|
||||
for(int i=0; i<degree; i++){
|
||||
tmp = -b[i];
|
||||
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
||||
tmp = std::conj(tmp)*tmp;
|
||||
true_r += std::sqrt(tmp.real());
|
||||
}
|
||||
|
||||
RealD error = std::sqrt(norm2(r)/norm2(phi));
|
||||
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
|
||||
|
||||
return chi;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
21
Grid/algorithms/approx/LICENSE
Normal file
21
Grid/algorithms/approx/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
|
||||
Copyright (c) 2011 Michael Clark
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
56
Grid/algorithms/approx/MultiShiftFunction.cc
Normal file
56
Grid/algorithms/approx/MultiShiftFunction.cc
Normal file
@ -0,0 +1,56 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
double MultiShiftFunction::approx(double x)
|
||||
{
|
||||
double a = norm;
|
||||
for(int n=0;n<poles.size();n++){
|
||||
a = a + residues[n]/(x+poles[n]);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
void MultiShiftFunction::gnuplot(std::ostream &out)
|
||||
{
|
||||
out<<"f(x) = "<<norm<<"";
|
||||
for(int n=0;n<poles.size();n++){
|
||||
out<<"+("<<residues[n]<<"/(x+"<<poles[n]<<"))";
|
||||
}
|
||||
out<<";"<<std::endl;
|
||||
}
|
||||
void MultiShiftFunction::csv(std::ostream &out)
|
||||
{
|
||||
for (double x=lo; x<hi; x*=1.05) {
|
||||
double f = approx(x);
|
||||
double r = sqrt(x);
|
||||
out<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
67
Grid/algorithms/approx/MultiShiftFunction.h
Normal file
67
Grid/algorithms/approx/MultiShiftFunction.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/MultiShiftFunction.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef MULTI_SHIFT_FUNCTION
|
||||
#define MULTI_SHIFT_FUNCTION
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class MultiShiftFunction {
|
||||
public:
|
||||
int order;
|
||||
std::vector<RealD> poles;
|
||||
std::vector<RealD> residues;
|
||||
std::vector<RealD> tolerances;
|
||||
RealD norm;
|
||||
RealD lo,hi;
|
||||
|
||||
MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
|
||||
RealD approx(RealD x);
|
||||
void csv(std::ostream &out);
|
||||
void gnuplot(std::ostream &out);
|
||||
|
||||
void Init(AlgRemez & remez,double tol,bool inverse)
|
||||
{
|
||||
order=remez.getDegree();
|
||||
tolerances.resize(remez.getDegree(),tol);
|
||||
poles.resize(remez.getDegree());
|
||||
residues.resize(remez.getDegree());
|
||||
remez.getBounds(lo,hi);
|
||||
if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
|
||||
else remez.getPFE (&residues[0],&poles[0],&norm);
|
||||
}
|
||||
// Allow deferred initialisation
|
||||
MultiShiftFunction(void){};
|
||||
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
|
||||
{
|
||||
Init(remez,tol,inverse);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
80
Grid/algorithms/approx/README
Normal file
80
Grid/algorithms/approx/README
Normal file
@ -0,0 +1,80 @@
|
||||
-----------------------------------------------------------------------------------
|
||||
|
||||
PAB. Took Mike Clark's AlgRemez from GitHub and (modified a little) include.
|
||||
This is open source and license and readme and comments are preserved consistent
|
||||
with the license. Mike, thankyou!
|
||||
-----------------------------------------------------------------------------------
|
||||
-----------------------------------------------------------------------------------
|
||||
AlgRemez
|
||||
|
||||
The archive downloadable here contains an implementation of the Remez
|
||||
algorithm which calculates optimal rational (and polynomial)
|
||||
approximations to the nth root over a given spectral range. The Remez
|
||||
algorithm, although in principle is extremely straightforward to
|
||||
program, is quite difficult to get completely correct, e.g., the Maple
|
||||
implementation of the algorithm does not always converge to the
|
||||
correct answer.
|
||||
|
||||
To use this algorithm you need to install GMP, the GNU Multiple
|
||||
Precision Library, and when configuring the install, you must include
|
||||
the --enable-mpfr option (see the GMP manual for more details). You
|
||||
also have to edit the Makefile for AlgRemez appropriately for your
|
||||
system, namely to point corrrectly to the location of the GMP library.
|
||||
|
||||
The simple main program included with this archive invokes the
|
||||
AlgRemez class to calculate an approximation given by command line
|
||||
arguments. It is invoked by the following
|
||||
|
||||
./test y z n d lambda_low lambda_high precision,
|
||||
|
||||
where the function to be approximated is f(x) = x^(y/z), with degree
|
||||
(n,d) over the spectral range [lambda_low, lambda_high], using
|
||||
precision digits of precision in the arithmetic. So an example would
|
||||
be
|
||||
|
||||
./test 1 2 5 5 0.0004 64 40
|
||||
|
||||
which corresponds to constructing a rational approximation to the
|
||||
square root function, with degree (5,5) over the range [0.0004,64]
|
||||
with 40 digits of precision used for the arithmetic. The parameters y
|
||||
and z must be positive, the approximation to f(x) = x^(-y/z) is simply
|
||||
the inverse of the approximation to f(x) = x^(y/z). After the
|
||||
approximation has been constructed, the roots and poles of the
|
||||
rational function are found, and then the partial fraction expansion
|
||||
of both the rational function and it's inverse are found, the results
|
||||
of which are output to a file called "approx.dat". In addition, the
|
||||
error function of the approximation is output to "error.dat", where it
|
||||
can be checked that the resultant approximation satisfies Chebychev's
|
||||
criterion, namely all error maxima are equal in magnitude, and
|
||||
adjacent maxima are oppostie in sign. There are some caveats here
|
||||
however, the optimal polynomial approximation has complex roots, and
|
||||
the root finding implemented here cannot (yet) handle complex roots.
|
||||
In addition, the partial fraction expansion of rational approximations
|
||||
is only found for the case n = d, i.e., the degree of numerator
|
||||
polynomial equals that of the denominator polynomial. The convention
|
||||
for the partial fraction expansion is that polar shifts are always
|
||||
written added to x, not subtracted.
|
||||
|
||||
To do list
|
||||
|
||||
1. Include an exponential dampening factor in the function to be
|
||||
approximated. This may sound trivial to implement, but for some
|
||||
parameters, the algorithm seems to breakdown. Also, the roots in the
|
||||
rational approximation sometimes become complex, which currently
|
||||
breaks the stupidly simple root finding code.
|
||||
|
||||
2. Make the algorithm faster - it's too slow when running on qcdoc.
|
||||
|
||||
3. Add complex root finding.
|
||||
|
||||
4. Add more options for error minimisation - currently the code
|
||||
minimises the relative error, should add options for absolute error,
|
||||
and other norms.
|
||||
|
||||
There will be a forthcoming publication concerning the results
|
||||
generated by this software, but in the meantime, if you use this
|
||||
software, please cite it as
|
||||
"M.A. Clark and A.D. Kennedy, https://github.com/mikeaclark/AlgRemez, 2005".
|
||||
|
||||
If you have any problems using the software, then please email scientist.mike@gmail.com.
|
||||
|
760
Grid/algorithms/approx/Remez.cc
Normal file
760
Grid/algorithms/approx/Remez.cc
Normal file
@ -0,0 +1,760 @@
|
||||
/*
|
||||
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
alg_remez.C
|
||||
|
||||
AlgRemez is an implementation of the Remez algorithm, which in this
|
||||
case is used for generating the optimal nth root rational
|
||||
approximation.
|
||||
|
||||
Note this class requires the gnu multiprecision (GNU MP) library.
|
||||
|
||||
*/
|
||||
|
||||
#include<math.h>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string>
|
||||
#include<iostream>
|
||||
#include<iomanip>
|
||||
#include<cassert>
|
||||
|
||||
#include<Grid/algorithms/approx/Remez.h>
|
||||
|
||||
// Constructor
|
||||
AlgRemez::AlgRemez(double lower, double upper, long precision)
|
||||
{
|
||||
prec = precision;
|
||||
bigfloat::setDefaultPrecision(prec);
|
||||
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
|
||||
std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
|
||||
std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
|
||||
|
||||
alloc = 0;
|
||||
n = 0;
|
||||
d = 0;
|
||||
|
||||
foundRoots = 0;
|
||||
|
||||
// Only require the approximation spread to be less than 1 ulp
|
||||
tolerance = 1e-15;
|
||||
}
|
||||
|
||||
// Destructor
|
||||
AlgRemez::~AlgRemez()
|
||||
{
|
||||
if (alloc) {
|
||||
delete [] param;
|
||||
delete [] roots;
|
||||
delete [] poles;
|
||||
delete [] xx;
|
||||
delete [] mm;
|
||||
delete [] a_power;
|
||||
delete [] a;
|
||||
}
|
||||
}
|
||||
|
||||
// Free memory and reallocate as necessary
|
||||
void AlgRemez::allocate(int num_degree, int den_degree)
|
||||
{
|
||||
// Arrays have previously been allocated, deallocate first, then allocate
|
||||
if (alloc) {
|
||||
delete [] param;
|
||||
delete [] roots;
|
||||
delete [] poles;
|
||||
delete [] xx;
|
||||
delete [] mm;
|
||||
}
|
||||
|
||||
// Note use of new and delete in memory allocation - cannot run on qcdsp
|
||||
param = new bigfloat[num_degree+den_degree+1];
|
||||
roots = new bigfloat[num_degree];
|
||||
poles = new bigfloat[den_degree];
|
||||
xx = new bigfloat[num_degree+den_degree+3];
|
||||
mm = new bigfloat[num_degree+den_degree+2];
|
||||
|
||||
if (!alloc) {
|
||||
// The coefficients of the sum in the exponential
|
||||
a = new bigfloat[SUM_MAX];
|
||||
a_power = new int[SUM_MAX];
|
||||
}
|
||||
|
||||
alloc = 1;
|
||||
}
|
||||
|
||||
// Reset the bounds of the approximation
|
||||
void AlgRemez::setBounds(double lower, double upper)
|
||||
{
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double AlgRemez::generateApprox(int degree, unsigned long pnum,
|
||||
unsigned long pden)
|
||||
{
|
||||
return generateApprox(degree, degree, pnum, pden);
|
||||
}
|
||||
|
||||
double AlgRemez::generateApprox(int num_degree, int den_degree,
|
||||
unsigned long pnum, unsigned long pden)
|
||||
{
|
||||
double *a_param = 0;
|
||||
int *a_pow = 0;
|
||||
return generateApprox(num_degree, den_degree, pnum, pden, 0, a_param, a_pow);
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double AlgRemez::generateApprox(int num_degree, int den_degree,
|
||||
unsigned long pnum, unsigned long pden,
|
||||
int a_len, double *a_param, int *a_pow)
|
||||
{
|
||||
std::cout<<"Degree of the approximation is ("<<num_degree<<","<<den_degree<<")\n";
|
||||
std::cout<<"Approximating the function x^("<<pnum<<"/"<<pden<<")\n";
|
||||
|
||||
// Reallocate arrays, since degree has changed
|
||||
if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
|
||||
|
||||
assert(a_len<=SUM_MAX);
|
||||
|
||||
step = new bigfloat[num_degree+den_degree+2];
|
||||
|
||||
a_length = a_len;
|
||||
for (int j=0; j<a_len; j++) {
|
||||
a[j]= a_param[j];
|
||||
a_power[j] = a_pow[j];
|
||||
}
|
||||
|
||||
power_num = pnum;
|
||||
power_den = pden;
|
||||
spread = 1.0e37;
|
||||
iter = 0;
|
||||
|
||||
n = num_degree;
|
||||
d = den_degree;
|
||||
neq = n + d + 1;
|
||||
|
||||
initialGuess();
|
||||
stpini(step);
|
||||
|
||||
while (spread > tolerance) { //iterate until convergance
|
||||
|
||||
if (iter++%100==0)
|
||||
std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta<<std::endl;
|
||||
|
||||
equations();
|
||||
if (delta < tolerance) {
|
||||
std::cout<<"Delta too small, try increasing precision\n";
|
||||
assert(0);
|
||||
};
|
||||
assert( delta>= tolerance);
|
||||
|
||||
search(step);
|
||||
}
|
||||
|
||||
int sign;
|
||||
double error = (double)getErr(mm[0],&sign);
|
||||
std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
|
||||
|
||||
// Once the approximation has been generated, calculate the roots
|
||||
if(!root()) {
|
||||
std::cout<<"Root finding failed\n";
|
||||
} else {
|
||||
foundRoots = 1;
|
||||
}
|
||||
|
||||
delete [] step;
|
||||
|
||||
// Return the maximum error in the approximation
|
||||
return error;
|
||||
}
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(pnum/pden)
|
||||
int AlgRemez::getPFE(double *Res, double *Pole, double *Norm) {
|
||||
|
||||
if (n!=d) {
|
||||
std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!alloc) {
|
||||
std::cout<<"Approximation not yet generated\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!foundRoots) {
|
||||
std::cout<<"Roots not found, so PFE cannot be taken\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
bigfloat *r = new bigfloat[n];
|
||||
bigfloat *p = new bigfloat[d];
|
||||
|
||||
for (int i=0; i<n; i++) r[i] = roots[i];
|
||||
for (int i=0; i<d; i++) p[i] = poles[i];
|
||||
|
||||
// Perform a partial fraction expansion
|
||||
pfe(r, p, norm);
|
||||
|
||||
// Convert to double and return
|
||||
*Norm = (double)norm;
|
||||
for (int i=0; i<n; i++) Res[i] = (double)r[i];
|
||||
for (int i=0; i<d; i++) Pole[i] = (double)p[i];
|
||||
|
||||
delete [] r;
|
||||
delete [] p;
|
||||
|
||||
// Where the smallest shift is located
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(-pnum/pden)
|
||||
int AlgRemez::getIPFE(double *Res, double *Pole, double *Norm) {
|
||||
|
||||
if (n!=d) {
|
||||
std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!alloc) {
|
||||
std::cout<<"Approximation not yet generated\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!foundRoots) {
|
||||
std::cout<<"Roots not found, so PFE cannot be taken\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
bigfloat *r = new bigfloat[d];
|
||||
bigfloat *p = new bigfloat[n];
|
||||
|
||||
// Want the inverse function
|
||||
for (int i=0; i<n; i++) {
|
||||
r[i] = poles[i];
|
||||
p[i] = roots[i];
|
||||
}
|
||||
|
||||
// Perform a partial fraction expansion
|
||||
pfe(r, p, (bigfloat)1l/norm);
|
||||
|
||||
// Convert to double and return
|
||||
*Norm = (double)((bigfloat)1l/(norm));
|
||||
for (int i=0; i<n; i++) {
|
||||
Res[i] = (double)r[i];
|
||||
Pole[i] = (double)p[i];
|
||||
}
|
||||
|
||||
delete [] r;
|
||||
delete [] p;
|
||||
|
||||
// Where the smallest shift is located
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initial values of maximal and minimal errors
|
||||
void AlgRemez::initialGuess() {
|
||||
|
||||
// Supply initial guesses for solution points
|
||||
long ncheb = neq; // Degree of Chebyshev error estimate
|
||||
bigfloat a, r;
|
||||
|
||||
// Find ncheb+1 extrema of Chebyshev polynomial
|
||||
|
||||
a = ncheb;
|
||||
mm[0] = apstrt;
|
||||
for (long i = 1; i < ncheb; i++) {
|
||||
r = 0.5 * (1 - cos((M_PI * i)/(double) a));
|
||||
//r *= sqrt_bf(r);
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
mm[i] = apstrt + r * apwidt;
|
||||
}
|
||||
mm[ncheb] = apend;
|
||||
|
||||
a = 2.0 * ncheb;
|
||||
for (long i = 0; i <= ncheb; i++) {
|
||||
r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
|
||||
//r *= sqrt_bf(r); // Squeeze to low end of interval
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
xx[i] = apstrt + r * apwidt;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise step sizes
|
||||
void AlgRemez::stpini(bigfloat *step) {
|
||||
xx[neq+1] = apend;
|
||||
delta = 0.25;
|
||||
step[0] = xx[0] - apstrt;
|
||||
for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
|
||||
step[neq] = step[neq-1];
|
||||
}
|
||||
|
||||
// Search for error maxima and minima
|
||||
void AlgRemez::search(bigfloat *step) {
|
||||
bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
|
||||
int i, j, meq, emsign, ensign, steps;
|
||||
|
||||
meq = neq + 1;
|
||||
bigfloat *yy = new bigfloat[meq];
|
||||
|
||||
bigfloat eclose = 1.0e30;
|
||||
bigfloat farther = 0l;
|
||||
|
||||
j = 1;
|
||||
xx0 = apstrt;
|
||||
|
||||
for (i = 0; i < meq; i++) {
|
||||
steps = 0;
|
||||
xx1 = xx[i]; // Next zero
|
||||
if (i == meq-1) xx1 = apend;
|
||||
xm = mm[i];
|
||||
ym = getErr(xm,&emsign);
|
||||
q = step[i];
|
||||
xn = xm + q;
|
||||
if (xn < xx0 || xn >= xx1) { // Cannot skip over adjacent boundaries
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
} else {
|
||||
yn = getErr(xn,&ensign);
|
||||
if (yn < ym) {
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
}
|
||||
}
|
||||
|
||||
while(yn >= ym) { // March until error becomes smaller.
|
||||
if (++steps > 10) break;
|
||||
ym = yn;
|
||||
xm = xn;
|
||||
emsign = ensign;
|
||||
a = xm + q;
|
||||
if (a == xm || a <= xx0 || a >= xx1) break;// Must not skip over the zeros either side.
|
||||
xn = a;
|
||||
yn = getErr(xn,&ensign);
|
||||
}
|
||||
|
||||
mm[i] = xm; // Position of maximum
|
||||
yy[i] = ym; // Value of maximum
|
||||
|
||||
if (eclose > ym) eclose = ym;
|
||||
if (farther < ym) farther = ym;
|
||||
|
||||
xx0 = xx1; // Walk to next zero.
|
||||
} // end of search loop
|
||||
|
||||
q = (farther - eclose); // Decrease step size if error spread increased
|
||||
if (eclose != 0.0) q /= eclose; // Relative error spread
|
||||
if (q >= spread) delta *= 0.5; // Spread is increasing; decrease step size
|
||||
spread = q;
|
||||
|
||||
for (i = 0; i < neq; i++) {
|
||||
q = yy[i+1];
|
||||
if (q != 0.0) q = yy[i] / q - (bigfloat)1l;
|
||||
else q = 0.0625;
|
||||
if (q > (bigfloat)0.25) q = 0.25;
|
||||
q *= mm[i+1] - mm[i];
|
||||
step[i] = q * delta;
|
||||
}
|
||||
step[neq] = step[neq-1];
|
||||
|
||||
for (i = 0; i < neq; i++) { // Insert new locations for the zeros.
|
||||
xm = xx[i] - step[i];
|
||||
if (xm <= apstrt) continue;
|
||||
if (xm >= apend) continue;
|
||||
if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
|
||||
if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
|
||||
xx[i] = xm;
|
||||
}
|
||||
|
||||
delete [] yy;
|
||||
}
|
||||
|
||||
// Solve the equations
|
||||
void AlgRemez::equations(void) {
|
||||
bigfloat x, y, z;
|
||||
int i, j, ip;
|
||||
bigfloat *aa;
|
||||
|
||||
bigfloat *AA = new bigfloat[(neq)*(neq)];
|
||||
bigfloat *BB = new bigfloat[neq];
|
||||
|
||||
for (i = 0; i < neq; i++) { // set up the equations for solution by simq()
|
||||
ip = neq * i; // offset to 1st element of this row of matrix
|
||||
x = xx[i]; // the guess for this row
|
||||
y = func(x); // right-hand-side vector
|
||||
|
||||
z = (bigfloat)1l;
|
||||
aa = AA+ip;
|
||||
for (j = 0; j <= n; j++) {
|
||||
*aa++ = z;
|
||||
z *= x;
|
||||
}
|
||||
|
||||
z = (bigfloat)1l;
|
||||
for (j = 0; j < d; j++) {
|
||||
*aa++ = -y * z;
|
||||
z *= x;
|
||||
}
|
||||
BB[i] = y * z; // Right hand side vector
|
||||
}
|
||||
|
||||
// Solve the simultaneous linear equations.
|
||||
if (simq(AA, BB, param, neq)) {
|
||||
std::cout<<"simq failed\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
delete [] AA;
|
||||
delete [] BB;
|
||||
|
||||
}
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients
|
||||
// from the solution vector param
|
||||
bigfloat AlgRemez::approx(const bigfloat x) {
|
||||
bigfloat yn, yd;
|
||||
int i;
|
||||
|
||||
// Work backwards toward the constant term.
|
||||
yn = param[n]; // Highest order numerator coefficient
|
||||
for (i = n-1; i >= 0; i--) yn = x * yn + param[i];
|
||||
yd = x + param[n+d]; // Highest degree coefficient = 1.0
|
||||
for (i = n+d-1; i > n; i--) yd = x * yd + param[i];
|
||||
|
||||
return(yn/yd);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat AlgRemez::getErr(bigfloat x, int *sign) {
|
||||
bigfloat e, f;
|
||||
|
||||
f = func(x);
|
||||
e = approx(x) - f;
|
||||
if (f != 0) e /= f;
|
||||
if (e < (bigfloat)0.0) {
|
||||
*sign = -1;
|
||||
e = -e;
|
||||
}
|
||||
else *sign = 1;
|
||||
|
||||
return(e);
|
||||
}
|
||||
|
||||
// Calculate function required for the approximation.
|
||||
bigfloat AlgRemez::func(const bigfloat x) {
|
||||
|
||||
bigfloat z = (bigfloat)power_num / (bigfloat)power_den;
|
||||
bigfloat y;
|
||||
|
||||
if (x == (bigfloat)1.0) y = (bigfloat)1.0;
|
||||
else y = pow_bf(x,z);
|
||||
|
||||
if (a_length > 0) {
|
||||
bigfloat sum = 0l;
|
||||
for (int j=0; j<a_length; j++) sum += a[j]*pow_bf(x,a_power[j]);
|
||||
return y * exp_bf(sum);
|
||||
} else {
|
||||
return y;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Solve the system AX=B
|
||||
int AlgRemez::simq(bigfloat A[], bigfloat B[], bigfloat X[], int n) {
|
||||
|
||||
int i, j, ij, ip, ipj, ipk, ipn;
|
||||
int idxpiv, iback;
|
||||
int k, kp, kp1, kpk, kpn;
|
||||
int nip, nkp, nm1;
|
||||
bigfloat em, q, rownrm, big, size, pivot, sum;
|
||||
bigfloat *aa;
|
||||
|
||||
// simq() work vector
|
||||
int *IPS = new int[(neq) * sizeof(int)];
|
||||
|
||||
nm1 = n - 1;
|
||||
// Initialize IPS and X
|
||||
|
||||
ij = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
IPS[i] = i;
|
||||
rownrm = 0.0;
|
||||
for(j = 0; j < n; j++) {
|
||||
q = abs_bf(A[ij]);
|
||||
if(rownrm < q) rownrm = q;
|
||||
++ij;
|
||||
}
|
||||
if (rownrm == (bigfloat)0l) {
|
||||
std::cout<<"simq rownrm=0\n";
|
||||
delete [] IPS;
|
||||
return(1);
|
||||
}
|
||||
X[i] = (bigfloat)1.0 / rownrm;
|
||||
}
|
||||
|
||||
for (k = 0; k < nm1; k++) {
|
||||
big = 0.0;
|
||||
idxpiv = 0;
|
||||
|
||||
for (i = k; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
size = abs_bf(A[ipk]) * X[ip];
|
||||
if (size > big) {
|
||||
big = size;
|
||||
idxpiv = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (big == (bigfloat)0l) {
|
||||
std::cout<<"simq big=0\n";
|
||||
delete [] IPS;
|
||||
return(2);
|
||||
}
|
||||
if (idxpiv != k) {
|
||||
j = IPS[k];
|
||||
IPS[k] = IPS[idxpiv];
|
||||
IPS[idxpiv] = j;
|
||||
}
|
||||
kp = IPS[k];
|
||||
kpk = n*kp + k;
|
||||
pivot = A[kpk];
|
||||
kp1 = k+1;
|
||||
for (i = kp1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
em = -A[ipk] / pivot;
|
||||
A[ipk] = -em;
|
||||
nip = n*ip;
|
||||
nkp = n*kp;
|
||||
aa = A+nkp+kp1;
|
||||
for (j = kp1; j < n; j++) {
|
||||
ipj = nip + j;
|
||||
A[ipj] = A[ipj] + em * *aa++;
|
||||
}
|
||||
}
|
||||
}
|
||||
kpn = n * IPS[n-1] + n - 1; // last element of IPS[n] th row
|
||||
if (A[kpn] == (bigfloat)0l) {
|
||||
std::cout<<"simq A[kpn]=0\n";
|
||||
delete [] IPS;
|
||||
return(3);
|
||||
}
|
||||
|
||||
|
||||
ip = IPS[0];
|
||||
X[0] = B[ip];
|
||||
for (i = 1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipj = n * ip;
|
||||
sum = 0.0;
|
||||
for (j = 0; j < i; j++) {
|
||||
sum += A[ipj] * X[j];
|
||||
++ipj;
|
||||
}
|
||||
X[i] = B[ip] - sum;
|
||||
}
|
||||
|
||||
ipn = n * IPS[n-1] + n - 1;
|
||||
X[n-1] = X[n-1] / A[ipn];
|
||||
|
||||
for (iback = 1; iback < n; iback++) {
|
||||
//i goes (n-1),...,1
|
||||
i = nm1 - iback;
|
||||
ip = IPS[i];
|
||||
nip = n*ip;
|
||||
sum = 0.0;
|
||||
aa = A+nip+i+1;
|
||||
for (j= i + 1; j < n; j++)
|
||||
sum += *aa++ * X[j];
|
||||
X[i] = (X[i] - sum) / A[nip+i];
|
||||
}
|
||||
|
||||
delete [] IPS;
|
||||
return(0);
|
||||
}
|
||||
|
||||
// Calculate the roots of the approximation
|
||||
int AlgRemez::root() {
|
||||
|
||||
long i,j;
|
||||
bigfloat x,dx=0.05;
|
||||
bigfloat upper=1, lower=-100000;
|
||||
bigfloat tol = 1e-20;
|
||||
|
||||
bigfloat *poly = new bigfloat[neq+1];
|
||||
|
||||
// First find the numerator roots
|
||||
for (i=0; i<=n; i++) poly[i] = param[i];
|
||||
|
||||
for (i=n-1; i>=0; i--) {
|
||||
roots[i] = rtnewt(poly,i+1,lower,upper,tol);
|
||||
if (roots[i] == 0.0) {
|
||||
std::cout<<"Failure to converge on root "<<i+1<<"/"<<n<<"\n";
|
||||
return 0;
|
||||
}
|
||||
poly[0] = -poly[0]/roots[i];
|
||||
for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
|
||||
}
|
||||
|
||||
// Now find the denominator roots
|
||||
poly[d] = 1l;
|
||||
for (i=0; i<d; i++) poly[i] = param[n+1+i];
|
||||
|
||||
for (i=d-1; i>=0; i--) {
|
||||
poles[i]=rtnewt(poly,i+1,lower,upper,tol);
|
||||
if (poles[i] == 0.0) {
|
||||
std::cout<<"Failure to converge on pole "<<i+1<<"/"<<d<<"\n";
|
||||
return 0;
|
||||
}
|
||||
poly[0] = -poly[0]/poles[i];
|
||||
for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
|
||||
}
|
||||
|
||||
norm = param[n];
|
||||
|
||||
delete [] poly;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Evaluate the polynomial
|
||||
bigfloat AlgRemez::polyEval(bigfloat x, bigfloat *poly, long size) {
|
||||
bigfloat f = poly[size];
|
||||
for (int i=size-1; i>=0; i--) f = f*x + poly[i];
|
||||
return f;
|
||||
}
|
||||
|
||||
// Evaluate the differential of the polynomial
|
||||
bigfloat AlgRemez::polyDiff(bigfloat x, bigfloat *poly, long size) {
|
||||
bigfloat df = (bigfloat)size*poly[size];
|
||||
for (int i=size-1; i>0; i--) df = df*x + (bigfloat)i*poly[i];
|
||||
return df;
|
||||
}
|
||||
|
||||
|
||||
// Newton's method to calculate roots
|
||||
bigfloat AlgRemez::rtnewt(bigfloat *poly, long i, bigfloat x1,
|
||||
bigfloat x2, bigfloat xacc) {
|
||||
int j;
|
||||
bigfloat df, dx, f, rtn;
|
||||
|
||||
rtn=(bigfloat)0.5*(x1+x2);
|
||||
for (j=1; j<=JMAX;j++) {
|
||||
f = polyEval(rtn, poly, i);
|
||||
df = polyDiff(rtn, poly, i);
|
||||
dx = f/df;
|
||||
rtn -= dx;
|
||||
if (abs_bf(dx) < xacc) return rtn;
|
||||
}
|
||||
std::cout<<"Maximum number of iterations exceeded in rtnewt\n";
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Evaluate the partial fraction expansion of the rational function
|
||||
// with res roots and poles poles. Result is overwritten on input
|
||||
// arrays.
|
||||
void AlgRemez::pfe(bigfloat *res, bigfloat *poles, bigfloat norm) {
|
||||
int i,j,small;
|
||||
bigfloat temp;
|
||||
bigfloat *numerator = new bigfloat[n];
|
||||
bigfloat *denominator = new bigfloat[d];
|
||||
|
||||
// Construct the polynomials explicitly
|
||||
for (i=1; i<n; i++) {
|
||||
numerator[i] = 0l;
|
||||
denominator[i] = 0l;
|
||||
}
|
||||
numerator[0]=1l;
|
||||
denominator[0]=1l;
|
||||
|
||||
for (j=0; j<n; j++) {
|
||||
for (i=n-1; i>=0; i--) {
|
||||
numerator[i] *= -res[j];
|
||||
denominator[i] *= -poles[j];
|
||||
if (i>0) {
|
||||
numerator[i] += numerator[i-1];
|
||||
denominator[i] += denominator[i-1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to proper fraction form.
|
||||
// Fraction is now in the form 1 + n/d, where O(n)+1=O(d)
|
||||
for (i=0; i<n; i++) numerator[i] -= denominator[i];
|
||||
|
||||
// Find the residues of the partial fraction expansion and absorb the
|
||||
// coefficients.
|
||||
for (i=0; i<n; i++) {
|
||||
res[i] = 0l;
|
||||
for (j=n-1; j>=0; j--) {
|
||||
res[i] = poles[i]*res[i]+numerator[j];
|
||||
}
|
||||
for (j=n-1; j>=0; j--) {
|
||||
if (i!=j) res[i] /= poles[i]-poles[j];
|
||||
}
|
||||
res[i] *= norm;
|
||||
}
|
||||
|
||||
// res now holds the residues
|
||||
j = 0;
|
||||
for (i=0; i<n; i++) poles[i] = -poles[i];
|
||||
|
||||
// Move the ordering of the poles from smallest to largest
|
||||
for (j=0; j<n; j++) {
|
||||
small = j;
|
||||
for (i=j+1; i<n; i++) {
|
||||
if (poles[i] < poles[small]) small = i;
|
||||
}
|
||||
if (small != j) {
|
||||
temp = poles[small];
|
||||
poles[small] = poles[j];
|
||||
poles[j] = temp;
|
||||
temp = res[small];
|
||||
res[small] = res[j];
|
||||
res[j] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
delete [] numerator;
|
||||
delete [] denominator;
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateApprox(double x) {
|
||||
return (double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateInverseApprox(double x) {
|
||||
return 1.0/(double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateFunc(double x) {
|
||||
return (double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateInverseFunc(double x) {
|
||||
return 1.0/(double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
void AlgRemez::csv(std::ostream & os)
|
||||
{
|
||||
double lambda_low = apstrt;
|
||||
double lambda_high= apend;
|
||||
for (double x=lambda_low; x<lambda_high; x*=1.05) {
|
||||
double f = evaluateFunc(x);
|
||||
double r = evaluateApprox(x);
|
||||
os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
184
Grid/algorithms/approx/Remez.h
Normal file
184
Grid/algorithms/approx/Remez.h
Normal file
@ -0,0 +1,184 @@
|
||||
/*
|
||||
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
alg_remez.h
|
||||
|
||||
AlgRemez is an implementation of the Remez algorithm, which in this
|
||||
case is used for generating the optimal nth root rational
|
||||
approximation.
|
||||
|
||||
Note this class requires the gnu multiprecision (GNU MP) library.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_ALG_REMEZ_H
|
||||
#define INCLUDED_ALG_REMEZ_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Grid/GridStd.h>
|
||||
|
||||
#ifdef HAVE_LIBGMP
|
||||
#include "bigfloat.h"
|
||||
#else
|
||||
#include "bigfloat_double.h"
|
||||
#endif
|
||||
|
||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||
|
||||
/*
|
||||
*Usage examples
|
||||
AlgRemez remez(lambda_low,lambda_high,precision);
|
||||
error = remez.generateApprox(n,d,y,z);
|
||||
remez.getPFE(res,pole,&norm);
|
||||
remez.getIPFE(res,pole,&norm);
|
||||
remez.csv(ostream &os);
|
||||
*/
|
||||
|
||||
class AlgRemez
|
||||
{
|
||||
private:
|
||||
char *cname;
|
||||
|
||||
// The approximation parameters
|
||||
bigfloat *param, *roots, *poles;
|
||||
bigfloat norm;
|
||||
|
||||
// The numerator and denominator degree (n=d)
|
||||
int n, d;
|
||||
|
||||
// The bounds of the approximation
|
||||
bigfloat apstrt, apwidt, apend;
|
||||
|
||||
// the numerator and denominator of the power we are approximating
|
||||
unsigned long power_num;
|
||||
unsigned long power_den;
|
||||
|
||||
// Flag to determine whether the arrays have been allocated
|
||||
int alloc;
|
||||
|
||||
// Flag to determine whether the roots have been found
|
||||
int foundRoots;
|
||||
|
||||
// Variables used to calculate the approximation
|
||||
int nd1, iter;
|
||||
bigfloat *xx, *mm, *step;
|
||||
bigfloat delta, spread, tolerance;
|
||||
|
||||
// The exponential summation coefficients
|
||||
bigfloat *a;
|
||||
int *a_power;
|
||||
int a_length;
|
||||
|
||||
// The number of equations we must solve at each iteration (n+d+1)
|
||||
int neq;
|
||||
|
||||
// The precision of the GNU MP library
|
||||
long prec;
|
||||
|
||||
// Initial values of maximal and minmal errors
|
||||
void initialGuess();
|
||||
|
||||
// Solve the equations
|
||||
void equations();
|
||||
|
||||
// Search for error maxima and minima
|
||||
void search(bigfloat *step);
|
||||
|
||||
// Initialise step sizes
|
||||
void stpini(bigfloat *step);
|
||||
|
||||
// Calculate the roots of the approximation
|
||||
int root();
|
||||
|
||||
// Evaluate the polynomial
|
||||
bigfloat polyEval(bigfloat x, bigfloat *poly, long size);
|
||||
//complex_bf polyEval(complex_bf x, complex_bf *poly, long size);
|
||||
|
||||
// Evaluate the differential of the polynomial
|
||||
bigfloat polyDiff(bigfloat x, bigfloat *poly, long size);
|
||||
//complex_bf polyDiff(complex_bf x, complex_bf *poly, long size);
|
||||
|
||||
// Newton's method to calculate roots
|
||||
bigfloat rtnewt(bigfloat *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
|
||||
//complex_bf rtnewt(complex_bf *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
|
||||
|
||||
// Evaluate the partial fraction expansion of the rational function
|
||||
// with res roots and poles poles. Result is overwritten on input
|
||||
// arrays.
|
||||
void pfe(bigfloat *res, bigfloat* poles, bigfloat norm);
|
||||
|
||||
// Calculate function required for the approximation
|
||||
bigfloat func(bigfloat x);
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat getErr(bigfloat x, int *sign);
|
||||
|
||||
// Solve the system AX=B
|
||||
int simq(bigfloat *A, bigfloat *B, bigfloat *X, int n);
|
||||
|
||||
// Free memory and reallocate as necessary
|
||||
void allocate(int num_degree, int den_degree);
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
bigfloat approx(bigfloat x);
|
||||
|
||||
public:
|
||||
|
||||
// Constructor
|
||||
AlgRemez(double lower, double upper, long prec);
|
||||
|
||||
// Destructor
|
||||
virtual ~AlgRemez();
|
||||
|
||||
int getDegree(void){
|
||||
assert(n==d);
|
||||
return n;
|
||||
}
|
||||
// Reset the bounds of the approximation
|
||||
void setBounds(double lower, double upper);
|
||||
// Reset the bounds of the approximation
|
||||
void getBounds(double &lower, double &upper) {
|
||||
lower=(double)apstrt;
|
||||
upper=(double)apend;
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
unsigned long power_num, unsigned long power_den,
|
||||
int a_len, double* a_param, int* a_pow);
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
unsigned long power_num, unsigned long power_den);
|
||||
double generateApprox(int degree, unsigned long power_num,
|
||||
unsigned long power_den);
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(pnum/pden)
|
||||
int getPFE(double *res, double *pole, double *norm);
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(-pnum/pden)
|
||||
int getIPFE(double *res, double *pole, double *norm);
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
double evaluateApprox(double x);
|
||||
|
||||
// Evaluate the rational form Q(x)/P(x) using coefficients from the
|
||||
// solution vector param
|
||||
double evaluateInverseApprox(double x);
|
||||
|
||||
// Calculate function required for the approximation
|
||||
double evaluateFunc(double x);
|
||||
|
||||
// Calculate inverse function required for the approximation
|
||||
double evaluateInverseFunc(double x);
|
||||
|
||||
// Dump csv of function, approx and error
|
||||
void csv(std::ostream &os);
|
||||
};
|
||||
|
||||
#endif // Include guard
|
||||
|
||||
|
||||
|
727
Grid/algorithms/approx/Zolotarev.cc
Normal file
727
Grid/algorithms/approx/Zolotarev.cc
Normal file
@ -0,0 +1,727 @@
|
||||
/* -*- Mode: C; comment-column: 22; fill-column: 79; compile-command: "gcc -o zolotarev zolotarev.c -ansi -pedantic -lm -DTEST"; -*- */
|
||||
#define VERSION Source Time-stamp: <2015-05-18 16:32:08 neo>
|
||||
|
||||
/* These C routines evalute the optimal rational approximation to the signum
|
||||
* function for epsilon < |x| < 1 using Zolotarev's theorem.
|
||||
*
|
||||
* To obtain reliable results for high degree approximations (large n) it is
|
||||
* necessary to compute using sufficiently high precision arithmetic. To this
|
||||
* end the code has been parameterised to work with the preprocessor names
|
||||
* INTERNAL_PRECISION and PRECISION set to float, double, or long double as
|
||||
* appropriate. INTERNAL_PRECISION is used in computing the Zolotarev
|
||||
* coefficients, which are converted to PRECISION before being returned to the
|
||||
* caller. Presumably even higher precision could be obtained using GMP or
|
||||
* similar package, but bear in mind that rounding errors might also be
|
||||
* significant in evaluating the resulting polynomial. The convergence criteria
|
||||
* have been written in a precision-independent form. */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
#ifndef INTERNAL_PRECISION
|
||||
#define INTERNAL_PRECISION double
|
||||
#endif
|
||||
|
||||
#include "Zolotarev.h"
|
||||
#define ZOLOTAREV_INTERNAL
|
||||
#undef ZOLOTAREV_DATA
|
||||
#define ZOLOTAREV_DATA izd
|
||||
#undef ZPRECISION
|
||||
#define ZPRECISION INTERNAL_PRECISION
|
||||
#include "Zolotarev.h"
|
||||
#undef ZOLOTAREV_INTERNAL
|
||||
|
||||
/* The ANSI standard appears not to know what pi is */
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
|
||||
169399375105820974944592307816406286208998628034825342117068)
|
||||
#endif
|
||||
|
||||
#define ZERO ((INTERNAL_PRECISION) 0)
|
||||
#define ONE ((INTERNAL_PRECISION) 1)
|
||||
#define TWO ((INTERNAL_PRECISION) 2)
|
||||
#define THREE ((INTERNAL_PRECISION) 3)
|
||||
#define FOUR ((INTERNAL_PRECISION) 4)
|
||||
#define HALF (ONE/TWO)
|
||||
|
||||
/* The following obscenity seems to be the simplest (?) way to coerce the C
|
||||
* preprocessor to convert the value of a preprocessor token into a string. */
|
||||
|
||||
#define PP2(x) #x
|
||||
#define PP1(a,b,c) a ## b(c)
|
||||
#define STRINGIFY(name) PP1(PP,2,name)
|
||||
|
||||
/* Compute the partial fraction expansion coefficients (alpha) from the
|
||||
* factored form */
|
||||
namespace Grid {
|
||||
namespace Approx {
|
||||
|
||||
static void construct_partfrac(izd *z) {
|
||||
int dn = z -> dn, dd = z -> dd, type = z -> type;
|
||||
int j, k, da = dd + 1 + type;
|
||||
INTERNAL_PRECISION A = z -> A, *a = z -> a, *ap = z -> ap, *alpha;
|
||||
alpha = (INTERNAL_PRECISION*) malloc(da * sizeof(INTERNAL_PRECISION));
|
||||
for (j = 0; j < dd; j++)
|
||||
for (k = 0, alpha[j] = A; k < dd; k++)
|
||||
alpha[j] *=
|
||||
(k < dn ? ap[j] - a[k] : ONE) / (k == j ? ONE : ap[j] - ap[k]);
|
||||
if(type == 1) /* implicit pole at zero? */
|
||||
for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] : ONE); k < dd; k++) {
|
||||
alpha[dd] *= a[k] / ap[k];
|
||||
alpha[k] *= (dn > dd ? ap[k] - a[dd] : ONE) / ap[k];
|
||||
}
|
||||
alpha[da-1] = dn == da - 1 ? A : ZERO;
|
||||
z -> alpha = alpha;
|
||||
z -> da = da;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Convert factored polynomial into dense polynomial. The input is the overall
|
||||
* factor A and the roots a[i], such that p = A product(x - a[i], i = 1..d) */
|
||||
|
||||
static INTERNAL_PRECISION *poly_factored_to_dense(INTERNAL_PRECISION A,
|
||||
INTERNAL_PRECISION *a,
|
||||
int d) {
|
||||
INTERNAL_PRECISION *p;
|
||||
int i, j;
|
||||
p = (INTERNAL_PRECISION *) malloc((d + 2) * sizeof(INTERNAL_PRECISION));
|
||||
p[0] = A;
|
||||
for (i = 0; i < d; i++) {
|
||||
p[i+1] = p[i];
|
||||
for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
|
||||
p[0] *= - a[i];
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Convert a rational function of the form R0(x) = x p(x^2)/q(x^2) (type 0) or
|
||||
* R1(x) = p(x^2)/[x q(x^2)] (type 1) into its continued fraction
|
||||
* representation. We assume that 0 <= deg(q) - deg(p) <= 1 for type 0 and 0 <=
|
||||
* deg(p) - deg(q) <= 1 for type 1. On input p and q are in factored form, and
|
||||
* deg(q) = dq, deg(p) = dp. The output is the continued fraction coefficients
|
||||
* beta, where R(x) = beta[0] x + 1/(beta[1] x + 1/(...)).
|
||||
*
|
||||
* The method used is as follows. There are four cases to consider:
|
||||
*
|
||||
* 0.i. Type 0, deg p = deg q
|
||||
*
|
||||
* 0.ii. Type 0, deg p = deg q - 1
|
||||
*
|
||||
* 1.i. Type 1, deg p = deg q
|
||||
*
|
||||
* 1.ii. Type 1, deg p = deg q + 1
|
||||
*
|
||||
* and these are connected by two transformations:
|
||||
*
|
||||
* A. To obtain a continued fraction expansion of type 1 we use a single-step
|
||||
* polynomial division we find beta and r(x) such that p(x) = beta x q(x) +
|
||||
* r(x), with deg(r) = deg(q). This implies that p(x^2) = beta x^2 q(x^2) +
|
||||
* r(x^2), and thus R1(x) = x beta + r(x^2)/(x q(x^2)) = x beta + 1/R0(x)
|
||||
* with R0(x) = x q(x^2)/r(x^2).
|
||||
*
|
||||
* B. A continued fraction expansion of type 0 is obtained in a similar, but
|
||||
* not identical, manner. We use the polynomial division algorithm to compute
|
||||
* the quotient beta and the remainder r that satisfy p(x) = beta q(x) + r(x)
|
||||
* with deg(r) = deg(q) - 1. We thus have x p(x^2) = x beta q(x^2) + x r(x^2),
|
||||
* so R0(x) = x beta + x r(x^2)/q(x^2) = x beta + 1/R1(x) with R1(x) = q(x^2) /
|
||||
* (x r(x^2)).
|
||||
*
|
||||
* Note that the deg(r) must be exactly deg(q) for (A) and deg(q) - 1 for (B)
|
||||
* because p and q have disjoint roots all of multiplicity 1. This means that
|
||||
* the division algorithm requires only a single polynomial subtraction step.
|
||||
*
|
||||
* The transformations between the cases form the following finite state
|
||||
* automaton:
|
||||
*
|
||||
* +------+ +------+ +------+ +------+
|
||||
* | | | | ---(A)---> | | | |
|
||||
* | 0.ii | ---(B)---> | 1.ii | | 0.i | <---(A)--- | 1.i |
|
||||
* | | | | <---(B)--- | | | |
|
||||
* +------+ +------+ +------+ +------+
|
||||
*/
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *, int, int);
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *, int, int);
|
||||
|
||||
static void construct_contfrac(izd *z){
|
||||
INTERNAL_PRECISION *r, A = z -> A, *p = z -> a, *q = z -> ap;
|
||||
int dp = z -> dn, dq = z -> dd, type = z -> type;
|
||||
|
||||
z -> db = 2 * dq + 1 + type;
|
||||
z -> beta = (INTERNAL_PRECISION *)
|
||||
malloc(z -> db * sizeof(INTERNAL_PRECISION));
|
||||
p = poly_factored_to_dense(A, p, dp);
|
||||
q = poly_factored_to_dense(ONE, q, dq);
|
||||
r = (INTERNAL_PRECISION *) malloc((MAX(dp,dq) + 1) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
if (type == 0) (void) contfrac_B(z -> beta, p, q, r, dp, dq);
|
||||
else (void) contfrac_A(z -> beta, p, q, r, dp, dq);
|
||||
free(p); free(q); free(r);
|
||||
return;
|
||||
}
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *beta,
|
||||
INTERNAL_PRECISION *p,
|
||||
INTERNAL_PRECISION *q,
|
||||
INTERNAL_PRECISION *r, int dp, int dq) {
|
||||
INTERNAL_PRECISION quot, *rb;
|
||||
int j;
|
||||
|
||||
/* p(x) = x beta q(x) + r(x); dp = dq or dp = dq + 1 */
|
||||
|
||||
quot = dp == dq ? ZERO : p[dp] / q[dq];
|
||||
r[0] = p[0];
|
||||
for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
|
||||
#ifdef DEBUG
|
||||
printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
|
||||
__FUNCTION__, dp, dq, (float) quot);
|
||||
for (j = 0; j <= dq + 1; j++)
|
||||
printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
|
||||
j, (float) (j > dp ? ZERO : p[j]),
|
||||
j, (float) (j == 0 ? ZERO : q[j-1]),
|
||||
j, (float) (j == dp ? ZERO : r[j]));
|
||||
#endif /* DEBUG */
|
||||
*(rb = contfrac_B(beta, q, r, p, dq, dq)) = quot;
|
||||
return rb + 1;
|
||||
}
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *beta,
|
||||
INTERNAL_PRECISION *p,
|
||||
INTERNAL_PRECISION *q,
|
||||
INTERNAL_PRECISION *r, int dp, int dq) {
|
||||
INTERNAL_PRECISION quot, *rb;
|
||||
int j;
|
||||
|
||||
/* p(x) = beta q(x) + r(x); dp = dq or dp = dq - 1 */
|
||||
|
||||
quot = dp == dq ? p[dp] / q[dq] : ZERO;
|
||||
for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
|
||||
#ifdef DEBUG
|
||||
printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
|
||||
__FUNCTION__, dp, dq, (float) quot);
|
||||
for (j = 0; j <= dq; j++)
|
||||
printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
|
||||
j, (float) (j > dp ? ZERO : p[j]),
|
||||
j, (float) q[j],
|
||||
j, (float) (j == dq ? ZERO : r[j]));
|
||||
#endif /* DEBUG */
|
||||
*(rb = dq > 0 ? contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
|
||||
return rb + 1;
|
||||
}
|
||||
|
||||
/* The global variable U is used to hold the argument u throughout the AGM
|
||||
* recursion. The global variables F and K are set in the innermost
|
||||
* instantiation of the recursive function AGM to the values of the elliptic
|
||||
* integrals F(u,k) and K(k) respectively. They must be made thread local to
|
||||
* make this code thread-safe in a multithreaded environment. */
|
||||
|
||||
static INTERNAL_PRECISION U, F, K; /* THREAD LOCAL */
|
||||
|
||||
/* Recursive implementation of Gauss' arithmetico-geometric mean, which is the
|
||||
* kernel of the method used to compute the Jacobian elliptic functions
|
||||
* sn(u,k), cn(u,k), and dn(u,k) with parameter k (where 0 < k < 1), as well
|
||||
* as the elliptic integral F(s,k) satisfying F(sn(u,k)) = u and the complete
|
||||
* elliptic integral K(k).
|
||||
*
|
||||
* The algorithm used is a recursive implementation of the Gauss (Landen)
|
||||
* transformation.
|
||||
*
|
||||
* The function returns the value of sn(u,k'), where k' is the dual parameter,
|
||||
* and also sets the values of the global variables F and K. The latter is
|
||||
* used to determine the sign of cn(u,k').
|
||||
*
|
||||
* The algorithm is deemed to have converged when b ceases to increase. This
|
||||
* works whatever INTERNAL_PRECISION is specified. */
|
||||
|
||||
static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a,
|
||||
INTERNAL_PRECISION b,
|
||||
INTERNAL_PRECISION s) {
|
||||
static INTERNAL_PRECISION pb = -ONE;
|
||||
INTERNAL_PRECISION c, d, xi;
|
||||
|
||||
if (b <= pb) {
|
||||
pb = -ONE;
|
||||
F = asin(s) / a; /* Here, a is the AGM */
|
||||
K = M_PI / (TWO * a);
|
||||
return sin(U * a);
|
||||
}
|
||||
pb = b;
|
||||
c = a - b;
|
||||
d = a + b;
|
||||
xi = AGM(HALF*d, sqrt(a*b), ONE + c*c == ONE ?
|
||||
HALF*s*d/a : (a - sqrt(a*a - s*s*c*d))/(c*s));
|
||||
return 2*a*xi / (d + c*xi*xi);
|
||||
}
|
||||
|
||||
/* Computes sn(u,k), cn(u,k), dn(u,k), F(u,k), and K(k). It is essentially a
|
||||
* wrapper for the routine AGM. The sign of cn(u,k) is defined to be -1 if
|
||||
* K(k) < u < 3*K(k) and +1 otherwise, and thus sign is computed by some quite
|
||||
* unnecessarily obfuscated bit manipulations. */
|
||||
|
||||
static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
|
||||
INTERNAL_PRECISION* sn, INTERNAL_PRECISION* cn,
|
||||
INTERNAL_PRECISION* dn, INTERNAL_PRECISION* elF,
|
||||
INTERNAL_PRECISION* elK) {
|
||||
int sgn;
|
||||
U = u;
|
||||
*sn = AGM(ONE, sqrt(ONE - k*k), u);
|
||||
sgn = ((int) (fabs(u) / K)) % 4; /* sgn = 0, 1, 2, 3 */
|
||||
sgn ^= sgn >> 1; /* (sgn & 1) = 0, 1, 1, 0 */
|
||||
sgn = 1 - ((sgn & 1) << 1); /* sgn = 1, -1, -1, 1 */
|
||||
*cn = ((INTERNAL_PRECISION) sgn) * sqrt(ONE - *sn * *sn);
|
||||
*dn = sqrt(ONE - k*k* *sn * *sn);
|
||||
*elF = F;
|
||||
*elK = K;
|
||||
}
|
||||
|
||||
/* Compute the coefficients for the optimal rational approximation R(x) to
|
||||
* sgn(x) of degree n over the interval epsilon < |x| < 1 using Zolotarev's
|
||||
* formula.
|
||||
*
|
||||
* Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
|
||||
* type = 1 for the approximation which is infinite at x = 0. */
|
||||
|
||||
zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
|
||||
INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
|
||||
l, invlambda, xi, xisq, *tv, s, opl;
|
||||
int m, czero, ts;
|
||||
zolotarev_data *zd;
|
||||
izd *d = (izd*) malloc(sizeof(izd));
|
||||
|
||||
d -> type = type;
|
||||
d -> epsilon = (INTERNAL_PRECISION) epsilon;
|
||||
d -> n = n;
|
||||
d -> dd = n / 2;
|
||||
d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
|
||||
d -> deg_denom = 2 * d -> dd;
|
||||
d -> deg_num = 2 * d -> dn + 1;
|
||||
|
||||
d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
ksq = d -> epsilon * d -> epsilon;
|
||||
kp = sqrt(ONE - ksq);
|
||||
sncndnFK(ZERO, kp, &sn, &cn, &dn, &F, &Kp); /* compute Kp = K(kp) */
|
||||
z0 = TWO * Kp / (INTERNAL_PRECISION) n;
|
||||
M = ONE;
|
||||
A = ONE / d -> epsilon;
|
||||
|
||||
sncndnFK(HALF * z0, kp, &sn, &cn, &dn, &F, &Kj); /* compute xi */
|
||||
xi = ONE / dn;
|
||||
xisq = xi * xi;
|
||||
invlambda = xi;
|
||||
|
||||
for (m = 0; m < d -> dd; m++) {
|
||||
czero = 2 * (m + 1) == n; /* n even and m = dd -1 */
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + ONE);
|
||||
sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
t = cn / sn;
|
||||
c = - t*t;
|
||||
if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + HALF);
|
||||
sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
t = cn / sn;
|
||||
cp = - t*t;
|
||||
(d -> ap)[d -> dd - 1 - m] = ksq / cp;
|
||||
|
||||
M *= (ONE - c) / (ONE - cp);
|
||||
A *= (czero ? -ksq : c) * (ONE - cp) / (cp * (ONE - c));
|
||||
invlambda *= (ONE - c*xisq) / (ONE - cp*xisq);
|
||||
}
|
||||
invlambda /= M;
|
||||
d -> A = TWO / (ONE + invlambda) * A;
|
||||
d -> Delta = (invlambda - ONE) / (invlambda + ONE);
|
||||
|
||||
d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
l = ONE / invlambda;
|
||||
opl = ONE + l;
|
||||
sncndnFK(sqrt( d -> type == 1
|
||||
? (THREE + l) / (FOUR * opl)
|
||||
: (ONE + THREE*l) / (opl*opl*opl)
|
||||
), sqrt(ONE - l*l), &sn, &cn, &dn, &F, &Kj);
|
||||
s = M * F;
|
||||
for (m = 0; m < d -> n; m++) {
|
||||
sncndnFK(s + TWO*Kp*m/n, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
d -> gamma[m] = d -> epsilon / dn;
|
||||
}
|
||||
|
||||
/* If R(x) is a Zolotarev rational approximation of degree (n,m) with maximum
|
||||
* error Delta, then (1 - Delta^2) / R(x) is also an optimal Chebyshev
|
||||
* approximation of degree (m,n) */
|
||||
|
||||
if (d -> type == 1) {
|
||||
d -> A = (ONE - d -> Delta * d -> Delta) / d -> A;
|
||||
tv = d -> a; d -> a = d -> ap; d -> ap = tv;
|
||||
ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
|
||||
ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
|
||||
}
|
||||
|
||||
construct_partfrac(d);
|
||||
construct_contfrac(d);
|
||||
|
||||
/* Converting everything to PRECISION for external use only */
|
||||
|
||||
zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
|
||||
zd -> A = (PRECISION) d -> A;
|
||||
zd -> Delta = (PRECISION) d -> Delta;
|
||||
zd -> epsilon = (PRECISION) d -> epsilon;
|
||||
zd -> n = d -> n;
|
||||
zd -> type = d -> type;
|
||||
zd -> dn = d -> dn;
|
||||
zd -> dd = d -> dd;
|
||||
zd -> da = d -> da;
|
||||
zd -> db = d -> db;
|
||||
zd -> deg_num = d -> deg_num;
|
||||
zd -> deg_denom = d -> deg_denom;
|
||||
|
||||
zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
|
||||
free(d -> a);
|
||||
|
||||
zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
|
||||
free(d -> ap);
|
||||
|
||||
zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
|
||||
free(d -> alpha);
|
||||
|
||||
zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
|
||||
free(d -> beta);
|
||||
|
||||
zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
|
||||
free(d -> gamma);
|
||||
|
||||
free(d);
|
||||
return zd;
|
||||
}
|
||||
|
||||
|
||||
void zolotarev_free(zolotarev_data *zdata)
|
||||
{
|
||||
free(zdata -> a);
|
||||
free(zdata -> ap);
|
||||
free(zdata -> alpha);
|
||||
free(zdata -> beta);
|
||||
free(zdata -> gamma);
|
||||
free(zdata);
|
||||
}
|
||||
|
||||
|
||||
zolotarev_data* higham(PRECISION epsilon, int n) {
|
||||
INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
|
||||
int m, czero;
|
||||
zolotarev_data *zd;
|
||||
izd *d = (izd*) malloc(sizeof(izd));
|
||||
|
||||
d -> type = 0;
|
||||
d -> epsilon = (INTERNAL_PRECISION) epsilon;
|
||||
d -> n = n;
|
||||
d -> dd = n / 2;
|
||||
d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
|
||||
d -> deg_denom = 2 * d -> dd;
|
||||
d -> deg_num = 2 * d -> dn + 1;
|
||||
|
||||
d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
A = (INTERNAL_PRECISION) n;
|
||||
z0 = M_PI / A;
|
||||
A = n % 2 == 0 ? A : ONE / A;
|
||||
M = d -> epsilon * A;
|
||||
epssq = d -> epsilon * d -> epsilon;
|
||||
|
||||
for (m = 0; m < d -> dd; m++) {
|
||||
czero = 2 * (m + 1) == n; /* n even and m = dd - 1*/
|
||||
|
||||
if (!czero) {
|
||||
z = z0 * ((INTERNAL_PRECISION) m + ONE);
|
||||
t = tan(z);
|
||||
c = - t*t;
|
||||
(d -> a)[d -> dn - 1 - m] = c;
|
||||
M *= epssq - c;
|
||||
}
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + HALF);
|
||||
t = tan(z);
|
||||
cp = - t*t;
|
||||
(d -> ap)[d -> dd - 1 - m] = cp;
|
||||
M /= epssq - cp;
|
||||
}
|
||||
|
||||
d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
for (m = 0; m < d -> n; m++) d -> gamma[m] = ONE;
|
||||
|
||||
d -> A = A;
|
||||
d -> Delta = ONE - M;
|
||||
|
||||
construct_partfrac(d);
|
||||
construct_contfrac(d);
|
||||
|
||||
/* Converting everything to PRECISION for external use only */
|
||||
|
||||
zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
|
||||
zd -> A = (PRECISION) d -> A;
|
||||
zd -> Delta = (PRECISION) d -> Delta;
|
||||
zd -> epsilon = (PRECISION) d -> epsilon;
|
||||
zd -> n = d -> n;
|
||||
zd -> type = d -> type;
|
||||
zd -> dn = d -> dn;
|
||||
zd -> dd = d -> dd;
|
||||
zd -> da = d -> da;
|
||||
zd -> db = d -> db;
|
||||
zd -> deg_num = d -> deg_num;
|
||||
zd -> deg_denom = d -> deg_denom;
|
||||
|
||||
zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
|
||||
free(d -> a);
|
||||
|
||||
zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
|
||||
free(d -> ap);
|
||||
|
||||
zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
|
||||
free(d -> alpha);
|
||||
|
||||
zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
|
||||
free(d -> beta);
|
||||
|
||||
zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
|
||||
free(d -> gamma);
|
||||
|
||||
free(d);
|
||||
return zd;
|
||||
}
|
||||
}}
|
||||
|
||||
#ifdef TEST
|
||||
|
||||
#undef ZERO
|
||||
#define ZERO ((PRECISION) 0)
|
||||
#undef ONE
|
||||
#define ONE ((PRECISION) 1)
|
||||
#undef TWO
|
||||
#define TWO ((PRECISION) 2)
|
||||
|
||||
/* Evaluate the rational approximation R(x) using the factored form */
|
||||
|
||||
static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R;
|
||||
|
||||
if (rdata -> type == 0) {
|
||||
R = rdata -> A * x;
|
||||
for (m = 0; m < rdata -> deg_denom/2; m++)
|
||||
R *= (2*(m+1) > rdata -> deg_num ? ONE : x*x - rdata -> a[m]) /
|
||||
(x*x - rdata -> ap[m]);
|
||||
} else {
|
||||
R = rdata -> A / x;
|
||||
for (m = 0; m < rdata -> deg_num/2; m++)
|
||||
R *= (x*x - rdata -> a[m]) /
|
||||
(2*(m+1) > rdata -> deg_denom ? ONE : x*x - rdata -> ap[m]);
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using the partial fraction form */
|
||||
|
||||
static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R = rdata -> alpha[rdata -> da - 1];
|
||||
for (m = 0; m < rdata -> dd; m++)
|
||||
R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
|
||||
if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
|
||||
return R * x;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using continued fraction form.
|
||||
*
|
||||
* If x = 0 and type = 1 then the result should be INF, whereas if x = 0 and
|
||||
* type = 0 then the result should be 0, but division by zero will occur at
|
||||
* intermediate stages of the evaluation. For IEEE implementations with
|
||||
* non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
|
||||
* but with signalling overflow you will get an error message. */
|
||||
|
||||
static PRECISION zolotarev_contfrac_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R = rdata -> beta[0] * x;
|
||||
for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
|
||||
return R;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using Cayley form */
|
||||
|
||||
static PRECISION zolotarev_cayley_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION T;
|
||||
|
||||
T = rdata -> type == 0 ? ONE : -ONE;
|
||||
for (m = 0; m < rdata -> n; m++)
|
||||
T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
|
||||
return (ONE - T) / (ONE + T);
|
||||
}
|
||||
|
||||
/* Test program. Apart from printing out the parameters for R(x) it produces
|
||||
* the following data files for plotting (unless NPLOT is defined):
|
||||
*
|
||||
* zolotarev-fn is a plot of R(x) for |x| < 1.2. This should look like sgn(x).
|
||||
*
|
||||
* zolotarev-err is a plot of the error |R(x) - sgn(x)| scaled by 1/Delta. This
|
||||
* should oscillate deg_num + deg_denom + 2 times between +1 and -1 over the
|
||||
* domain epsilon <= |x| <= 1.
|
||||
*
|
||||
* If ALLPLOTS is defined then zolotarev-partfrac (zolotarev-contfrac) is a
|
||||
* plot of the difference between the values of R(x) computed using the
|
||||
* factored form and the partial fraction (continued fraction) form, scaled by
|
||||
* 1/Delta. It should be zero everywhere. */
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int m, n, plotpts = 5000, type = 0;
|
||||
float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
|
||||
zolotarev_data *rdata;
|
||||
PRECISION y;
|
||||
FILE *plot_function, *plot_error,
|
||||
*plot_partfrac, *plot_contfrac, *plot_cayley;
|
||||
|
||||
if (argc < 3 || argc > 4) {
|
||||
fprintf(stderr, "Usage: %s epsilon n [type]\n", *argv);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
sscanf(argv[1], "%g", &eps); /* First argument is epsilon */
|
||||
sscanf(argv[2], "%d", &n); /* Second argument is n */
|
||||
if (argc == 4) sscanf(argv[3], "%d", &type); /* Third argument is type */
|
||||
|
||||
if (type < 0 || type > 2) {
|
||||
fprintf(stderr, "%s: type must be 0 (Zolotarev R(0) = 0),\n"
|
||||
"\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
rdata = type == 2
|
||||
? higham((PRECISION) eps, n)
|
||||
: zolotarev((PRECISION) eps, n, type);
|
||||
|
||||
printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t"
|
||||
STRINGIFY(VERSION) "\n\t" STRINGIFY(HVERSION)
|
||||
"\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
|
||||
"\tPRECISION = " STRINGIFY(PRECISION)
|
||||
"\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
|
||||
"\tDelta = %g (maximum error)\n\n"
|
||||
"\tA = %g (overall factor)\n",
|
||||
(float) rdata -> epsilon, rdata -> n, type,
|
||||
rdata -> deg_num, rdata -> deg_denom,
|
||||
rdata -> type == 1 ? "infinite" : "zero",
|
||||
(float) rdata -> Delta, (float) rdata -> A);
|
||||
for (m = 0; m < MIN(rdata -> dd, rdata -> dn); m++)
|
||||
printf("\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
|
||||
m + 1, (float) rdata -> a[m], m + 1, (float) rdata -> ap[m]);
|
||||
if (rdata -> dd > rdata -> dn)
|
||||
printf("\t\t\t\t\ta'[%2d] = %14.8g\n",
|
||||
rdata -> dn + 1, (float) rdata -> ap[rdata -> dn]);
|
||||
if (rdata -> dd < rdata -> dn)
|
||||
printf("\ta[%2d] = %14.8g\n",
|
||||
rdata -> dd + 1, (float) rdata -> a[rdata -> dd]);
|
||||
|
||||
printf("\n\tPartial fraction coefficients\n");
|
||||
printf("\talpha[ 0] = %14.8g\n",
|
||||
(float) rdata -> alpha[rdata -> da - 1]);
|
||||
for (m = 0; m < rdata -> dd; m++)
|
||||
printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
|
||||
m + 1, (float) rdata -> alpha[m], m + 1, (float) rdata -> ap[m]);
|
||||
if (rdata -> type == 1)
|
||||
printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
|
||||
rdata -> dd + 1, (float) rdata -> alpha[rdata -> dd],
|
||||
rdata -> dd + 1, (float) ZERO);
|
||||
|
||||
printf("\n\tContinued fraction coefficients\n");
|
||||
for (m = 0; m < rdata -> db; m++)
|
||||
printf("\tbeta[%2d] = %14.8g\n", m, (float) rdata -> beta[m]);
|
||||
|
||||
printf("\n\tCayley transform coefficients\n");
|
||||
for (m = 0; m < rdata -> n; m++)
|
||||
printf("\tgamma[%2d] = %14.8g\n", m, (float) rdata -> gamma[m]);
|
||||
|
||||
#ifndef NPLOT
|
||||
plot_function = fopen("zolotarev-fn.dat", "w");
|
||||
plot_error = fopen("zolotarev-err.dat", "w");
|
||||
#ifdef ALLPLOTS
|
||||
plot_partfrac = fopen("zolotarev-partfrac.dat", "w");
|
||||
plot_contfrac = fopen("zolotarev-contfrac.dat", "w");
|
||||
plot_cayley = fopen("zolotarev-cayley.dat", "w");
|
||||
#endif /* ALLPLOTS */
|
||||
for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
|
||||
x = 2.4 * (float) m / plotpts - 1.2;
|
||||
if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
|
||||
/* skip x = 0 for type 1, as R(0) is singular */
|
||||
y = zolotarev_eval((PRECISION) x, rdata);
|
||||
fprintf(plot_function, "%g %g\n", x, (float) y);
|
||||
fprintf(plot_error, "%g %g\n",
|
||||
x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
|
||||
ypferr = (float)((zolotarev_partfrac_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
ycferr = (float)((zolotarev_contfrac_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
ycaylerr = (float)((zolotarev_cayley_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
|
||||
maxypferr = MAX(maxypferr, fabs(ypferr));
|
||||
maxycferr = MAX(maxycferr, fabs(ycferr));
|
||||
maxycaylerr = MAX(maxycaylerr, fabs(ycaylerr));
|
||||
}
|
||||
#ifdef ALLPLOTS
|
||||
fprintf(plot_partfrac, "%g %g\n", x, ypferr);
|
||||
fprintf(plot_contfrac, "%g %g\n", x, ycferr);
|
||||
fprintf(plot_cayley, "%g %g\n", x, ycaylerr);
|
||||
#endif /* ALLPLOTS */
|
||||
}
|
||||
}
|
||||
#ifdef ALLPLOTS
|
||||
fclose(plot_cayley);
|
||||
fclose(plot_contfrac);
|
||||
fclose(plot_partfrac);
|
||||
#endif /* ALLPLOTS */
|
||||
fclose(plot_error);
|
||||
fclose(plot_function);
|
||||
|
||||
printf("\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
|
||||
printf("\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
|
||||
printf("\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
|
||||
#endif /* NPLOT */
|
||||
|
||||
free(rdata -> a);
|
||||
free(rdata -> ap);
|
||||
free(rdata -> alpha);
|
||||
free(rdata -> beta);
|
||||
free(rdata -> gamma);
|
||||
free(rdata);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
#endif /* TEST */
|
87
Grid/algorithms/approx/Zolotarev.h
Normal file
87
Grid/algorithms/approx/Zolotarev.h
Normal file
@ -0,0 +1,87 @@
|
||||
/* -*- Mode: C; comment-column: 22; fill-column: 79; -*- */
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace Grid {
|
||||
namespace Approx {
|
||||
#endif
|
||||
|
||||
#define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
|
||||
|
||||
|
||||
#ifndef ZOLOTAREV_INTERNAL
|
||||
#ifndef PRECISION
|
||||
#define PRECISION double
|
||||
#endif
|
||||
#define ZPRECISION PRECISION
|
||||
#define ZOLOTAREV_DATA zolotarev_data
|
||||
#endif
|
||||
|
||||
/* This struct contains the coefficients which parameterise an optimal rational
|
||||
* approximation to the signum function.
|
||||
*
|
||||
* The parameterisations used are:
|
||||
*
|
||||
* Factored form for type 0 (R0(0) = 0)
|
||||
*
|
||||
* R0(x) = A * x * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
|
||||
* .. dd-1),
|
||||
*
|
||||
* where deg_num = 2*dn + 1 and deg_denom = 2*dd.
|
||||
*
|
||||
* Factored form for type 1 (R1(0) = infinity)
|
||||
*
|
||||
* R1(x) = (A / x) * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
|
||||
* .. dd-1),
|
||||
*
|
||||
* where deg_num = 2*dn and deg_denom = 2*dd + 1.
|
||||
*
|
||||
* Partial fraction form
|
||||
*
|
||||
* R(x) = alpha[da] * x + sum(alpha[j] * x / (x^2 - ap[j]), j = 0 .. da-1)
|
||||
*
|
||||
* where da = dd for type 0 and da = dd + 1 with ap[dd] = 0 for type 1.
|
||||
*
|
||||
* Continued fraction form
|
||||
*
|
||||
* R(x) = beta[db-1] * x + 1 / (beta[db-2] * x + 1 / (beta[db-3] * x + ...))
|
||||
*
|
||||
* with the final coefficient being beta[0], with d' = 2 * dd + 1 for type 0
|
||||
* and db = 2 * dd + 2 for type 1.
|
||||
*
|
||||
* Cayley form (Chiu's domain wall formulation)
|
||||
*
|
||||
* R(x) = (1 - T(x)) / (1 + T(x))
|
||||
*
|
||||
* where T(x) = prod((x - gamma[j]) / (x + gamma[j]), j = 0 .. n-1)
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
ZPRECISION *a, /* zeros of numerator, a[0 .. dn-1] */
|
||||
*ap, /* poles (zeros of denominator), ap[0 .. dd-1] */
|
||||
A, /* overall factor */
|
||||
*alpha, /* coefficients of partial fraction, alpha[0 .. da-1] */
|
||||
*beta, /* coefficients of continued fraction, beta[0 .. db-1] */
|
||||
*gamma, /* zeros of numerator of T in Cayley form */
|
||||
Delta, /* maximum error, |R(x) - sgn(x)| <= Delta */
|
||||
epsilon; /* minimum x value, epsilon < |x| < 1 */
|
||||
int n, /* approximation degree */
|
||||
type, /* 0: R(0) = 0, 1: R(0) = infinity */
|
||||
dn, dd, da, db, /* number of elements of a, ap, alpha, and beta */
|
||||
deg_num, /* degree of numerator = deg_denom +/- 1 */
|
||||
deg_denom; /* degree of denominator */
|
||||
} ZOLOTAREV_DATA;
|
||||
|
||||
#ifndef ZOLOTAREV_INTERNAL
|
||||
|
||||
/* zolotarev(epsilon, n, type) returns a pointer to an initialised
|
||||
* zolotarev_data structure. The arguments must satisfy the constraints that
|
||||
* epsilon > 0, n > 0, and type = 0 or 1. */
|
||||
|
||||
ZOLOTAREV_DATA* higham(PRECISION epsilon, int n) ;
|
||||
ZOLOTAREV_DATA* zolotarev(PRECISION epsilon, int n, int type);
|
||||
void zolotarev_free(zolotarev_data *zdata);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}}
|
||||
#endif
|
206
Grid/algorithms/approx/bigfloat.h
Normal file
206
Grid/algorithms/approx/bigfloat.h
Normal file
@ -0,0 +1,206 @@
|
||||
/*
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
bigfloat.h
|
||||
|
||||
Simple C++ wrapper for multiprecision datatype used by AlgRemez
|
||||
algorithm
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_BIGFLOAT_H
|
||||
#define INCLUDED_BIGFLOAT_H
|
||||
|
||||
|
||||
#include <gmp.h>
|
||||
#include <mpf2mpfr.h>
|
||||
#include <mpfr.h>
|
||||
class bigfloat {
|
||||
|
||||
private:
|
||||
|
||||
mpf_t x;
|
||||
|
||||
public:
|
||||
|
||||
bigfloat() { mpf_init(x); }
|
||||
bigfloat(const bigfloat& y) { mpf_init_set(x, y.x); }
|
||||
bigfloat(const unsigned long u) { mpf_init_set_ui(x, u); }
|
||||
bigfloat(const long i) { mpf_init_set_si(x, i); }
|
||||
bigfloat(const int i) {mpf_init_set_si(x,(long)i);}
|
||||
bigfloat(const float d) { mpf_init_set_d(x, (double)d); }
|
||||
bigfloat(const double d) { mpf_init_set_d(x, d); }
|
||||
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
|
||||
~bigfloat(void) { mpf_clear(x); }
|
||||
operator double (void) const { return (double)mpf_get_d(x); }
|
||||
static void setDefaultPrecision(unsigned long dprec) {
|
||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||
mpf_set_default_prec(bprec);
|
||||
}
|
||||
|
||||
void setPrecision(unsigned long dprec) {
|
||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||
mpf_set_prec(x,bprec);
|
||||
}
|
||||
|
||||
unsigned long getPrecision(void) const { return mpf_get_prec(x); }
|
||||
|
||||
unsigned long getDefaultPrecision(void) const { return mpf_get_default_prec(); }
|
||||
|
||||
bigfloat& operator=(const bigfloat& y) {
|
||||
mpf_set(x, y.x);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const unsigned long y) {
|
||||
mpf_set_ui(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const signed long y) {
|
||||
mpf_set_si(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const float y) {
|
||||
mpf_set_d(x, (double)y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const double y) {
|
||||
mpf_set_d(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
size_t write(void);
|
||||
size_t read(void);
|
||||
|
||||
/* Arithmetic Functions */
|
||||
|
||||
bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
|
||||
bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
|
||||
bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
|
||||
bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_add(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_add_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_sub(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_ui_sub(a.x,x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_sub_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x) {
|
||||
bigfloat a;
|
||||
mpf_neg(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_mul(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_mul_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
mpf_div(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const unsigned long x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
mpf_ui_div(a.x,x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const unsigned long y){
|
||||
bigfloat a;
|
||||
mpf_div_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
mpf_sqrt(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const unsigned long x){
|
||||
bigfloat a;
|
||||
mpf_sqrt_ui(a.x,x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat abs_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
mpf_abs(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, long power) {
|
||||
bigfloat b;
|
||||
mpf_pow_ui(b.x,a.x,power);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
|
||||
bigfloat b;
|
||||
mpfr_pow(b.x,a.x,power.x,GMP_RNDN);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat exp_bf(const bigfloat& a) {
|
||||
bigfloat b;
|
||||
mpfr_exp(b.x,a.x,GMP_RNDN);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Comparison Functions */
|
||||
|
||||
friend int operator>(const bigfloat& x, const bigfloat& y) {
|
||||
int test;
|
||||
test = mpf_cmp(x.x,y.x);
|
||||
if (test > 0) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
friend int operator<(const bigfloat& x, const bigfloat& y) {
|
||||
int test;
|
||||
test = mpf_cmp(x.x,y.x);
|
||||
if (test < 0) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
friend int sgn(const bigfloat&);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
189
Grid/algorithms/approx/bigfloat_double.h
Normal file
189
Grid/algorithms/approx/bigfloat_double.h
Normal file
@ -0,0 +1,189 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/bigfloat_double.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <math.h>
|
||||
|
||||
typedef double mfloat;
|
||||
class bigfloat {
|
||||
private:
|
||||
|
||||
mfloat x;
|
||||
|
||||
public:
|
||||
|
||||
bigfloat() { }
|
||||
bigfloat(const bigfloat& y) { x=y.x; }
|
||||
bigfloat(const unsigned long u) { x=u; }
|
||||
bigfloat(const long i) { x=i; }
|
||||
bigfloat(const int i) { x=i;}
|
||||
bigfloat(const float d) { x=d;}
|
||||
bigfloat(const double d) { x=d;}
|
||||
bigfloat(const char *str) { x=std::stod(std::string(str));}
|
||||
~bigfloat(void) { }
|
||||
operator double (void) const { return (double)x; }
|
||||
static void setDefaultPrecision(unsigned long dprec) {
|
||||
}
|
||||
|
||||
void setPrecision(unsigned long dprec) {
|
||||
}
|
||||
|
||||
unsigned long getPrecision(void) const { return 64; }
|
||||
unsigned long getDefaultPrecision(void) const { return 64; }
|
||||
|
||||
bigfloat& operator=(const bigfloat& y) { x=y.x; return *this; }
|
||||
bigfloat& operator=(const unsigned long y) { x=y; return *this; }
|
||||
bigfloat& operator=(const signed long y) { x=y; return *this; }
|
||||
bigfloat& operator=(const float y) { x=y; return *this; }
|
||||
bigfloat& operator=(const double y) { x=y; return *this; }
|
||||
|
||||
size_t write(void);
|
||||
size_t read(void);
|
||||
|
||||
/* Arithmetic Functions */
|
||||
|
||||
bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
|
||||
bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
|
||||
bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
|
||||
bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x+y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
a.x=x.x+y;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x-y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
|
||||
bigfloat bx(x);
|
||||
return bx-y;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat by(y);
|
||||
return x-by;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x) {
|
||||
bigfloat a;
|
||||
a.x=-x.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x*y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
a.x=x.x*y;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
a.x=x.x/y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const unsigned long x, const bigfloat& y){
|
||||
bigfloat bx(x);
|
||||
return bx/y;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const unsigned long y){
|
||||
bigfloat by(y);
|
||||
return x/by;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
a.x= sqrt(x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const unsigned long x){
|
||||
bigfloat a(x);
|
||||
return sqrt_bf(a);
|
||||
}
|
||||
|
||||
friend bigfloat abs_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
a.x=fabs(x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, long power) {
|
||||
bigfloat b;
|
||||
b.x=pow(a.x,power);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
|
||||
bigfloat b;
|
||||
b.x=pow(a.x,power.x);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat exp_bf(const bigfloat& a) {
|
||||
bigfloat b;
|
||||
b.x=exp(a.x);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Comparison Functions */
|
||||
friend int operator>(const bigfloat& x, const bigfloat& y) {
|
||||
return x.x>y.x;
|
||||
}
|
||||
|
||||
friend int operator<(const bigfloat& x, const bigfloat& y) {
|
||||
return x.x<y.x;
|
||||
}
|
||||
|
||||
friend int sgn(const bigfloat& x) {
|
||||
if ( x.x>=0 ) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
/* Miscellaneous Functions */
|
||||
|
||||
// friend bigfloat& random(void);
|
||||
};
|
||||
|
||||
|
397
Grid/algorithms/iterative/AdefGeneric.h
Normal file
397
Grid/algorithms/iterative/AdefGeneric.h
Normal file
@ -0,0 +1,397 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/AdefGeneric.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
|
||||
/*
|
||||
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
|
||||
* Script A = SolverMatrix
|
||||
* Script P = Preconditioner
|
||||
*
|
||||
* Deflation methods considered
|
||||
* -- Solve P A x = P b [ like Luscher ]
|
||||
* DEF-1 M P A x = M P b [i.e. left precon]
|
||||
* DEF-2 P^T M A x = P^T M b
|
||||
* ADEF-1 Preconditioner = M P + Q [ Q + M + M A Q]
|
||||
* ADEF-2 Preconditioner = P^T M + Q
|
||||
* BNN Preconditioner = P^T M P + Q
|
||||
* BNN2 Preconditioner = M P + P^TM +Q - M P A M
|
||||
*
|
||||
* Implement ADEF-2
|
||||
*
|
||||
* Vstart = P^Tx + Qb
|
||||
* M1 = P^TM + Q
|
||||
* M2=M3=1
|
||||
* Vout = x
|
||||
*/
|
||||
|
||||
// abstract base
|
||||
template<class Field, class CoarseField>
|
||||
class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
int verbose;
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
const int mmax = 5;
|
||||
GridBase *grid;
|
||||
GridBase *coarsegrid;
|
||||
|
||||
LinearOperatorBase<Field> *_Linop
|
||||
OperatorFunction<Field> *_Smoother,
|
||||
LinearFunction<CoarseField> *_CoarseSolver;
|
||||
|
||||
// Need somthing that knows how to get from Coarse to fine and back again
|
||||
|
||||
// more most opertor functions
|
||||
TwoLevelFlexiblePcg(RealD tol,
|
||||
Integer maxit,
|
||||
LinearOperatorBase<Field> *Linop,
|
||||
LinearOperatorBase<Field> *SmootherLinop,
|
||||
OperatorFunction<Field> *Smoother,
|
||||
OperatorFunction<CoarseField> CoarseLinop
|
||||
) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
_Linop(Linop),
|
||||
_PreconditionerLinop(PrecLinop),
|
||||
_Preconditioner(Preconditioner)
|
||||
{
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
// The Pcg routine is common to all, but the various matrices differ from derived
|
||||
// implementation to derived implmentation
|
||||
void operator() (const Field &src, Field &psi){
|
||||
void operator() (const Field &src, Field &psi){
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
grid = src._grid;
|
||||
|
||||
RealD f;
|
||||
RealD rtzp,rtz,a,d,b;
|
||||
RealD rptzp;
|
||||
RealD tn;
|
||||
RealD guess = norm2(psi);
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = ssq*Tolerance*Tolerance;
|
||||
|
||||
/////////////////////////////
|
||||
// Set up history vectors
|
||||
/////////////////////////////
|
||||
std::vector<Field> p (mmax,grid);
|
||||
std::vector<Field> mmp(mmax,grid);
|
||||
std::vector<RealD> pAp(mmax);
|
||||
|
||||
Field x (grid); x = psi;
|
||||
Field z (grid);
|
||||
Field tmp(grid);
|
||||
Field r (grid);
|
||||
Field mu (grid);
|
||||
|
||||
//////////////////////////
|
||||
// x0 = Vstart -- possibly modify guess
|
||||
//////////////////////////
|
||||
x=src;
|
||||
Vstart(x,src);
|
||||
|
||||
// r0 = b -A x0
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
||||
|
||||
//////////////////////////////////
|
||||
// Compute z = M1 x
|
||||
//////////////////////////////////
|
||||
M1(r,z,tmp,mp,SmootherMirs);
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
///////////////////////////////////////
|
||||
// Solve for Mss mu = P A z and set p = z-mu
|
||||
// Def2: p = 1 - Q Az = Pright z
|
||||
// Other algos M2 is trivial
|
||||
///////////////////////////////////////
|
||||
M2(z,p[0]);
|
||||
|
||||
for (int k=0;k<=MaxIterations;k++){
|
||||
|
||||
int peri_k = k % mmax;
|
||||
int peri_kp = (k+1) % mmax;
|
||||
|
||||
rtz=rtzp;
|
||||
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
|
||||
a = rtz/d;
|
||||
|
||||
// Memorise this
|
||||
pAp[peri_k] = d;
|
||||
|
||||
axpy(x,a,p[peri_k],x);
|
||||
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
||||
|
||||
// Compute z = M x
|
||||
M1(r,z,tmp,mp);
|
||||
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
||||
|
||||
p[peri_kp]=p[peri_k];
|
||||
|
||||
// Standard search direction p -> z + b p ; b =
|
||||
b = (rtzp)/rtz;
|
||||
|
||||
int northog;
|
||||
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
|
||||
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
||||
|
||||
for(int back=0; back < northog; back++){
|
||||
int peri_back = (k-back)%mmax;
|
||||
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
|
||||
RealD beta = -pbApk/pAp[peri_back];
|
||||
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
|
||||
}
|
||||
|
||||
RealD rrn=sqrt(rn/ssq);
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if ( rn <= rsq ) {
|
||||
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
axpy(tmp,-1.0,src,mmp[0]);
|
||||
|
||||
RealD psinorm = sqrt(norm2(x));
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD tmpnorm = sqrt(norm2(tmp));
|
||||
RealD true_residual = tmpnorm/srcnorm;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
|
||||
return k;
|
||||
}
|
||||
}
|
||||
// Non-convergence
|
||||
assert(0);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
virtual void M(Field & in,Field & out,Field & tmp) {
|
||||
|
||||
}
|
||||
|
||||
virtual void M1(Field & in, Field & out) {// the smoother
|
||||
|
||||
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||
Field tmp(grid);
|
||||
Field Min(grid);
|
||||
|
||||
PcgM(in,Min); // Smoother call
|
||||
|
||||
HermOp(Min,out);
|
||||
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
||||
|
||||
ProjectToSubspace(tmp,PleftProj);
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||
axpy(out,1.0,Min,tmp); // Min+tmp
|
||||
}
|
||||
|
||||
virtual void M2(const Field & in, Field & out) {
|
||||
out=in;
|
||||
// Must override for Def2 only
|
||||
// case PcgDef2:
|
||||
// Pright(in,out);
|
||||
// break;
|
||||
}
|
||||
|
||||
virtual RealD M3(const Field & p, Field & mmp){
|
||||
double d,dd;
|
||||
HermOpAndNorm(p,mmp,d,dd);
|
||||
return dd;
|
||||
// Must override for Def1 only
|
||||
// case PcgDef1:
|
||||
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
|
||||
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
|
||||
// Pleft(mp,mmp);
|
||||
// d=real(linop_d->inner(p,mmp));
|
||||
}
|
||||
|
||||
virtual void VstartDef2(Field & xconst Field & src){
|
||||
//case PcgDef2:
|
||||
//case PcgAdef2:
|
||||
//case PcgAdef2f:
|
||||
//case PcgV11f:
|
||||
///////////////////////////////////
|
||||
// Choose x_0 such that
|
||||
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
||||
// = [1 - Ass_inv A] Guess + Assinv src
|
||||
// = P^T guess + Assinv src
|
||||
// = Vstart [Tang notation]
|
||||
// This gives:
|
||||
// W^T (src - A x_0) = src_s - A guess_s - r_s
|
||||
// = src_s - (A guess)_s - src_s + (A guess)_s
|
||||
// = 0
|
||||
///////////////////////////////////
|
||||
Field r(grid);
|
||||
Field mmp(grid);
|
||||
|
||||
HermOp(x,mmp);
|
||||
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
|
||||
ProjectToSubspace(r,PleftProj);
|
||||
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
PromoteFromSubspace(PleftMss_proj,mmp);
|
||||
x=x+mmp;
|
||||
|
||||
}
|
||||
|
||||
virtual void Vstart(Field & x,const Field & src){
|
||||
return;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Only Def1 has non-trivial Vout. Override in Def1
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
virtual void Vout (Field & in, Field & out,Field & src){
|
||||
out = in;
|
||||
//case PcgDef1:
|
||||
// //Qb + PT x
|
||||
// ProjectToSubspace(src,PleftProj);
|
||||
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
// PromoteFromSubspace(PleftMss_proj,tmp);
|
||||
//
|
||||
// Pright(in,out);
|
||||
//
|
||||
// linop_d->axpy(out,tmp,out,1.0);
|
||||
// break;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Pright and Pleft are common to all implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void Pright(Field & in,Field & out){
|
||||
// P_R = [ 1 0 ]
|
||||
// [ -Mss^-1 Msb 0 ]
|
||||
Field in_sbar(grid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
HermOp(in_sbar,out);
|
||||
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
|
||||
|
||||
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
|
||||
PromoteFromSubspace(PleftMss_proj,out); //
|
||||
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
|
||||
}
|
||||
virtual void Pleft (Field & in,Field & out){
|
||||
// P_L = [ 1 -Mbs Mss^-1]
|
||||
// [ 0 0 ]
|
||||
Field in_sbar(grid);
|
||||
Field tmp2(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
|
||||
PromoteFromSubspace(PleftMss_proj,out);
|
||||
|
||||
HermOp(out,Mtmp);
|
||||
|
||||
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
|
||||
PromoteFromSubspace(PleftProj,tmp2);
|
||||
|
||||
axpy(out,-1.0,tmp2,Mtmp);
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp){
|
||||
|
||||
}
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
|
||||
|
||||
}
|
||||
virtual void M2(Field & in, Field & out){
|
||||
|
||||
}
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
*/
|
||||
#endif
|
606
Grid/algorithms/iterative/BlockConjugateGradient.h
Normal file
606
Grid/algorithms/iterative/BlockConjugateGradient.h
Normal file
@ -0,0 +1,606 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class Field>
|
||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
|
||||
|
||||
typedef typename Field::scalar_type scomplex;
|
||||
|
||||
int blockDim ;
|
||||
int Nblock;
|
||||
|
||||
BlockCGtype CGtype;
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
|
||||
{};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Thin QR factorisation (google it)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Dimensions
|
||||
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
||||
//
|
||||
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
|
||||
//
|
||||
// Q C = R => Q = R C^{-1}
|
||||
//
|
||||
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
|
||||
//
|
||||
// Set C = L^{dag}, and then Q^dag Q = ident
|
||||
//
|
||||
// Checks:
|
||||
// Cdag C = Rdag R ; passes.
|
||||
// QdagQ = 1 ; passes
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
// Force manifest hermitian to avoid rounding related
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
#if 0
|
||||
std::cout << " Calling Cholesky ldlt on m_rr " << m_rr <<std::endl;
|
||||
Eigen::MatrixXcd L_ldlt = m_rr.ldlt().matrixL();
|
||||
std::cout << " Called Cholesky ldlt on m_rr " << L_ldlt <<std::endl;
|
||||
auto D_ldlt = m_rr.ldlt().vectorD();
|
||||
std::cout << " Called Cholesky ldlt on m_rr " << D_ldlt <<std::endl;
|
||||
#endif
|
||||
|
||||
// std::cout << " Calling Cholesky llt on m_rr " <<std::endl;
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
// std::cout << " Called Cholesky llt on m_rr " << L <<std::endl;
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Q = R C^{-1}
|
||||
//
|
||||
// Q_j = R_i Cinv(i,j)
|
||||
//
|
||||
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceMulMatrix(Q,Cinv,R,Orthog);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Call one of several implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQ ) {
|
||||
BlockCGrQsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == BlockCG ) {
|
||||
BlockCGsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == CGmultiRHS ) {
|
||||
CGmultiRHSsolve(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQ implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = B._grid->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
conformable(X, B);
|
||||
|
||||
Field tmp(B);
|
||||
Field Q(B);
|
||||
Field D(B);
|
||||
Field Z(B);
|
||||
Field AD(B);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,B,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,B,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,X,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
|
||||
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
AD = AD-B;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = Src._grid->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
|
||||
************************************************************************
|
||||
* O'Leary : R = B - A X
|
||||
* O'Leary : P = M R ; preconditioner M = 1
|
||||
* O'Leary : alpha = PAP^{-1} RMR
|
||||
* O'Leary : beta = RMR^{-1}_old RMR_new
|
||||
* O'Leary : X=X+Palpha
|
||||
* O'Leary : R_new=R_old-AP alpha
|
||||
* O'Leary : P=MR_new+P beta
|
||||
*/
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_pAp_inv = m_pAp.inverse();
|
||||
m_alpha = m_pAp_inv * m_rr ;
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
m_rr_inv = m_rr.inverse();
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_beta = m_rr_inv *m_rr;
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
P= AP;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
RealD rr;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
||||
// Use this for spread out across nodes
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim
|
||||
Nblock = Src._grid->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
std::vector<ComplexD> v_pAp(Nblock);
|
||||
std::vector<RealD> v_rr (Nblock);
|
||||
std::vector<RealD> v_rr_inv(Nblock);
|
||||
std::vector<RealD> v_alpha(Nblock);
|
||||
std::vector<RealD> v_beta(Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch sliceNormTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductVector(v_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
|
||||
}
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_rr_inv[b] = 1.0/v_rr[b];
|
||||
}
|
||||
sliceNormTimer.Start();
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
sliceNormTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_beta[b] = v_rr_inv[b] *v_rr[b];
|
||||
}
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(P,v_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
RealD rr = v_rr[b]/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
177
Grid/algorithms/iterative/ConjugateGradient.h
Normal file
177
Grid/algorithms/iterative/ConjugateGradient.h
Normal file
@ -0,0 +1,177 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||
#define GRID_CONJUGATE_GRADIENT_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Field>
|
||||
class ConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
ErrorOnNoConverge(err_on_no_conv){};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
Field p(src);
|
||||
Field mmp(src);
|
||||
Field r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations*1000; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(p, mmp);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
InnerTimer.Start();
|
||||
ComplexD dc = innerProduct(p,mmp);
|
||||
InnerTimer.Stop();
|
||||
d = dc.real();
|
||||
a = c / d;
|
||||
|
||||
AxpyNormTimer.Start();
|
||||
cp = axpy_norm(r, -a, mmp, r);
|
||||
AxpyNormTimer.Stop();
|
||||
b = cp / c;
|
||||
|
||||
LinearCombTimer.Start();
|
||||
parallel_for(int ss=0;ss<src._grid->oSites();ss++){
|
||||
vstream(psi[ss], a * p[ss] + psi[ss]);
|
||||
vstream(p [ss], b * p[ss] + r[ss]);
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
SolverTimer.Stop();
|
||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
154
Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
154
Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
@ -0,0 +1,154 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//Mixed precision restarted defect correction CG
|
||||
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||
|
||||
void useGuesser(LinearFunction<FieldF> &g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.checkerboard;
|
||||
sol_d.checkerboard = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in._grid;
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.checkerboard = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.checkerboard = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.checkerboard = cb;
|
||||
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.checkerboard = cb;
|
||||
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
//Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.HermOp(sol_d, tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||
|
||||
if(norm < OuterLoopNormMult * stop){
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||
break;
|
||||
}
|
||||
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
zeroit(sol_f);
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL)
|
||||
(*guesser)(src_f, sol_f);
|
||||
|
||||
//Inner CG
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
322
Grid/algorithms/iterative/ConjugateGradientMultiShift.h
Normal file
322
Grid/algorithms/iterative/ConjugateGradientMultiShift.h
Normal file
@ -0,0 +1,322 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateGradientMultiShift : public OperatorMultiFunction<Field>,
|
||||
public OperatorFunction<Field>
|
||||
{
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
shifts(_shifts)
|
||||
{
|
||||
verbose=1;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||
{
|
||||
GridBase *grid = src._grid;
|
||||
int nshift = shifts.order;
|
||||
std::vector<Field> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
||||
{
|
||||
|
||||
GridBase *grid = src._grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
std::vector<Field> ps(nshift,grid);// Search directions
|
||||
|
||||
assert(psi.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
Field r(grid);
|
||||
Field p(grid);
|
||||
Field tmp(grid);
|
||||
Field mmp(grid);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src);
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
|
||||
<<" target resid "<<rsq[s]<<std::endl;
|
||||
ps[s] = src;
|
||||
}
|
||||
// r and p for primary
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
//MdagM+m[0]
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
// have verified that inner product of
|
||||
// p and mmp is equal to d after this since
|
||||
// the d computation is tricky
|
||||
// qq = real(innerProduct(p,mmp));
|
||||
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
// for avoiding linear algebra in "single" case.
|
||||
//
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps[s],a,ps[s],r);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps[s],z[s][iz],as,r,ps[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
// Scope for optimisation here in case of "single".
|
||||
// Could load psi[0] and pull all ps[s] in.
|
||||
// if ( single ) ss=primary;
|
||||
// Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
|
||||
// Pipelined CG gain:
|
||||
//
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
// New Kernel: Load psi[0], vector of coeffs, vector of pointers ps
|
||||
// If can predict the coefficient bs then we can fuse these and avoid write reread cyce
|
||||
// on ps[s].
|
||||
// Before: 3 x npole + 3 x npole
|
||||
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
|
||||
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi[ss],-bs[s]*alpha[s],ps[s],psi[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsq[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop.HermOpAndNorm(psi[s],mmp,d,qq);
|
||||
axpy(tmp,mass[s],psi[s],mmp);
|
||||
axpy(r,-alpha[s],src,tmp);
|
||||
RealD rn = norm2(r);
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
256
Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h
Normal file
256
Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h
Normal file
@ -0,0 +1,256 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer ReliableUpdatesPerformed;
|
||||
|
||||
bool DoFinalCleanup; //Final DP cleanup, defaults to true
|
||||
Integer IterationsToCleanup; //Final DP cleanup step iterations
|
||||
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
GridBase* SinglePrecGrid;
|
||||
RealD Delta; //reliable update parameter
|
||||
|
||||
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
||||
LinearOperatorBase<FieldF> *Linop_fallback;
|
||||
RealD fallback_transition_tol;
|
||||
|
||||
|
||||
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Delta(_delta),
|
||||
Linop_f(_Linop_f),
|
||||
Linop_d(_Linop_d),
|
||||
SinglePrecGrid(_sp_grid),
|
||||
ErrorOnNoConverge(err_on_no_conv),
|
||||
DoFinalCleanup(true),
|
||||
Linop_fallback(NULL)
|
||||
{};
|
||||
|
||||
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
||||
Linop_fallback = &_Linop_fallback;
|
||||
fallback_transition_tol = _fallback_transition_tol;
|
||||
}
|
||||
|
||||
void operator()(const FieldD &src, FieldD &psi) {
|
||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||
bool using_fallback = false;
|
||||
|
||||
psi.checkerboard = src.checkerboard;
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
FieldD p(src);
|
||||
FieldD mmp(src);
|
||||
FieldD r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
//Single prec initialization
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
r_f.checkerboard = r.checkerboard;
|
||||
precisionChange(r_f, r);
|
||||
|
||||
FieldF psi_f(r_f);
|
||||
psi_f = zero;
|
||||
|
||||
FieldF p_f(r_f);
|
||||
FieldF mmp_f(r_f);
|
||||
|
||||
RealD MaxResidSinceLastRelUp = cp; //initial residual
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k = 0;
|
||||
int l = 0;
|
||||
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
cp = axpy_norm(r_f, -a, mmp_f, r_f);
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi_f = a * p_f + psi_f;
|
||||
//p_f = p_f * b + r_f;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
|
||||
if(cp > MaxResidSinceLastRelUp){
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
}
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
//Although not written in the paper, I assume that I have to add on the final solution
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
|
||||
SolverTimer.Stop();
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD resnorm = sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
|
||||
if(DoFinalCleanup){
|
||||
//Do a final CG to cleanup
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
|
||||
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
|
||||
CG.ErrorOnNoConverge = ErrorOnNoConverge;
|
||||
CG(Linop_d,src,psi);
|
||||
IterationsToCleanup = CG.IterationsToComplete;
|
||||
}
|
||||
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
|
||||
return;
|
||||
}
|
||||
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
||||
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
r = src - mmp;
|
||||
|
||||
psi_f = zero;
|
||||
precisionChange(r_f, r);
|
||||
cp = norm2(r);
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
|
||||
b = cp/c;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
|
||||
|
||||
l = l+1;
|
||||
}
|
||||
|
||||
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
|
||||
|
||||
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
|
||||
Linop_f_use = Linop_fallback;
|
||||
using_fallback = true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
111
Grid/algorithms/iterative/ConjugateResidual.h
Normal file
111
Grid/algorithms/iterative/ConjugateResidual.h
Normal file
@ -0,0 +1,111 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_CONJUGATE_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
|
||||
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b, c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src._grid;
|
||||
psi=zero;
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid);
|
||||
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=1;k<MaxIterations;k++){
|
||||
|
||||
a = rAr/pAAp;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
|
||||
cp = axpy_norm(r,-a,Ap,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual "<<sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
112
Grid/algorithms/iterative/Deflation.h
Normal file
112
Grid/algorithms/iterative/Deflation.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class Guesser {
|
||||
public:
|
||||
Guesser(void) = default;
|
||||
virtual ~Guesser(void) = default;
|
||||
virtual void operator()(const Field &src, Field &guess) = 0;
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public Guesser<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = zero; };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public Guesser<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public Guesser<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = zero;
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.checkerboard = src.checkerboard;
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public Guesser<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0]._grid);
|
||||
CoarseField guess_coarse(evec_coarse[0]._grid); guess_coarse = zero;
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.checkerboard = src.checkerboard;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
842
Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
842
Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
@ -0,0 +1,842 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BIRL_H
|
||||
#define GRID_BIRL_H
|
||||
|
||||
#include <string.h> //memset
|
||||
//#include <zlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Move following 100 LOC to lattice/Lattice_basis.h
|
||||
////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0]._grid;
|
||||
|
||||
parallel_region
|
||||
{
|
||||
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
|
||||
parallel_for_internal(int ss=0;ss < grid->oSites();ss++){
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis[k]._odata[ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis[j]._odata[ss] = B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0]._grid;
|
||||
|
||||
result.checkerboard = basis[0].checkerboard;
|
||||
parallel_for(int ss=0;ss < grid->oSites();ss++){
|
||||
vobj B = zero;
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B +=Qt(j,k) * basis[k]._odata[ss];
|
||||
}
|
||||
result._odata[ss] = B;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class ImplicitlyRestartedLanczosTester
|
||||
{
|
||||
public:
|
||||
virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
};
|
||||
|
||||
enum IRLdiagonalisation {
|
||||
IRLdiagonaliseWithDSTEGR,
|
||||
IRLdiagonaliseWithQR,
|
||||
IRLdiagonaliseWithEigen
|
||||
};
|
||||
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
return TestConvergence(j,resid,B,eval,evalMaxApprox);
|
||||
}
|
||||
int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
Field v(B);
|
||||
RealD eval_poly = eval;
|
||||
// Apply operator
|
||||
_HermOp(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class ImplicitlyRestartedLanczos {
|
||||
private:
|
||||
const RealD small = 1.0e-8;
|
||||
int MaxIter;
|
||||
int MinRestart; // Minimum number of restarts; only check for convergence after
|
||||
int Nstop; // Number of evecs checked for convergence
|
||||
int Nk; // Number of converged sought
|
||||
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
|
||||
int Nm; // Nm -- total number of vectors
|
||||
IRLdiagonalisation diagonalisation;
|
||||
int orth_period;
|
||||
|
||||
RealD OrthoTime;
|
||||
RealD eresid, betastp;
|
||||
////////////////////////////////
|
||||
// Embedded objects
|
||||
////////////////////////////////
|
||||
LinearFunction<Field> &_PolyOp;
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosTester<Field> &_Tester;
|
||||
// Default tester provided (we need a ref to something in default case)
|
||||
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
|
||||
/////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Too many options & knobs.
|
||||
// Eliminate:
|
||||
// orth_period
|
||||
// betastp
|
||||
// MinRestart
|
||||
//
|
||||
// Do we really need orth_period
|
||||
// What is the theoretical basis & guarantees of betastp ?
|
||||
// Nstop=Nk viable?
|
||||
// MinRestart avoidable with new convergence test?
|
||||
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
|
||||
// HermOp could be eliminated if we dropped the Power method for max eval.
|
||||
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
|
||||
//////////////////////////////////////////////////////////////////
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
ImplicitlyRestartedLanczosTester<Field> & Tester,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
////////////////////////////////
|
||||
// Helpers
|
||||
////////////////////////////////
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
|
||||
{
|
||||
OrthoTime-=usecond()/1e6;
|
||||
basisOrthogonalize(evec,w,k);
|
||||
normalise(w);
|
||||
OrthoTime+=usecond()/1e6;
|
||||
}
|
||||
|
||||
/* Rudy Arthur's thesis pp.137
|
||||
------------------------
|
||||
Require: M > K P = M − K †
|
||||
Compute the factorization AVM = VM HM + fM eM
|
||||
repeat
|
||||
Q=I
|
||||
for i = 1,...,P do
|
||||
QiRi =HM −θiI Q = QQi
|
||||
H M = Q †i H M Q i
|
||||
end for
|
||||
βK =HM(K+1,K) σK =Q(M,K)
|
||||
r=vK+1βK +rσK
|
||||
VK =VM(1:M)Q(1:M,1:K)
|
||||
HK =HM(1:K,1:K)
|
||||
→AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
|
||||
until convergence
|
||||
*/
|
||||
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
|
||||
{
|
||||
GridBase *grid = src._grid;
|
||||
assert(grid == evec[0]._grid);
|
||||
|
||||
GridLogIRL.TimingMode(1);
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
|
||||
}
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
|
||||
assert(Nm <= evec.size() && Nm <= eval.size());
|
||||
|
||||
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
|
||||
RealD evalMaxApprox = 0.0;
|
||||
{
|
||||
auto src_n = src;
|
||||
auto tmp = src;
|
||||
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
||||
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
||||
normalise(src_n);
|
||||
_HermOp(src_n,tmp);
|
||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
|
||||
i=_MAX_ITER_IRL_MEVAPP_;
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
src_n = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<RealD> lme(Nm);
|
||||
std::vector<RealD> lme2(Nm);
|
||||
std::vector<RealD> eval2(Nm);
|
||||
std::vector<RealD> eval2_copy(Nm);
|
||||
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
|
||||
|
||||
Field f(grid);
|
||||
Field v(grid);
|
||||
int k1 = 1;
|
||||
int k2 = Nk;
|
||||
RealD beta_k;
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
// Set initial vector
|
||||
evec[0] = src;
|
||||
normalise(evec[0]);
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime=0.;
|
||||
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// Restarting loop begins
|
||||
//////////////////////////////////
|
||||
int iter;
|
||||
for(iter = 0; iter<MaxIter; ++iter){
|
||||
|
||||
OrthoTime=0.;
|
||||
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
|
||||
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
|
||||
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
f *= lme[Nm-1];
|
||||
|
||||
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// getting eigenvalues
|
||||
//////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k+k1-1];
|
||||
lme2[k] = lme[k+k1-1];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// sorting
|
||||
//////////////////////////////////
|
||||
eval2_copy = eval2;
|
||||
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
|
||||
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
|
||||
const int chunk=8;
|
||||
for(int io=0; io<k2;io+=chunk){
|
||||
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
|
||||
for(int ii=0;ii<chunk;ii++){
|
||||
if ( (io+ii)<k2 )
|
||||
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Implicitly shifted QR transformations
|
||||
//////////////////////////////////
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
for(int ip=k2; ip<Nm; ++ip){
|
||||
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
|
||||
}
|
||||
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
|
||||
|
||||
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
|
||||
|
||||
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
|
||||
std::cout<<GridLogIRL <<"basisRotated by Qt"<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Compressed vector f and beta(k2)
|
||||
////////////////////////////////////////////////////
|
||||
f *= Qt(k2-1,Nm-1);
|
||||
f += lme[k2-1] * evec[k2];
|
||||
beta_k = norm2(f);
|
||||
beta_k = sqrt(beta_k);
|
||||
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
|
||||
|
||||
RealD betar = 1.0/beta_k;
|
||||
evec[k2] = betar * f;
|
||||
lme[k2-1] = beta_k;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Convergence test
|
||||
////////////////////////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k];
|
||||
lme2[k] = lme[k];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
|
||||
|
||||
Nconv = 0;
|
||||
if (iter >= MinRestart) {
|
||||
|
||||
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
|
||||
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
{
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
||||
if( Nconv>=Nstop){
|
||||
goto converged;
|
||||
}
|
||||
|
||||
} else {
|
||||
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
|
||||
} // end of iter loop
|
||||
}
|
||||
|
||||
std::cout<<GridLogError<<"\n NOT converged.\n";
|
||||
abort();
|
||||
|
||||
converged:
|
||||
{
|
||||
Field B(grid); B.checkerboard = evec[0].checkerboard;
|
||||
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
|
||||
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
|
||||
Nconv=0;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Full final convergence test; unconditionally applied
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
for(int j = 0; j<=Nk; j++){
|
||||
B=evec[j];
|
||||
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
|
||||
Nconv++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nconv < Nstop )
|
||||
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
|
||||
|
||||
eval=eval2;
|
||||
|
||||
//Keep only converged
|
||||
eval.resize(Nconv);// Nstop?
|
||||
evec.resize(Nconv,grid);// Nstop?
|
||||
basisSortInPlace(evec,eval,reverse);
|
||||
|
||||
}
|
||||
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
|
||||
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
|
||||
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
/* Saad PP. 195
|
||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
|
||||
2. For k = 1,2,...,m Do:
|
||||
3. wk:=Avk−βkv_{k−1}
|
||||
4. αk:=(wk,vk) //
|
||||
5. wk:=wk−αkvk // wk orthog vk
|
||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
7. vk+1 := wk/βk+1
|
||||
8. EndDo
|
||||
*/
|
||||
void step(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
std::vector<Field>& evec,
|
||||
Field& w,int Nm,int k)
|
||||
{
|
||||
const RealD tiny = 1.0e-20;
|
||||
assert( k< Nm );
|
||||
|
||||
GridStopWatch gsw_op,gsw_o;
|
||||
|
||||
Field& evec_k = evec[k];
|
||||
|
||||
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
|
||||
|
||||
if(k>0) w -= lme[k-1] * evec[k-1];
|
||||
|
||||
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
|
||||
RealD alph = real(zalph);
|
||||
|
||||
w = w - alph * evec_k;// 5. wk:=wk−αkvk
|
||||
|
||||
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
// 7. vk+1 := wk/βk+1
|
||||
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
|
||||
if (k>0 && k % orth_period == 0) {
|
||||
orthogonalize(w,evec,k); // orthonormalise
|
||||
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
|
||||
}
|
||||
|
||||
if(k < Nm-1) evec[k+1] = w;
|
||||
|
||||
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
|
||||
if ( beta < tiny )
|
||||
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
|
||||
}
|
||||
|
||||
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt, // Nm x Nm
|
||||
GridBase *grid)
|
||||
{
|
||||
Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
|
||||
|
||||
for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
|
||||
|
||||
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
|
||||
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
|
||||
}
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
for (int j = 0; j < Nk; j++) {
|
||||
Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// File could end here if settle on Eigen ??? !!!
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
void QR_decomp(std::vector<RealD>& lmd, // Nm
|
||||
std::vector<RealD>& lme, // Nm
|
||||
int Nk, int Nm, // Nk, Nm
|
||||
Eigen::MatrixXd& Qt, // Nm x Nm matrix
|
||||
RealD Dsh, int kmin, int kmax)
|
||||
{
|
||||
int k = kmin-1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
|
||||
RealD c = ( lmd[k] -Dsh) *Fden;
|
||||
RealD s = -lme[k] *Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
x =-s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 - s*Qtmp2;
|
||||
Qt(k+1,i)= s*Qtmp1 + c*Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for(int k = kmin; k < kmax-1; ++k){
|
||||
|
||||
RealD Fden = 1.0/hypot(x,lme[k-1]);
|
||||
RealD c = lme[k-1]*Fden;
|
||||
RealD s = - x*Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
lme[k-1] = c*lme[k-1] -s*x;
|
||||
|
||||
if(k != kmax-2){
|
||||
x = -s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
}
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 -s*Qtmp2;
|
||||
Qt(k+1,i) = s*Qtmp1 +c*Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||
double *work, int *lwork, int *iwork, int *liwork,
|
||||
int *info);
|
||||
#endif
|
||||
|
||||
void diagonalize_lapack(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd& Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
#ifdef USE_LAPACK
|
||||
const int size = Nm;
|
||||
int NN = Nk;
|
||||
double evals_tmp[NN];
|
||||
double evec_tmp[NN][NN];
|
||||
memset(evec_tmp[0],0,sizeof(double)*NN*NN);
|
||||
double DD[NN];
|
||||
double EE[NN];
|
||||
for (int i = 0; i< NN; i++) {
|
||||
for (int j = i - 1; j <= i + 1; j++) {
|
||||
if ( j < NN && j >= 0 ) {
|
||||
if (i==j) DD[i] = lmd[i];
|
||||
if (i==j) evals_tmp[i] = lmd[i];
|
||||
if (j==(i-1)) EE[j] = lme[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
int evals_found;
|
||||
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
|
||||
int liwork = 3+NN*10 ;
|
||||
int iwork[liwork];
|
||||
double work[lwork];
|
||||
int isuppz[2*NN];
|
||||
char jobz = 'V'; // calculate evals & evecs
|
||||
char range = 'I'; // calculate all evals
|
||||
// char range = 'A'; // calculate all evals
|
||||
char uplo = 'U'; // refer to upper half of original matrix
|
||||
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
|
||||
int ifail[NN];
|
||||
int info;
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (NN/total)+1;
|
||||
double vl = 0.0, vu = 0.0;
|
||||
int il = interval*node+1 , iu = interval*(node+1);
|
||||
if (iu > NN) iu=NN;
|
||||
double tol = 0.0;
|
||||
if (1) {
|
||||
memset(evals_tmp,0,sizeof(double)*NN);
|
||||
if ( il <= NN){
|
||||
LAPACK_dstegr(&jobz, &range, &NN,
|
||||
(double*)DD, (double*)EE,
|
||||
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
|
||||
&tol, // tolerance
|
||||
&evals_found, evals_tmp, (double*)evec_tmp, &NN,
|
||||
isuppz,
|
||||
work, &lwork, iwork, &liwork,
|
||||
&info);
|
||||
for (int i = iu-1; i>= il-1; i--){
|
||||
evals_tmp[i] = evals_tmp[i - (il-1)];
|
||||
if (il>1) evals_tmp[i-(il-1)]=0.;
|
||||
for (int j = 0; j< NN; j++){
|
||||
evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
|
||||
if (il>1) evec_tmp[i-(il-1)][j]=0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
grid->GlobalSumVector(evals_tmp,NN);
|
||||
grid->GlobalSumVector((double*)evec_tmp,NN*NN);
|
||||
}
|
||||
}
|
||||
// Safer to sort instead of just reversing it,
|
||||
// but the document of the routine says evals are sorted in increasing order.
|
||||
// qr gives evals in decreasing order.
|
||||
for(int i=0;i<NN;i++){
|
||||
lmd [NN-1-i]=evals_tmp[i];
|
||||
for(int j=0;j<NN;j++){
|
||||
Qt((NN-1-i),j)=evec_tmp[i][j];
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
int QRiter = 100*Nm;
|
||||
int kmin = 1;
|
||||
int kmax = Nk;
|
||||
|
||||
// (this should be more sophisticated)
|
||||
for(int iter=0; iter<QRiter; ++iter){
|
||||
|
||||
// determination of 2x2 leading submatrix
|
||||
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
||||
RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
||||
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
||||
// (Dsh: shift)
|
||||
|
||||
// transformation
|
||||
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
|
||||
|
||||
// Convergence criterion (redef of kmin and kamx)
|
||||
for(int j=kmax-1; j>= kmin; --j){
|
||||
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
|
||||
if(fabs(lme[j-1])+dds > dds){
|
||||
kmax = j+1;
|
||||
goto continued;
|
||||
}
|
||||
}
|
||||
QRiter = iter;
|
||||
return;
|
||||
|
||||
continued:
|
||||
for(int j=0; j<kmax-1; ++j){
|
||||
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
|
||||
if(fabs(lme[j])+dds > dds){
|
||||
kmin = j+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
|
||||
abort();
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
406
Grid/algorithms/iterative/LocalCoherenceLanczos.h
Normal file
406
Grid/algorithms/iterative/LocalCoherenceLanczos.h
Normal file
@ -0,0 +1,406 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
|
||||
#define GRID_LOCAL_COHERENCE_IRL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
struct LanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
|
||||
ChebyParams, Cheby,/*Chebyshev*/
|
||||
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
|
||||
int, Nk, /*Vecs in Lanczos seek converge*/
|
||||
int, Nm, /*Total vecs in Lanczos include restart*/
|
||||
RealD, resid, /*residual*/
|
||||
int, MaxIt,
|
||||
RealD, betastp, /* ? */
|
||||
int, MinRes); // Must restart
|
||||
};
|
||||
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
bool, doCoarseRead,
|
||||
LanczosParams, FineParams,
|
||||
LanczosParams, CoarseParams,
|
||||
ChebyParams, Smoother,
|
||||
RealD , coarse_relax_tol,
|
||||
std::vector<int>, blockSize,
|
||||
std::string, config,
|
||||
std::vector < std::complex<double> >, omega,
|
||||
RealD, mass,
|
||||
RealD, M5);
|
||||
};
|
||||
|
||||
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard= checkerboard;
|
||||
FineField fout(FineGrid); fout.checkerboard = checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = subspace[0]._grid;
|
||||
int checkerboard = subspace[0].checkerboard;
|
||||
|
||||
FineField fin (FineGrid); fin.checkerboard =checkerboard;
|
||||
FineField fout(FineGrid);fout.checkerboard =checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
return conv;
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _subspace[0]._grid;
|
||||
int checkerboard = _subspace[0].checkerboard;
|
||||
FineField fB(FineGrid);fB.checkerboard =checkerboard;
|
||||
FineField fv(FineGrid);fv.checkerboard =checkerboard;
|
||||
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
_Linop.HermOp(fB,fv);
|
||||
|
||||
RealD vnum = real(innerProduct(fB,fv)); // HermOp.
|
||||
RealD vden = norm2(fB);
|
||||
RealD vv0 = norm2(fv);
|
||||
eval = vnum/vden;
|
||||
fv -= eval*fB;
|
||||
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
|
||||
if( (vv<eresid*eresid) ) return 1;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Make serializable Lanczos params
|
||||
////////////////////////////////////////////
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class LocalCoherenceLanczos
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
protected:
|
||||
GridBase *_CoarseGrid;
|
||||
GridBase *_FineGrid;
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Alternate constructore, external storage for use by Hadrons module
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard,
|
||||
std::vector<FineField> &ext_subspace,
|
||||
std::vector<CoarseField> &ext_coarse,
|
||||
std::vector<RealD> &ext_eval_fine,
|
||||
std::vector<RealD> &ext_eval_coarse
|
||||
) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (ext_eval_fine),
|
||||
evals_coarse(ext_eval_coarse),
|
||||
subspace (ext_subspace),
|
||||
evec_coarse (ext_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
};
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = ::sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].checkerboard=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
subspace[k].checkerboard=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
} else {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
assert(nbasis<=Nm);
|
||||
Chebyshev<FineField> Cheby(cheby_parms);
|
||||
FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
||||
|
||||
CoarseField src(_CoarseGrid); src=1.0;
|
||||
|
||||
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
int Nconv=0;
|
||||
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
|
||||
assert(Nconv>=Nstop);
|
||||
evals_coarse.resize(Nstop);
|
||||
evec_coarse.resize (Nstop,_CoarseGrid);
|
||||
for (int i=0;i<Nstop;i++){
|
||||
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
60
Grid/algorithms/iterative/NormalEquations.h
Normal file
60
Grid/algorithms/iterative/NormalEquations.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/NormalEquations.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NORMAL_EQUATIONS_H
|
||||
#define GRID_NORMAL_EQUATIONS_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form an NE solver calling a Herm solver
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class NormalEquations : public OperatorFunction<Field>{
|
||||
private:
|
||||
SparseMatrixBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianSolver;
|
||||
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations trick
|
||||
/////////////////////////////////////////////////////
|
||||
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver)
|
||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver) {};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
|
||||
Field src(in._grid);
|
||||
|
||||
_Matrix.Mdag(in,src);
|
||||
_HermitianSolver(src,out); // Mdag M out = Mdag in
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
119
Grid/algorithms/iterative/PrecConjugateResidual.h
Normal file
119
Grid/algorithms/iterative/PrecConjugateResidual.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class PrecConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec)
|
||||
{
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b, c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src._grid;
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
|
||||
|
||||
psi=zero;
|
||||
r = src;
|
||||
Preconditioner(r,p);
|
||||
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Ar=Ap;
|
||||
rAr=pAp;
|
||||
rAAr=pAAp;
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
|
||||
Preconditioner(Ap,z);
|
||||
RealD rq= real(innerProduct(Ap,z));
|
||||
|
||||
a = rAr/rq;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
cp = axpy_norm(r,-a,z,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual "<<sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
230
Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
230
Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
@ -0,0 +1,230 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_GCR_H
|
||||
#define GRID_PREC_GCR_H
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//VPGCR Abe and Zhang, 2005.
|
||||
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||
// but VPGCR was nicely written up in their paper
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
int mmax;
|
||||
int nstep;
|
||||
int steps;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch MatTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Preconditioner(Prec),
|
||||
mmax(_mmax),
|
||||
nstep(_nstep)
|
||||
{
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
psi=zero;
|
||||
RealD cp, ssq,rsq;
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
Field r(src._grid);
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
steps=0;
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
cp=GCRnStep(Linop,src,psi,rsq);
|
||||
|
||||
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
Linop.HermOp(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
RealD tr = norm2(r);
|
||||
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual " <<sqrt(tr/ssq)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
||||
|
||||
RealD cp;
|
||||
RealD a, b, c, d;
|
||||
RealD zAz, zAAz;
|
||||
RealD rAq, rq;
|
||||
|
||||
GridBase *grid = src._grid;
|
||||
|
||||
Field r(grid);
|
||||
Field z(grid);
|
||||
Field tmp(grid);
|
||||
Field ttmp(grid);
|
||||
Field Az(grid);
|
||||
|
||||
////////////////////////////////
|
||||
// history for flexible orthog
|
||||
////////////////////////////////
|
||||
std::vector<Field> q(mmax,grid);
|
||||
std::vector<Field> p(mmax,grid);
|
||||
std::vector<RealD> qq(mmax);
|
||||
|
||||
//////////////////////////////////
|
||||
// initial guess x0 is taken as nonzero.
|
||||
// r0=src-A x0 = src
|
||||
//////////////////////////////////
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
r=src-Az;
|
||||
|
||||
/////////////////////
|
||||
// p = Prec(r)
|
||||
/////////////////////
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOp(z,tmp);
|
||||
MatTimer.Stop();
|
||||
|
||||
ttmp=tmp;
|
||||
tmp=tmp-r;
|
||||
|
||||
/*
|
||||
std::cout<<GridLogMessage<<r<<std::endl;
|
||||
std::cout<<GridLogMessage<<z<<std::endl;
|
||||
std::cout<<GridLogMessage<<ttmp<<std::endl;
|
||||
std::cout<<GridLogMessage<<tmp<<std::endl;
|
||||
*/
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
|
||||
//p[0],q[0],qq[0]
|
||||
p[0]= z;
|
||||
q[0]= Az;
|
||||
qq[0]= zAAz;
|
||||
|
||||
cp =norm2(r);
|
||||
|
||||
for(int k=0;k<nstep;k++){
|
||||
|
||||
steps++;
|
||||
|
||||
int kp = k+1;
|
||||
int peri_k = k %mmax;
|
||||
int peri_kp= kp%mmax;
|
||||
|
||||
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
|
||||
a = rq/qq[peri_k];
|
||||
|
||||
axpy(psi,a,p[peri_k],psi);
|
||||
|
||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||
|
||||
if((k==nstep-1)||(cp<rsq)){
|
||||
return cp;
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " <<sqrt(cp/rsq)<<std::endl;
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);// solve Az = r
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
Linop.HermOp(z,tmp);
|
||||
MatTimer.Stop();
|
||||
tmp=tmp-r;
|
||||
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
||||
|
||||
q[peri_kp]=Az;
|
||||
p[peri_kp]=z;
|
||||
|
||||
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||
for(int back=0;back<northog;back++){
|
||||
|
||||
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||
|
||||
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||
|
||||
}
|
||||
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||
|
||||
|
||||
}
|
||||
assert(0); // never reached
|
||||
return cp;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
503
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
503
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
@ -0,0 +1,503 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Now make the norm reflect extra factor of Mee
|
||||
template<class Field> class SchurRedBlackStaggeredSolve {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out, Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
//src_o = tmp; assert(src_o.checkerboard ==Odd);
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
|
||||
guess(src_o, sol_o);
|
||||
Mtmp = sol_o;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) sol_o = sol_o-Mtmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver,int cb=0, const bool initSubGuess = false) : _HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=cb;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
guess(src_o,sol_o);
|
||||
Mtmp = sol_o;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) sol_o = sol_o-Mtmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagMooee solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve {
|
||||
private:
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix,class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
guess(src_o,tmp);
|
||||
Mtmp = tmp;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) tmp = tmp-Mtmp;
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoMixed {
|
||||
private:
|
||||
LinearFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoMixed(LinearFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise=0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
template<class Matrix>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
template<class Matrix, class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field src_e(grid);
|
||||
Field src_o(grid);
|
||||
Field sol_e(grid);
|
||||
Field sol_o(grid);
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
// _HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
guess(src_o,tmp);
|
||||
Mtmp = tmp;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,tmp); assert(tmp.checkerboard==Odd);
|
||||
// Fionn A2A boolean behavioural control
|
||||
if (subGuess) tmp = tmp-Mtmp;
|
||||
_Matrix.MooeeInv(tmp,sol_o); assert( sol_o.checkerboard ==Odd);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout << GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid " << std::sqrt(nr / ns) << " nr " << nr << " ns " << ns << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
125
Grid/allocator/AlignedAllocator.cc
Normal file
125
Grid/allocator/AlignedAllocator.cc
Normal file
@ -0,0 +1,125 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||
bool MemoryProfiler::debug = false;
|
||||
|
||||
int PointerCache::victim;
|
||||
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return ptr;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<Ncache;e++) {
|
||||
if ( Entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%Ncache;
|
||||
}
|
||||
|
||||
if ( Entries[v].valid ) {
|
||||
ret = Entries[v].address;
|
||||
Entries[v].valid = 0;
|
||||
Entries[v].address = NULL;
|
||||
Entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
Entries[v].address=ptr;
|
||||
Entries[v].bytes =bytes;
|
||||
Entries[v].valid =1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return NULL;
|
||||
|
||||
#ifdef _OPENMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
for(int e=0;e<Ncache;e++){
|
||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||
Entries[e].valid = 0;
|
||||
return Entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
||||
{
|
||||
#ifdef __linux__
|
||||
int fd = open("/proc/self/pagemap", O_RDONLY);
|
||||
assert(fd >= 0);
|
||||
const int page_size = 4096;
|
||||
uint64_t virt_pfn = (uint64_t)Buf / page_size;
|
||||
off_t offset = sizeof(uint64_t) * virt_pfn;
|
||||
uint64_t npages = (BYTES + page_size-1) / page_size;
|
||||
uint64_t pagedata[npages];
|
||||
uint64_t ret = lseek(fd, offset, SEEK_SET);
|
||||
assert(ret == offset);
|
||||
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
|
||||
assert(ret == sizeof(uint64_t) * npages);
|
||||
int nhugepages = npages / 512;
|
||||
int n4ktotal, nnothuge;
|
||||
n4ktotal = 0;
|
||||
nnothuge = 0;
|
||||
for (int i = 0; i < nhugepages; ++i) {
|
||||
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
|
||||
for (int j = 0; j < 512; ++j) {
|
||||
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
|
||||
++n4ktotal;
|
||||
if (pageaddr != baseaddr + j * page_size)
|
||||
++nnothuge;
|
||||
}
|
||||
}
|
||||
int rank = CartesianCommunicator::RankWorld();
|
||||
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string sizeString(const size_t bytes)
|
||||
{
|
||||
constexpr unsigned int bufSize = 256;
|
||||
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
|
||||
char buf[256];
|
||||
size_t s = 0;
|
||||
double count = bytes;
|
||||
|
||||
while (count >= 1024 && s < 7)
|
||||
{
|
||||
s++;
|
||||
count /= 1024;
|
||||
}
|
||||
if (count - floor(count) == 0.0)
|
||||
{
|
||||
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
|
||||
}
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
}
|
315
Grid/allocator/AlignedAllocator.h
Normal file
315
Grid/allocator/AlignedAllocator.h
Normal file
@ -0,0 +1,315 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/AlignedAllocator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
||||
#define GRID_ALIGNED_ALLOCATOR_H
|
||||
|
||||
#ifdef HAVE_MALLOC_MALLOC_H
|
||||
#include <malloc/malloc.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class PointerCache {
|
||||
private:
|
||||
|
||||
static const int Ncache=8;
|
||||
static int victim;
|
||||
|
||||
typedef struct {
|
||||
void *address;
|
||||
size_t bytes;
|
||||
int valid;
|
||||
} PointerCacheEntry;
|
||||
|
||||
static PointerCacheEntry Entries[Ncache];
|
||||
|
||||
public:
|
||||
|
||||
|
||||
static void *Insert(void *ptr,size_t bytes) ;
|
||||
static void *Lookup(size_t bytes) ;
|
||||
|
||||
};
|
||||
|
||||
std::string sizeString(size_t bytes);
|
||||
|
||||
struct MemoryStats
|
||||
{
|
||||
size_t totalAllocated{0}, maxAllocated{0},
|
||||
currentlyAllocated{0}, totalFreed{0};
|
||||
};
|
||||
|
||||
class MemoryProfiler
|
||||
{
|
||||
public:
|
||||
static MemoryStats *stats;
|
||||
static bool debug;
|
||||
};
|
||||
|
||||
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
|
||||
#define profilerDebugPrint \
|
||||
if (MemoryProfiler::stats)\
|
||||
{\
|
||||
auto s = MemoryProfiler::stats;\
|
||||
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\
|
||||
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
|
||||
<< std::endl;\
|
||||
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
|
||||
<< std::endl;\
|
||||
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
|
||||
<< std::endl;\
|
||||
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
|
||||
<< std::endl;\
|
||||
}
|
||||
|
||||
#define profilerAllocate(bytes)\
|
||||
if (MemoryProfiler::stats)\
|
||||
{\
|
||||
auto s = MemoryProfiler::stats;\
|
||||
s->totalAllocated += (bytes);\
|
||||
s->currentlyAllocated += (bytes);\
|
||||
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\
|
||||
}\
|
||||
if (MemoryProfiler::debug)\
|
||||
{\
|
||||
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\
|
||||
profilerDebugPrint;\
|
||||
}
|
||||
|
||||
#define profilerFree(bytes)\
|
||||
if (MemoryProfiler::stats)\
|
||||
{\
|
||||
auto s = MemoryProfiler::stats;\
|
||||
s->totalFreed += (bytes);\
|
||||
s->currentlyAllocated -= (bytes);\
|
||||
}\
|
||||
if (MemoryProfiler::debug)\
|
||||
{\
|
||||
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\
|
||||
profilerDebugPrint;\
|
||||
}
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES);
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// A lattice of something, but assume the something is SIMDized.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename _Tp>
|
||||
class alignedAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||
alignedAllocator() throw() { }
|
||||
alignedAllocator(const alignedAllocator&) throw() { }
|
||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||
~alignedAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
|
||||
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
|
||||
// if ( ptr != NULL )
|
||||
// std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
|
||||
|
||||
//////////////////
|
||||
// Hack 2MB align; could make option probably doesn't need configurability
|
||||
//////////////////
|
||||
//define GRID_ALLOC_ALIGN (128)
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
||||
#else
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
||||
#endif
|
||||
// std::cout << "alignedAllocator " << std::hex << ptr <<std::dec <<std::endl;
|
||||
// First touch optimise in threaded loop
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
|
||||
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( __freeme ) _mm_free((void *)__freeme);
|
||||
#else
|
||||
if ( __freeme ) free((void *)__freeme);
|
||||
#endif
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MPI3 : comms must use shm region
|
||||
// SHMEM: comms must use symmetric heap
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
extern "C" {
|
||||
#include <mpp/shmem.h>
|
||||
extern void * shmem_align(size_t, size_t);
|
||||
extern void shmem_free(void *);
|
||||
}
|
||||
#define PARANOID_SYMMETRIC_HEAP
|
||||
#endif
|
||||
|
||||
template<typename _Tp>
|
||||
class commAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
|
||||
commAllocator() throw() { }
|
||||
commAllocator(const commAllocator&) throw() { }
|
||||
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
|
||||
~commAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerAllocate(bytes);
|
||||
#ifdef CRAY
|
||||
_Tp *ptr = (_Tp *) shmem_align(bytes,64);
|
||||
#else
|
||||
_Tp *ptr = (_Tp *) shmem_align(64,bytes);
|
||||
#endif
|
||||
#ifdef PARANOID_SYMMETRIC_HEAP
|
||||
static void * bcast;
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
|
||||
bcast = (void *) ptr;
|
||||
shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
|
||||
|
||||
if ( bcast != ptr ) {
|
||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||
// BACKTRACEFILE();
|
||||
exit(0);
|
||||
}
|
||||
assert( bcast == (void *) ptr);
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
shmem_free((void *)__p);
|
||||
}
|
||||
#else
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerAllocate(bytes);
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
_Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
|
||||
#else
|
||||
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
|
||||
#endif
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for(size_type n=0;n<bytes;n+=4096){
|
||||
cp[n]=0;
|
||||
}
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
_mm_free((void *)__p);
|
||||
#else
|
||||
free((void *)__p);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template typedefs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
||||
template<class T> using commVector = std::vector<T,commAllocator<T> >;
|
||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||
|
||||
}; // namespace Grid
|
||||
#endif
|
35
Grid/cartesian/Cartesian.h
Normal file
35
Grid/cartesian/Cartesian.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cartesian.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_H
|
||||
#define GRID_CARTESIAN_H
|
||||
|
||||
#include <Grid/cartesian/Cartesian_base.h>
|
||||
#include <Grid/cartesian/Cartesian_full.h>
|
||||
#include <Grid/cartesian/Cartesian_red_black.h>
|
||||
|
||||
#endif
|
292
Grid/cartesian/Cartesian_base.h
Normal file
292
Grid/cartesian/Cartesian_base.h
Normal file
@ -0,0 +1,292 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_BASE_H
|
||||
#define GRID_CARTESIAN_BASE_H
|
||||
|
||||
|
||||
namespace Grid{
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Commicator provides information on the processor grid
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// unsigned long _ndimension;
|
||||
// std::vector<int> _processors; // processor grid
|
||||
// int _processor; // linear processor rank
|
||||
// std::vector<int> _processor_coor; // linear processor rank
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
class GridBase : public CartesianCommunicator , public GridThread {
|
||||
|
||||
public:
|
||||
int dummy;
|
||||
// Give Lattice access
|
||||
template<class object> friend class Lattice;
|
||||
|
||||
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
|
||||
GridBase(const std::vector<int> & processor_grid,
|
||||
const CartesianCommunicator &parent,
|
||||
int &split_rank)
|
||||
: CartesianCommunicator(processor_grid,parent,split_rank) {};
|
||||
GridBase(const std::vector<int> & processor_grid,
|
||||
const CartesianCommunicator &parent)
|
||||
: CartesianCommunicator(processor_grid,parent,dummy) {};
|
||||
|
||||
virtual ~GridBase() = default;
|
||||
|
||||
|
||||
// Physics Grid information.
|
||||
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
|
||||
std::vector<int> _gdimensions;// Global dimensions of array after cb removal
|
||||
std::vector<int> _ldimensions;// local dimensions of array with processor images removed
|
||||
std::vector<int> _rdimensions;// Reduced local dimensions with simd lane images and processor images removed
|
||||
std::vector<int> _ostride; // Outer stride for each dimension
|
||||
std::vector<int> _istride; // Inner stride i.e. within simd lane
|
||||
int _osites; // _isites*_osites = product(dimensions).
|
||||
int _isites;
|
||||
int _fsites; // _isites*_osites = product(dimensions).
|
||||
int _gsites;
|
||||
std::vector<int> _slice_block;// subslice information
|
||||
std::vector<int> _slice_stride;
|
||||
std::vector<int> _slice_nblock;
|
||||
|
||||
std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
|
||||
std::vector<int> _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
|
||||
|
||||
bool _isCheckerBoarded;
|
||||
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Checkerboarding interface is virtual and overridden by
|
||||
// GridCartesian / GridRedBlackCartesian
|
||||
////////////////////////////////////////////////////////////////
|
||||
virtual int CheckerBoarded(int dim)=0;
|
||||
virtual int CheckerBoard(const std::vector<int> &site)=0;
|
||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Local layout calculations
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// These routines are key. Subdivide the linearised cartesian index into
|
||||
// "inner" index identifying which simd lane of object<vFcomplex> is associated with coord
|
||||
// "outer" index identifying which element of _odata in class "Lattice" is associated with coord.
|
||||
//
|
||||
// Compared to, say, Blitz++ we simply need to store BOTH an inner stride and an outer
|
||||
// stride per dimension. The cost of evaluating the indexing information is doubled for an n-dimensional
|
||||
// coordinate. Note, however, for data parallel operations the "inner" indexing cost is not paid and all
|
||||
// lanes are operated upon simultaneously.
|
||||
|
||||
virtual int oIndex(std::vector<int> &coor)
|
||||
{
|
||||
int idx=0;
|
||||
// Works with either global or local coordinates
|
||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||
return idx;
|
||||
}
|
||||
virtual int iIndex(std::vector<int> &lcoor)
|
||||
{
|
||||
int idx=0;
|
||||
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||
return idx;
|
||||
}
|
||||
inline int oIndexReduced(std::vector<int> &ocoor)
|
||||
{
|
||||
int idx=0;
|
||||
// ocoor is already reduced so can eliminate the modulo operation
|
||||
// for fast indexing and inline the routine
|
||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
|
||||
return idx;
|
||||
}
|
||||
inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
|
||||
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
|
||||
}
|
||||
|
||||
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
|
||||
lcoor.resize(_ndimension);
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// SIMD lane addressing
|
||||
//////////////////////////////////////////////////////////
|
||||
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
||||
{
|
||||
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
||||
}
|
||||
|
||||
inline int PermuteDim(int dimension){
|
||||
return _simd_layout[dimension]>1;
|
||||
}
|
||||
inline int PermuteType(int dimension){
|
||||
int permute_type=0;
|
||||
//
|
||||
// FIXME:
|
||||
//
|
||||
// Best way to encode this would be to present a mask
|
||||
// for which simd dimensions are rotated, and the rotation
|
||||
// size. If there is only one simd dimension rotated, this is just
|
||||
// a permute.
|
||||
//
|
||||
// Cases: PermuteType == 1,2,4,8
|
||||
// Distance should be either 0,1,2..
|
||||
//
|
||||
if ( _simd_layout[dimension] > 2 ) {
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
|
||||
}
|
||||
permute_type = RotateBit; // How to specify distance; this is not just direction.
|
||||
return permute_type;
|
||||
}
|
||||
|
||||
for(int d=_ndimension-1;d>dimension;d--){
|
||||
if (_simd_layout[d]>1 ) permute_type++;
|
||||
}
|
||||
return permute_type;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Array sizing queries
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
inline int iSites(void) const { return _isites; };
|
||||
inline int Nsimd(void) const { return _isites; };// Synonymous with iSites
|
||||
inline int oSites(void) const { return _osites; };
|
||||
inline int lSites(void) const { return _isites*_osites; };
|
||||
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
|
||||
inline int Nd (void) const { return _ndimension;};
|
||||
|
||||
inline const std::vector<int> LocalStarts(void) { return _lstart; };
|
||||
inline const std::vector<int> &FullDimensions(void) { return _fdimensions;};
|
||||
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
|
||||
inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;};
|
||||
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Utility to print the full decomposition details
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
||||
void show_decomposition(){
|
||||
std::cout << GridLogMessage << "\tFull Dimensions : " << _fdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tSIMD layout : " << _simd_layout << std::endl;
|
||||
std::cout << GridLogMessage << "\tGlobal Dimensions : " << _gdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tLocal Dimensions : " << _ldimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl;
|
||||
std::cout << GridLogMessage << "\tOuter strides : " << _ostride << std::endl;
|
||||
std::cout << GridLogMessage << "\tInner strides : " << _istride << std::endl;
|
||||
std::cout << GridLogMessage << "\tiSites : " << _isites << std::endl;
|
||||
std::cout << GridLogMessage << "\toSites : " << _osites << std::endl;
|
||||
std::cout << GridLogMessage << "\tlSites : " << lSites() << std::endl;
|
||||
std::cout << GridLogMessage << "\tgSites : " << gSites() << std::endl;
|
||||
std::cout << GridLogMessage << "\tNd : " << _ndimension << std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Global addressing
|
||||
////////////////////////////////////////////////////////////////
|
||||
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
|
||||
assert(gidx< gSites());
|
||||
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
|
||||
}
|
||||
void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
|
||||
assert(lidx<lSites());
|
||||
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
|
||||
}
|
||||
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
|
||||
gidx=0;
|
||||
int mult=1;
|
||||
for(int mu=0;mu<_ndimension;mu++) {
|
||||
gidx+=mult*gcoor[mu];
|
||||
mult*=_gdimensions[mu];
|
||||
}
|
||||
}
|
||||
void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
|
||||
{
|
||||
pcoor.resize(_ndimension);
|
||||
lcoor.resize(_ndimension);
|
||||
for(int mu=0;mu<_ndimension;mu++){
|
||||
int _fld = _fdimensions[mu]/_processors[mu];
|
||||
pcoor[mu] = gcoor[mu]/_fld;
|
||||
lcoor[mu] = gcoor[mu]%_fld;
|
||||
}
|
||||
}
|
||||
void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
|
||||
{
|
||||
std::vector<int> pcoor;
|
||||
std::vector<int> lcoor;
|
||||
GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
|
||||
rank = RankFromProcessorCoor(pcoor);
|
||||
/*
|
||||
std::vector<int> cblcoor(lcoor);
|
||||
for(int d=0;d<cblcoor.size();d++){
|
||||
if( this->CheckerBoarded(d) ) {
|
||||
cblcoor[d] = lcoor[d]/2;
|
||||
}
|
||||
}
|
||||
*/
|
||||
i_idx= iIndex(lcoor);
|
||||
o_idx= oIndex(lcoor);
|
||||
}
|
||||
|
||||
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
||||
{
|
||||
gcoor.resize(_ndimension);
|
||||
std::vector<int> coor(_ndimension);
|
||||
|
||||
ProcessorCoorFromRank(rank,coor);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu];
|
||||
|
||||
iCoorFromIindex(coor,i_idx);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu];
|
||||
|
||||
oCoorFromOindex (coor,o_idx);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu];
|
||||
|
||||
}
|
||||
void RankIndexCbToFullGlobalCoor(int rank, int o_idx, int i_idx, int cb,std::vector<int> &fcoor)
|
||||
{
|
||||
RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
|
||||
if(CheckerBoarded(0)){
|
||||
fcoor[0] = fcoor[0]*2+cb;
|
||||
}
|
||||
}
|
||||
void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)
|
||||
{
|
||||
gcoor.resize(_ndimension);
|
||||
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu];
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
174
Grid/cartesian/Cartesian_full.h
Normal file
174
Grid/cartesian/Cartesian_full.h
Normal file
@ -0,0 +1,174 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_full.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_FULL_H
|
||||
#define GRID_CARTESIAN_FULL_H
|
||||
|
||||
namespace Grid{
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Grid Support.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class GridCartesian: public GridBase {
|
||||
|
||||
public:
|
||||
int dummy;
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardFromOindex (int Oindex)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoarded(int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoard(const std::vector<int> &site){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
||||
return 0;
|
||||
}
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
|
||||
return shift;
|
||||
}
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
|
||||
return shift;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Constructor takes a parent grid and possibly subdivides communicator.
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Construct from comm world
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
GridCartesian(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid) : GridBase(processor_grid)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid);
|
||||
}
|
||||
|
||||
virtual ~GridCartesian() = default;
|
||||
|
||||
void Init(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid)
|
||||
{
|
||||
///////////////////////
|
||||
// Grid information
|
||||
///////////////////////
|
||||
_isCheckerBoarded = false;
|
||||
_ndimension = dimensions.size();
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
_gdimensions.resize(_ndimension);
|
||||
_ldimensions.resize(_ndimension);
|
||||
_rdimensions.resize(_ndimension);
|
||||
_simd_layout.resize(_ndimension);
|
||||
_lstart.resize(_ndimension);
|
||||
_lend.resize(_ndimension);
|
||||
|
||||
_ostride.resize(_ndimension);
|
||||
_istride.resize(_ndimension);
|
||||
|
||||
_fsites = _gsites = _osites = _isites = 1;
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
_fdimensions[d] = dimensions[d]; // Global dimensions
|
||||
_gdimensions[d] = _fdimensions[d]; // Global dimensions
|
||||
_simd_layout[d] = simd_layout[d];
|
||||
_fsites = _fsites * _fdimensions[d];
|
||||
_gsites = _gsites * _gdimensions[d];
|
||||
|
||||
// Use a reduced simd grid
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
||||
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
||||
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
|
||||
|
||||
_lstart[d] = _processor_coor[d] * _ldimensions[d];
|
||||
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
|
||||
_osites *= _rdimensions[d];
|
||||
_isites *= _simd_layout[d];
|
||||
|
||||
// Addressing support
|
||||
if (d == 0)
|
||||
{
|
||||
_ostride[d] = 1;
|
||||
_istride[d] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
|
||||
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// subplane information
|
||||
///////////////////////
|
||||
_slice_block.resize(_ndimension);
|
||||
_slice_stride.resize(_ndimension);
|
||||
_slice_nblock.resize(_ndimension);
|
||||
|
||||
int block = 1;
|
||||
int nblock = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
nblock *= _rdimensions[d];
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
nblock /= _rdimensions[d];
|
||||
_slice_block[d] = block;
|
||||
_slice_stride[d] = _ostride[d] * _rdimensions[d];
|
||||
_slice_nblock[d] = nblock;
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
320
Grid/cartesian/Cartesian_red_black.h
Normal file
320
Grid/cartesian/Cartesian_red_black.h
Normal file
@ -0,0 +1,320 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cartesian/Cartesian_red_black.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CARTESIAN_RED_BLACK_H
|
||||
#define GRID_CARTESIAN_RED_BLACK_H
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
static const int CbRed =0;
|
||||
static const int CbBlack=1;
|
||||
static const int Even =CbRed;
|
||||
static const int Odd =CbBlack;
|
||||
|
||||
// Specialise this for red black grids storing half the data like a chess board.
|
||||
class GridRedBlackCartesian : public GridBase
|
||||
{
|
||||
public:
|
||||
std::vector<int> _checker_dim_mask;
|
||||
int _checker_dim;
|
||||
std::vector<int> _checker_board;
|
||||
|
||||
virtual int CheckerBoarded(int dim){
|
||||
if( dim==_checker_dim) return 1;
|
||||
else return 0;
|
||||
}
|
||||
virtual int CheckerBoard(const std::vector<int> &site){
|
||||
int linear=0;
|
||||
assert(site.size()==_ndimension);
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
if(_checker_dim_mask[d])
|
||||
linear=linear+site[d];
|
||||
}
|
||||
return (linear&0x1);
|
||||
}
|
||||
|
||||
|
||||
// Depending on the cb of site, we toggle source cb.
|
||||
// for block #b, element #e = (b, e)
|
||||
// we need
|
||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int ocb){
|
||||
if(dim != _checker_dim) return shift;
|
||||
|
||||
int fulldim =_fdimensions[dim];
|
||||
shift = (shift+fulldim)%fulldim;
|
||||
|
||||
// Probably faster with table lookup;
|
||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||
|
||||
if ( (source_cb+ocb)&1 ) {
|
||||
return (shift)/2;
|
||||
} else {
|
||||
return (shift+1)/2;
|
||||
}
|
||||
}
|
||||
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||
return _checker_board[Oindex];
|
||||
}
|
||||
virtual int CheckerBoardFromOindex (int Oindex)
|
||||
{
|
||||
std::vector<int> ocoor;
|
||||
oCoorFromOindex(ocoor,Oindex);
|
||||
return CheckerBoard(ocoor);
|
||||
}
|
||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||
|
||||
if(dim != _checker_dim) return shift;
|
||||
|
||||
int ocb=CheckerBoardFromOindex(osite);
|
||||
|
||||
return CheckerBoardShiftForCB(source_cb,dim,shift,ocb);
|
||||
}
|
||||
|
||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim){
|
||||
if ( _checker_dim_mask[dim] ) {
|
||||
// If _fdimensions[checker_dim] is odd, then shifting by 1 in other dims
|
||||
// does NOT cause a parity hop.
|
||||
int add=(dim==_checker_dim) ? 0 : _fdimensions[_checker_dim];
|
||||
if ( (shift+add) &0x1) {
|
||||
return 1-source_cb;
|
||||
} else {
|
||||
return source_cb;
|
||||
}
|
||||
} else {
|
||||
return source_cb;
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create Redblack from original grid; require full grid pointer ?
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base)
|
||||
{
|
||||
int dims = base->_ndimension;
|
||||
std::vector<int> checker_dim_mask(dims,1);
|
||||
int checker_dim = 0;
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack from original grid, with non-trivial checker dim mask
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &checker_dim_mask,
|
||||
int checker_dim
|
||||
) : GridBase(base->_processors,*base)
|
||||
{
|
||||
Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ;
|
||||
}
|
||||
|
||||
virtual ~GridRedBlackCartesian() = default;
|
||||
#if 0
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack grid ;; deprecate these. Should not
|
||||
// need direct creation of redblack without a full grid to base on
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const std::vector<int> &checker_dim_mask,
|
||||
int checker_dim
|
||||
) : GridBase(processor_grid,*base)
|
||||
{
|
||||
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Create redblack grid
|
||||
////////////////////////////////////////////////////////////
|
||||
GridRedBlackCartesian(const GridBase *base,
|
||||
const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid) : GridBase(processor_grid,*base)
|
||||
{
|
||||
std::vector<int> checker_dim_mask(dimensions.size(),1);
|
||||
int checker_dim = 0;
|
||||
Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
|
||||
}
|
||||
#endif
|
||||
|
||||
void Init(const std::vector<int> &dimensions,
|
||||
const std::vector<int> &simd_layout,
|
||||
const std::vector<int> &processor_grid,
|
||||
const std::vector<int> &checker_dim_mask,
|
||||
int checker_dim)
|
||||
{
|
||||
|
||||
_isCheckerBoarded = true;
|
||||
_checker_dim = checker_dim;
|
||||
assert(checker_dim_mask[checker_dim] == 1);
|
||||
_ndimension = dimensions.size();
|
||||
assert(checker_dim_mask.size() == _ndimension);
|
||||
assert(processor_grid.size() == _ndimension);
|
||||
assert(simd_layout.size() == _ndimension);
|
||||
|
||||
_fdimensions.resize(_ndimension);
|
||||
_gdimensions.resize(_ndimension);
|
||||
_ldimensions.resize(_ndimension);
|
||||
_rdimensions.resize(_ndimension);
|
||||
_simd_layout.resize(_ndimension);
|
||||
_lstart.resize(_ndimension);
|
||||
_lend.resize(_ndimension);
|
||||
|
||||
_ostride.resize(_ndimension);
|
||||
_istride.resize(_ndimension);
|
||||
|
||||
_fsites = _gsites = _osites = _isites = 1;
|
||||
|
||||
_checker_dim_mask = checker_dim_mask;
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
_fdimensions[d] = dimensions[d];
|
||||
_gdimensions[d] = _fdimensions[d];
|
||||
_fsites = _fsites * _fdimensions[d];
|
||||
_gsites = _gsites * _gdimensions[d];
|
||||
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
assert((_gdimensions[d] & 0x1) == 0);
|
||||
_gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard
|
||||
_gsites /= 2;
|
||||
}
|
||||
_ldimensions[d] = _gdimensions[d] / _processors[d];
|
||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||
_lstart[d] = _processor_coor[d] * _ldimensions[d];
|
||||
_lend[d] = _processor_coor[d] * _ldimensions[d] + _ldimensions[d] - 1;
|
||||
|
||||
// Use a reduced simd grid
|
||||
_simd_layout[d] = simd_layout[d];
|
||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // this is not checking if this is integer
|
||||
assert(_rdimensions[d] * _simd_layout[d] == _ldimensions[d]);
|
||||
assert(_rdimensions[d] > 0);
|
||||
|
||||
// all elements of a simd vector must have same checkerboard.
|
||||
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
||||
if (_simd_layout[d] > 1)
|
||||
{
|
||||
if (checker_dim_mask[d])
|
||||
{
|
||||
assert((_rdimensions[d] & 0x1) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
_osites *= _rdimensions[d];
|
||||
_isites *= _simd_layout[d];
|
||||
|
||||
// Addressing support
|
||||
if (d == 0)
|
||||
{
|
||||
_ostride[d] = 1;
|
||||
_istride[d] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_ostride[d] = _ostride[d - 1] * _rdimensions[d - 1];
|
||||
_istride[d] = _istride[d - 1] * _simd_layout[d - 1];
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// subplane information
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
_slice_block.resize(_ndimension);
|
||||
_slice_stride.resize(_ndimension);
|
||||
_slice_nblock.resize(_ndimension);
|
||||
|
||||
int block = 1;
|
||||
int nblock = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
nblock *= _rdimensions[d];
|
||||
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
nblock /= _rdimensions[d];
|
||||
_slice_block[d] = block;
|
||||
_slice_stride[d] = _ostride[d] * _rdimensions[d];
|
||||
_slice_nblock[d] = nblock;
|
||||
block = block * _rdimensions[d];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Create a checkerboard lookup table
|
||||
////////////////////////////////////////////////
|
||||
int rvol = 1;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
rvol = rvol * _rdimensions[d];
|
||||
}
|
||||
_checker_board.resize(rvol);
|
||||
for (int osite = 0; osite < _osites; osite++)
|
||||
{
|
||||
_checker_board[osite] = CheckerBoardFromOindex(osite);
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
virtual int oIndex(std::vector<int> &coor)
|
||||
{
|
||||
int idx = 0;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]);
|
||||
}
|
||||
else
|
||||
{
|
||||
idx += _ostride[d] * (coor[d] % _rdimensions[d]);
|
||||
}
|
||||
}
|
||||
return idx;
|
||||
};
|
||||
|
||||
virtual int iIndex(std::vector<int> &lcoor)
|
||||
{
|
||||
int idx = 0;
|
||||
for (int d = 0; d < _ndimension; d++)
|
||||
{
|
||||
if (d == _checker_dim)
|
||||
{
|
||||
idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d]));
|
||||
}
|
||||
else
|
||||
{
|
||||
idx += _istride[d] * (lcoor[d] / _rdimensions[d]);
|
||||
}
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
34
Grid/communicator/Communicator.h
Normal file
34
Grid/communicator/Communicator.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Communicator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMMUNICATOR_H
|
||||
#define GRID_COMMUNICATOR_H
|
||||
|
||||
#include <Grid/communicator/SharedMemory.h>
|
||||
#include <Grid/communicator/Communicator_base.h>
|
||||
|
||||
#endif
|
76
Grid/communicator/Communicator_base.cc
Normal file
76
Grid/communicator/Communicator_base.cc
Normal file
@ -0,0 +1,76 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_none.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////
|
||||
CartesianCommunicator::CommunicatorPolicy_t
|
||||
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
|
||||
int CartesianCommunicator::nCommThreads = -1;
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information queries
|
||||
/////////////////////////////////
|
||||
int CartesianCommunicator::Dimensions(void) { return _ndimension; };
|
||||
int CartesianCommunicator::IsBoss(void) { return _processor==0; };
|
||||
int CartesianCommunicator::BossRank(void) { return 0; };
|
||||
int CartesianCommunicator::ThisRank(void) { return _processor; };
|
||||
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
|
||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
|
||||
int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void CartesianCommunicator::GlobalSum(ComplexF &c)
|
||||
{
|
||||
GlobalSumVector((float *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
|
||||
{
|
||||
GlobalSumVector((float *)c,2*N);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(ComplexD &c)
|
||||
{
|
||||
GlobalSumVector((double *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
|
||||
{
|
||||
GlobalSumVector((double *)c,2*N);
|
||||
}
|
||||
|
||||
}
|
||||
|
207
Grid/communicator/Communicator_base.h
Normal file
207
Grid/communicator/Communicator_base.h
Normal file
@ -0,0 +1,207 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMMUNICATOR_BASE_H
|
||||
#define GRID_COMMUNICATOR_BASE_H
|
||||
|
||||
///////////////////////////////////
|
||||
// Processor layout information
|
||||
///////////////////////////////////
|
||||
#include <Grid/communicator/SharedMemory.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class CartesianCommunicator : public SharedMemory {
|
||||
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Policies
|
||||
////////////////////////////////////////////
|
||||
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
|
||||
static CommunicatorPolicy_t CommunicatorPolicy;
|
||||
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
|
||||
static int nCommThreads;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Communicator should know nothing of the physics grid, only processor grid.
|
||||
////////////////////////////////////////////
|
||||
int _Nprocessors; // How many in all
|
||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||
int _processor; // linear processor rank
|
||||
std::vector<int> _processor_coor; // linear processor coordinate
|
||||
unsigned long _ndimension;
|
||||
static Grid_MPI_Comm communicator_world;
|
||||
Grid_MPI_Comm communicator;
|
||||
std::vector<Grid_MPI_Comm> communicator_halo;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Must call in Grid startup
|
||||
////////////////////////////////////////////////
|
||||
static void Init(int *argc, char ***argv);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Constructors to sub-divide a parent communicator
|
||||
// and default to comm world
|
||||
////////////////////////////////////////////////
|
||||
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank);
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
virtual ~CartesianCommunicator();
|
||||
|
||||
private:
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Private initialise from an MPI communicator
|
||||
// Can use after an MPI_Comm_split, but hidden from user so private
|
||||
////////////////////////////////////////////////
|
||||
void InitFromMPICommunicator(const std::vector<int> &processors, Grid_MPI_Comm communicator_base);
|
||||
|
||||
public:
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||
|
||||
int Dimensions(void) ;
|
||||
int IsBoss(void) ;
|
||||
int BossRank(void) ;
|
||||
int ThisRank(void) ;
|
||||
const std::vector<int> & ThisProcessorCoor(void) ;
|
||||
const std::vector<int> & ProcessorGrid(void) ;
|
||||
int ProcessorCount(void) ;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static int RankWorld(void) ;
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
////////////////////////////////////////////////////////////
|
||||
void GlobalSum(RealF &);
|
||||
void GlobalSumVector(RealF *,int N);
|
||||
void GlobalSum(RealD &);
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
void GlobalSum(ComplexF &c);
|
||||
void GlobalSumVector(ComplexF *c,int N);
|
||||
void GlobalSum(ComplexD &c);
|
||||
void GlobalSumVector(ComplexD *c,int N);
|
||||
void GlobalXOR(uint32_t &);
|
||||
void GlobalXOR(uint64_t &);
|
||||
|
||||
template<class obj> void GlobalSum(obj &o){
|
||||
typedef typename obj::scalar_type scalar_type;
|
||||
int words = sizeof(obj)/sizeof(scalar_type);
|
||||
scalar_type * ptr = (scalar_type *)& o;
|
||||
GlobalSumVector(ptr,words);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Face exchange, buffer swap in translational invariant way
|
||||
////////////////////////////////////////////////////////////
|
||||
void SendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
|
||||
double StencilSendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes,int dir);
|
||||
|
||||
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes,int dir);
|
||||
|
||||
|
||||
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int i);
|
||||
void StencilBarrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Barrier
|
||||
////////////////////////////////////////////////////////////
|
||||
void Barrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Broadcast a buffer and composite larger
|
||||
////////////////////////////////////////////////////////////
|
||||
void Broadcast(int root,void* data, int bytes);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// All2All down one dimension
|
||||
////////////////////////////////////////////////////////////
|
||||
template<class T> void AllToAll(int dim,std::vector<T> &in, std::vector<T> &out){
|
||||
assert(dim>=0);
|
||||
assert(dim<_ndimension);
|
||||
assert(in.size()==out.size());
|
||||
int numnode = _processors[dim];
|
||||
uint64_t bytes=sizeof(T);
|
||||
uint64_t words=in.size()/numnode;
|
||||
assert(numnode * words == in.size());
|
||||
assert(words < (1ULL<<31));
|
||||
AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
|
||||
}
|
||||
void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
|
||||
void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
|
||||
|
||||
template<class obj> void Broadcast(int root,obj &data)
|
||||
{
|
||||
Broadcast(root,(void *)&data,sizeof(data));
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
514
Grid/communicator/Communicator_mpi3.cc
Normal file
514
Grid/communicator/Communicator_mpi3.cc
Normal file
@ -0,0 +1,514 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_mpi.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/communicator/SharedMemory.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
Grid_MPI_Comm CartesianCommunicator::communicator_world;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// First initialise of comms system
|
||||
////////////////////////////////////////////
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||
{
|
||||
|
||||
int flag;
|
||||
int provided;
|
||||
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||
if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
|
||||
(nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) )
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Grid_quiesce_nodes();
|
||||
|
||||
// Never clean up as done once.
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
GlobalSharedMemory::SharedMemoryAllocate(
|
||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
|
||||
GlobalSharedMemory::Hugepages);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Use cartesian communicators now even in MPI3
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
|
||||
assert(ierr==0);
|
||||
}
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||
{
|
||||
int rank;
|
||||
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
|
||||
assert(ierr==0);
|
||||
return rank;
|
||||
}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||
{
|
||||
coor.resize(_ndimension);
|
||||
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Initialises from communicator_world
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
MPI_Comm optimal_comm;
|
||||
////////////////////////////////////////////////////
|
||||
// Remap using the shared memory optimising routine
|
||||
// The remap creates a comm which must be freed
|
||||
////////////////////////////////////////////////////
|
||||
GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm);
|
||||
InitFromMPICommunicator(processors,optimal_comm);
|
||||
SetCommunicator(optimal_comm);
|
||||
///////////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////////
|
||||
MPI_Comm_free(&optimal_comm);
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Try to subdivide communicator
|
||||
//////////////////////////////////
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
|
||||
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
|
||||
std::vector<int> parent_processor_coor(_ndimension,0);
|
||||
std::vector<int> parent_processors (_ndimension,1);
|
||||
|
||||
// Can make 5d grid from 4d etc...
|
||||
int pad = _ndimension-parent_ndimension;
|
||||
for(int d=0;d<parent_ndimension;d++){
|
||||
parent_processor_coor[pad+d]=parent._processor_coor[d];
|
||||
parent_processors [pad+d]=parent._processors[d];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// split the communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// int Nparent = parent._processors ;
|
||||
// std::cout << " splitting from communicator "<<parent.communicator <<std::endl;
|
||||
int Nparent;
|
||||
MPI_Comm_size(parent.communicator,&Nparent);
|
||||
// std::cout << " Parent size "<<Nparent <<std::endl;
|
||||
|
||||
int childsize=1;
|
||||
for(int d=0;d<processors.size();d++) {
|
||||
childsize *= processors[d];
|
||||
}
|
||||
int Nchild = Nparent/childsize;
|
||||
assert (childsize * Nchild == Nparent);
|
||||
|
||||
// std::cout << " child size "<<childsize <<std::endl;
|
||||
|
||||
std::vector<int> ccoor(_ndimension); // coor within subcommunicator
|
||||
std::vector<int> scoor(_ndimension); // coor of split within parent
|
||||
std::vector<int> ssize(_ndimension); // coor of split within parent
|
||||
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
ccoor[d] = parent_processor_coor[d] % processors[d];
|
||||
scoor[d] = parent_processor_coor[d] / processors[d];
|
||||
ssize[d] = parent_processors[d] / processors[d];
|
||||
}
|
||||
|
||||
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
|
||||
int crank;
|
||||
// Mpi uses the reverse Lexico convention to us; so reversed routines called
|
||||
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
|
||||
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
|
||||
|
||||
MPI_Comm comm_split;
|
||||
if ( Nchild > 1 ) {
|
||||
|
||||
if(0){
|
||||
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
|
||||
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Declare victory
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
|
||||
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
|
||||
std::cout << " Split communicator " <<comm_split <<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Split the communicator
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
|
||||
assert(ierr==0);
|
||||
|
||||
} else {
|
||||
srank = 0;
|
||||
int ierr = MPI_Comm_dup (parent.communicator,&comm_split);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Set up from the new split communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
InitFromMPICommunicator(processors,comm_split);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take the right SHM buffers
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
SetCommunicator(comm_split);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Free the temp communicator
|
||||
///////////////////////////////////////////////
|
||||
MPI_Comm_free(&comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
for(int d=0;d<processors.size();d++){
|
||||
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
|
||||
}
|
||||
}
|
||||
for(int d=0;d<processors.size();d++){
|
||||
assert(_processor_coor[d] == ccoor[d] );
|
||||
}
|
||||
}
|
||||
|
||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
|
||||
{
|
||||
////////////////////////////////////////////////////
|
||||
// Creates communicator, and the communicator_halo
|
||||
////////////////////////////////////////////////////
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
/////////////////////////////////
|
||||
// Count the requested nodes
|
||||
/////////////////////////////////
|
||||
_Nprocessors=1;
|
||||
_processors = processors;
|
||||
for(int i=0;i<_ndimension;i++){
|
||||
_Nprocessors*=_processors[i];
|
||||
}
|
||||
|
||||
std::vector<int> periodic(_ndimension,1);
|
||||
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
|
||||
MPI_Comm_rank(communicator,&_processor);
|
||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||
|
||||
if ( 0 && (communicator_base != communicator_world) ) {
|
||||
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
|
||||
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
|
||||
for(int d=0;d<_processors.size();d++){
|
||||
std::cout << _processor_coor[d]<<" ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
int Size;
|
||||
MPI_Comm_size(communicator,&Size);
|
||||
|
||||
communicator_halo.resize (2*_ndimension);
|
||||
for(int i=0;i<_ndimension*2;i++){
|
||||
MPI_Comm_dup(communicator,&communicator_halo[i]);
|
||||
}
|
||||
assert(Size==_Nprocessors);
|
||||
}
|
||||
|
||||
CartesianCommunicator::~CartesianCommunicator()
|
||||
{
|
||||
int MPI_is_finalised;
|
||||
MPI_Finalized(&MPI_is_finalised);
|
||||
if (communicator && !MPI_is_finalised) {
|
||||
MPI_Comm_free(&communicator);
|
||||
for(int i=0;i<communicator_halo.size();i++){
|
||||
MPI_Comm_free(&communicator_halo[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalXOR(uint64_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(float &f){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||
{
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(double &d)
|
||||
{
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||
{
|
||||
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
std::vector<CommsRequest_t> reqs(0);
|
||||
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
|
||||
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
|
||||
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
|
||||
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||
SendToRecvFromComplete(reqs);
|
||||
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
|
||||
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
|
||||
}
|
||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int sender,
|
||||
int receiver,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Status stat;
|
||||
assert(sender != receiver);
|
||||
int tag = sender;
|
||||
if ( _processor == sender ) {
|
||||
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||
}
|
||||
if ( _processor == receiver ) {
|
||||
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||
}
|
||||
}
|
||||
// Basic Halo comms primitive
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
int myrank = _processor;
|
||||
int ierr;
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
} else {
|
||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
||||
recv,bytes,MPI_CHAR,from, from,
|
||||
communicator,MPI_STATUS_IGNORE);
|
||||
assert(ierr==0);
|
||||
}
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
std::vector<CommsRequest_t> list;
|
||||
double offbytes = StencilSendToRecvFromBegin(list,xmit,dest,recv,from,bytes,dir);
|
||||
StencilSendToRecvFromComplete(list,dir);
|
||||
return offbytes;
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes,int dir)
|
||||
{
|
||||
int ncomm =communicator_halo.size();
|
||||
int commdir=dir%ncomm;
|
||||
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
int ierr;
|
||||
int gdest = ShmRanks[dest];
|
||||
int gfrom = ShmRanks[from];
|
||||
int gme = ShmRanks[_processor];
|
||||
|
||||
assert(dest != _processor);
|
||||
assert(from != _processor);
|
||||
assert(gme == ShmRank);
|
||||
double off_node_bytes=0.0;
|
||||
|
||||
if ( gfrom ==MPI_UNDEFINED) {
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(rrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( gdest == MPI_UNDEFINED ) {
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
off_node_bytes+=bytes;
|
||||
}
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
this->StencilSendToRecvFromComplete(list,dir);
|
||||
}
|
||||
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
int nreq=list.size();
|
||||
|
||||
if (nreq==0) return;
|
||||
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
assert(ierr==0);
|
||||
list.resize(0);
|
||||
}
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
int ierr = MPI_Barrier(communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr=MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
int CartesianCommunicator::RankWorld(void){
|
||||
int r;
|
||||
MPI_Comm_rank(communicator_world,&r);
|
||||
return r;
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
std::vector<int> row(_ndimension,1);
|
||||
assert(dim>=0 && dim<_ndimension);
|
||||
|
||||
// Split the communicator
|
||||
row[dim] = _processors[dim];
|
||||
|
||||
int me;
|
||||
CartesianCommunicator Comm(row,*this,me);
|
||||
Comm.AllToAll(in,out,words,bytes);
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
// MPI is a pain and uses "int" arguments
|
||||
// 64*64*64*128*16 == 500Million elements of data.
|
||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
||||
// (Turns up on 32^3 x 64 Gparity too)
|
||||
MPI_Datatype object;
|
||||
int iwords;
|
||||
int ibytes;
|
||||
iwords = words;
|
||||
ibytes = bytes;
|
||||
assert(words == iwords); // safe to cast to int ?
|
||||
assert(bytes == ibytes); // safe to cast to int ?
|
||||
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
|
||||
MPI_Type_commit(&object);
|
||||
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
|
||||
MPI_Type_free(&object);
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
165
Grid/communicator/Communicator_none.cc
Normal file
165
Grid/communicator/Communicator_none.cc
Normal file
@ -0,0 +1,165 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_none.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Grid_MPI_Comm CartesianCommunicator::communicator_world;
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||
{
|
||||
GlobalSharedMemory::Init(communicator_world);
|
||||
GlobalSharedMemory::SharedMemoryAllocate(
|
||||
GlobalSharedMemory::MAX_MPI_SHM_BYTES,
|
||||
GlobalSharedMemory::Hugepages);
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
|
||||
: CartesianCommunicator(processors)
|
||||
{
|
||||
srank=0;
|
||||
SetCommunicator(communicator_world);
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_processors = processors;
|
||||
_ndimension = processors.size();
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
// Require 1^N processor grid for fake
|
||||
_Nprocessors=1;
|
||||
_processor = 0;
|
||||
for(int d=0;d<_ndimension;d++) {
|
||||
assert(_processors[d]==1);
|
||||
_processor_coor[d] = 0;
|
||||
}
|
||||
SetCommunicator(communicator_world);
|
||||
}
|
||||
|
||||
CartesianCommunicator::~CartesianCommunicator(){}
|
||||
|
||||
void CartesianCommunicator::GlobalSum(float &){}
|
||||
void CartesianCommunicator::GlobalSumVector(float *,int N){}
|
||||
void CartesianCommunicator::GlobalSum(double &){}
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalSum(uint64_t &){}
|
||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
|
||||
void CartesianCommunicator::GlobalXOR(uint32_t &){}
|
||||
void CartesianCommunicator::GlobalXOR(uint64_t &){}
|
||||
|
||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
// Basic Halo comms primitive -- should never call in single node
|
||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
bcopy(in,out,bytes*words);
|
||||
}
|
||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
||||
{
|
||||
bcopy(in,out,bytes*words);
|
||||
}
|
||||
|
||||
int CartesianCommunicator::RankWorld(void){return 0;}
|
||||
void CartesianCommunicator::Barrier(void){}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor; }
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
source =0;
|
||||
dest=0;
|
||||
}
|
||||
|
||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes, int dir)
|
||||
{
|
||||
std::vector<CommsRequest_t> list;
|
||||
// Discard the "dir"
|
||||
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
SendToRecvFromComplete(list);
|
||||
return 2.0*bytes;
|
||||
}
|
||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes, int dir)
|
||||
{
|
||||
// Discard the "dir"
|
||||
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
return 2.0*bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::StencilBarrier(void){};
|
||||
|
||||
|
||||
}
|
||||
|
92
Grid/communicator/SharedMemory.cc
Normal file
92
Grid/communicator/SharedMemory.cc
Normal file
@ -0,0 +1,92 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// static data
|
||||
|
||||
uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
|
||||
int GlobalSharedMemory::Hugepages = 0;
|
||||
int GlobalSharedMemory::_ShmSetup;
|
||||
int GlobalSharedMemory::_ShmAlloc;
|
||||
uint64_t GlobalSharedMemory::_ShmAllocBytes;
|
||||
|
||||
std::vector<void *> GlobalSharedMemory::WorldShmCommBufs;
|
||||
|
||||
Grid_MPI_Comm GlobalSharedMemory::WorldShmComm;
|
||||
int GlobalSharedMemory::WorldShmRank;
|
||||
int GlobalSharedMemory::WorldShmSize;
|
||||
std::vector<int> GlobalSharedMemory::WorldShmRanks;
|
||||
|
||||
Grid_MPI_Comm GlobalSharedMemory::WorldComm;
|
||||
int GlobalSharedMemory::WorldSize;
|
||||
int GlobalSharedMemory::WorldRank;
|
||||
|
||||
int GlobalSharedMemory::WorldNodes;
|
||||
int GlobalSharedMemory::WorldNode;
|
||||
|
||||
void GlobalSharedMemory::SharedMemoryFree(void)
|
||||
{
|
||||
assert(_ShmAlloc);
|
||||
assert(_ShmAllocBytes>0);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
munmap(WorldShmCommBufs[r],_ShmAllocBytes);
|
||||
}
|
||||
_ShmAlloc = 0;
|
||||
_ShmAllocBytes = 0;
|
||||
}
|
||||
/////////////////////////////////
|
||||
// Alloc, free shmem region
|
||||
/////////////////////////////////
|
||||
void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||
void *ptr = (void *)heap_top;
|
||||
heap_top += bytes;
|
||||
heap_bytes+= bytes;
|
||||
if (heap_bytes >= heap_size) {
|
||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
||||
assert(heap_bytes<heap_size);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
void SharedMemory::ShmBufferFreeAll(void) {
|
||||
heap_top =(size_t)ShmBufferSelf();
|
||||
heap_bytes=0;
|
||||
}
|
||||
void *SharedMemory::ShmBufferSelf(void)
|
||||
{
|
||||
return ShmCommBufs[ShmRank];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
165
Grid/communicator/SharedMemory.h
Normal file
165
Grid/communicator/SharedMemory.h
Normal file
@ -0,0 +1,165 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
|
||||
// TODO
|
||||
// 1) move includes into SharedMemory.cc
|
||||
//
|
||||
// 2) split shared memory into a) optimal communicator creation from comm world
|
||||
//
|
||||
// b) shared memory buffers container
|
||||
// -- static globally shared; init once
|
||||
// -- per instance set of buffers.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
#if defined (GRID_COMMS_MPI3)
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
#include <semaphore.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/mman.h>
|
||||
#include <zlib.h>
|
||||
#ifdef HAVE_NUMAIF_H
|
||||
#include <numaif.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#if defined (GRID_COMMS_MPI3)
|
||||
typedef MPI_Comm Grid_MPI_Comm;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#else
|
||||
typedef int CommsRequest_t;
|
||||
typedef int Grid_MPI_Comm;
|
||||
#endif
|
||||
|
||||
class GlobalSharedMemory {
|
||||
private:
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
|
||||
// Init once lock on the buffer allocation
|
||||
static int _ShmSetup;
|
||||
static int _ShmAlloc;
|
||||
static uint64_t _ShmAllocBytes;
|
||||
|
||||
public:
|
||||
static int ShmSetup(void) { return _ShmSetup; }
|
||||
static int ShmAlloc(void) { return _ShmAlloc; }
|
||||
static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; }
|
||||
static uint64_t MAX_MPI_SHM_BYTES;
|
||||
static int Hugepages;
|
||||
|
||||
static std::vector<void *> WorldShmCommBufs;
|
||||
|
||||
static Grid_MPI_Comm WorldComm;
|
||||
static int WorldRank;
|
||||
static int WorldSize;
|
||||
|
||||
static Grid_MPI_Comm WorldShmComm;
|
||||
static int WorldShmRank;
|
||||
static int WorldShmSize;
|
||||
|
||||
static int WorldNodes;
|
||||
static int WorldNode;
|
||||
|
||||
static std::vector<int> WorldShmRanks;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Create an optimal reordered communicator that makes MPI_Cart_create get it right
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
|
||||
static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian
|
||||
///////////////////////////////////////////////////
|
||||
// Provide shared memory facilities off comm world
|
||||
///////////////////////////////////////////////////
|
||||
static void SharedMemoryAllocate(uint64_t bytes, int flags);
|
||||
static void SharedMemoryFree(void);
|
||||
|
||||
};
|
||||
|
||||
//////////////////////////////
|
||||
// one per communicator
|
||||
//////////////////////////////
|
||||
class SharedMemory
|
||||
{
|
||||
private:
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
|
||||
size_t heap_top;
|
||||
size_t heap_bytes;
|
||||
size_t heap_size;
|
||||
|
||||
protected:
|
||||
|
||||
Grid_MPI_Comm ShmComm; // for barriers
|
||||
int ShmRank;
|
||||
int ShmSize;
|
||||
std::vector<void *> ShmCommBufs;
|
||||
std::vector<int> ShmRanks;// Mapping comm ranks to Shm ranks
|
||||
|
||||
public:
|
||||
SharedMemory() {};
|
||||
~SharedMemory();
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// set the buffers & sizes
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void SetCommunicator(Grid_MPI_Comm comm);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// For this instance ; disjoint buffer sets between splits if split grid
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void ShmBarrier(void);
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Call on any instance
|
||||
///////////////////////////////////////////////////
|
||||
void SharedMemoryTest(void);
|
||||
void *ShmBufferSelf(void);
|
||||
void *ShmBuffer (int rank);
|
||||
void *ShmBufferTranslate(int rank,void * local_p);
|
||||
void *ShmBufferMalloc(size_t bytes);
|
||||
void ShmBufferFreeAll(void) ;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Make info on Nodes & ranks and Shared memory available
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
int NodeCount(void) { return GlobalSharedMemory::WorldNodes;};
|
||||
int RankCount(void) { return GlobalSharedMemory::WorldSize;};
|
||||
|
||||
};
|
||||
|
||||
}
|
652
Grid/communicator/SharedMemoryMPI.cc
Normal file
652
Grid/communicator/SharedMemoryMPI.cc
Normal file
@ -0,0 +1,652 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*Construct from an MPI communicator*/
|
||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(_ShmSetup==0);
|
||||
WorldComm = comm;
|
||||
MPI_Comm_rank(WorldComm,&WorldRank);
|
||||
MPI_Comm_size(WorldComm,&WorldSize);
|
||||
// WorldComm, WorldSize, WorldRank
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
|
||||
MPI_Comm_rank(WorldShmComm ,&WorldShmRank);
|
||||
MPI_Comm_size(WorldShmComm ,&WorldShmSize);
|
||||
// WorldShmComm, WorldShmSize, WorldShmRank
|
||||
|
||||
// WorldNodes
|
||||
WorldNodes = WorldSize/WorldShmSize;
|
||||
assert( (WorldNodes * WorldShmSize) == WorldSize );
|
||||
|
||||
// FIXME: Check all WorldShmSize are the same ?
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (WorldComm, &WorldGroup);
|
||||
MPI_Comm_group (WorldShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize); for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
WorldShmRanks.resize(WorldSize);
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &WorldShmRanks[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and nominate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
std::vector<int> MyGroup;
|
||||
MyGroup.resize(WorldShmSize);
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(WorldShmRanks[rank]!=MPI_UNDEFINED){
|
||||
assert(g<WorldShmSize);
|
||||
MyGroup[g++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||
int myleader = MyGroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(WorldNodes,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the node of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
WorldNode=-1;
|
||||
for(int g=0;g<WorldNodes;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
WorldNode=g;
|
||||
}
|
||||
}
|
||||
assert(WorldNode!=-1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
// Gray encode support
|
||||
int BinaryToGray (int binary) {
|
||||
int gray = (binary>>1)^binary;
|
||||
return gray;
|
||||
}
|
||||
int Log2Size(int TwoToPower,int MAXLOG2)
|
||||
{
|
||||
int log2size = -1;
|
||||
for(int i=0;i<=MAXLOG2;i++){
|
||||
if ( (0x1<<i) == TwoToPower ) {
|
||||
log2size = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return log2size;
|
||||
}
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
#undef HYPERCUBE
|
||||
#ifdef HYPERCUBE
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify the hypercube coordinate of this node using hostname
|
||||
////////////////////////////////////////////////////////////////
|
||||
// n runs 0...7 9...16 18...25 27...34 (8*4) 5 bits
|
||||
// i runs 0..7 3 bits
|
||||
// r runs 0..3 2 bits
|
||||
// 2^10 = 1024 nodes
|
||||
const int maxhdim = 10;
|
||||
std::vector<int> HyperCubeCoords(maxhdim,0);
|
||||
std::vector<int> RootHyperCubeCoords(maxhdim,0);
|
||||
int R;
|
||||
int I;
|
||||
int N;
|
||||
const int namelen = _POSIX_HOST_NAME_MAX;
|
||||
char name[namelen];
|
||||
|
||||
// Parse ICE-XA hostname to get hypercube location
|
||||
gethostname(name,namelen);
|
||||
int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
|
||||
assert(nscan==3);
|
||||
|
||||
int nlo = N%9;
|
||||
int nhi = N/9;
|
||||
uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo ;
|
||||
uint32_t rootcoor = hypercoor;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Print debug info
|
||||
//////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
std::string hname(name);
|
||||
std::cout << "hostname "<<hname<<std::endl;
|
||||
std::cout << "R " << R << " I " << I << " N "<< N<<
|
||||
<< " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// broadcast node 0's base coordinate for this partition.
|
||||
//////////////////////////////////////////////////////////////////
|
||||
MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
|
||||
hypercoor=hypercoor-rootcoor;
|
||||
assert(hypercoor<WorldSize);
|
||||
assert(hypercoor>=0);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Printing
|
||||
//////////////////////////////////////
|
||||
for(int d=0;d<maxhdim;d++){
|
||||
HyperCubeCoords[d] = (hypercoor>>d)&0x1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
std::vector<int> HyperCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Map Hcube according to physical lattice
|
||||
// must partition. Loop over dims and find out who would join.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int hcoor = hypercoor;
|
||||
for(int d=0;d<ndimension;d++){
|
||||
int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
|
||||
int msk = (0x1<<bits)-1;
|
||||
HyperCoor[d]=hcoor & msk;
|
||||
HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic
|
||||
hcoor = hcoor >> bits;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
|
||||
for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d];
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
////////////////////////////////////////////////////////////////
|
||||
int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
|
||||
assert(log2size != -1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Identify subblock of ranks on node spreading across dims
|
||||
// in a maximally symmetrical way
|
||||
////////////////////////////////////////////////////////////////
|
||||
int ndimension = processors.size();
|
||||
std::vector<int> processor_coor(ndimension);
|
||||
std::vector<int> WorldDims = processors; std::vector<int> ShmDims (ndimension,1); std::vector<int> NodeDims (ndimension);
|
||||
std::vector<int> ShmCoor (ndimension); std::vector<int> NodeCoor (ndimension); std::vector<int> WorldCoor(ndimension);
|
||||
int dim = 0;
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%ndimension;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<ndimension;d++){
|
||||
NodeDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
////////////////////////////////////////////////////////////////
|
||||
int Nprocessors=1;
|
||||
for(int i=0;i<ndimension;i++){
|
||||
Nprocessors*=processors[i];
|
||||
}
|
||||
assert(WorldSize==Nprocessors);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
////////////////////////////////////////////////////////////////
|
||||
int rank;
|
||||
|
||||
Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode ,NodeDims);
|
||||
Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
|
||||
for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
|
||||
Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Build the new communicator
|
||||
/////////////////////////////////////////////////////////////////
|
||||
int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
|
||||
assert(ierr==0);
|
||||
#endif
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SHMGET
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMGET
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
std::vector<int> shmids(WorldShmSize);
|
||||
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
size_t size = bytes;
|
||||
key_t key = IPC_PRIVATE;
|
||||
int flags = IPC_CREAT | SHM_R | SHM_W;
|
||||
#ifdef SHM_HUGETLB
|
||||
if (Hugepages) flags|=SHM_HUGETLB;
|
||||
#endif
|
||||
if ((shmids[r]= shmget(key,size, flags)) ==-1) {
|
||||
int errsv = errno;
|
||||
printf("Errno %d\n",errsv);
|
||||
printf("key %d\n",key);
|
||||
printf("size %lld\n",size);
|
||||
printf("flags %d\n",flags);
|
||||
perror("shmget");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
WorldShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
|
||||
if (WorldShmCommBufs[r] == (uint64_t *)-1) {
|
||||
perror("Shared memory attach failure");
|
||||
shmctl(shmids[r], IPC_RMID, NULL);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
///////////////////////////////////
|
||||
// Mark for clean up
|
||||
///////////////////////////////////
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
|
||||
}
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_MPI3_SHMMMAP
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP implementation "<< GRID_SHM_PATH <<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
sprintf(shm_name,GRID_SHM_PATH "/Grid_mpi3_shm_%d_%d",WorldNode,r);
|
||||
int fd=open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd == -1) {
|
||||
printf("open %s failed\n",shm_name);
|
||||
perror("open hugetlbfs");
|
||||
exit(0);
|
||||
}
|
||||
int mmap_flag = MAP_SHARED ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHM_NONE
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " MMAP anonymous implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared windows for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbf and others map filesystems as mappable huge pages
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
char shm_name [NAME_MAX];
|
||||
assert(WorldShmSize == 1);
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
int fd=-1;
|
||||
int mmap_flag = MAP_SHARED |MAP_ANONYMOUS ;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag|=MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void *ptr = (void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag,fd, 0);
|
||||
if ( ptr == (void *)MAP_FAILED ) {
|
||||
printf("mmap %s failed\n",shm_name);
|
||||
perror("failed mmap"); assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
close(fd);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< bytes<< "bytes)"<<std::endl;
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
};
|
||||
#endif // MMAP
|
||||
|
||||
#ifdef GRID_MPI3_SHMOPEN
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// POSIX SHMOPEN ; as far as I know Linux does not allow EXPLICIT HugePages with this case
|
||||
// tmpfs (Larry Meadows says) does not support explicit huge page, and this is used for
|
||||
// the posix shm virtual file system
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
std::cout << "SharedMemoryAllocate "<< bytes<< " SHMOPEN implementation "<<std::endl;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
MPI_Barrier(WorldShmComm);
|
||||
WorldShmCommBufs.resize(WorldShmSize);
|
||||
|
||||
char shm_name [NAME_MAX];
|
||||
if ( WorldShmRank == 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
size_t size = bytes;
|
||||
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
shm_unlink(shm_name);
|
||||
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
|
||||
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
|
||||
ftruncate(fd, size);
|
||||
|
||||
int mmap_flag = MAP_SHARED;
|
||||
#ifdef MAP_POPULATE
|
||||
mmap_flag |= MAP_POPULATE;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if (flags) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);
|
||||
|
||||
std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<< "("<< size<< "bytes)"<<std::endl;
|
||||
if ( ptr == (void * )MAP_FAILED ) {
|
||||
perror("failed mmap");
|
||||
assert(0);
|
||||
}
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier(WorldShmComm);
|
||||
|
||||
if ( WorldShmRank != 0 ) {
|
||||
for(int r=0;r<WorldShmSize;r++){
|
||||
|
||||
size_t size = bytes ;
|
||||
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);
|
||||
|
||||
int fd=shm_open(shm_name,O_RDWR,0666);
|
||||
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
|
||||
|
||||
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
|
||||
assert(((uint64_t)ptr&0x3F)==0);
|
||||
WorldShmCommBufs[r] =ptr;
|
||||
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
_ShmAlloc=1;
|
||||
_ShmAllocBytes = bytes;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
////////////////////////////////////////////////////////
|
||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
{
|
||||
int rank, size;
|
||||
MPI_Comm_rank(comm,&rank);
|
||||
MPI_Comm_size(comm,&size);
|
||||
ShmRanks.resize(size);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Map ShmRank to WorldShmRank and use the right buffer
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
assert (GlobalSharedMemory::ShmAlloc()==1);
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
|
||||
uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0 ;
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
|
||||
|
||||
ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
|
||||
// std::cout << "SetCommunicator ShmCommBufs ["<< r<< "] = "<< ShmCommBufs[r]<< " wsr = "<<wsr<<std::endl;
|
||||
}
|
||||
ShmBufferFreeAll();
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find comm ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group FullGroup, ShmGroup;
|
||||
MPI_Comm_group (comm , &FullGroup);
|
||||
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> ranks(size); for(int r=0;r<size;r++) ranks[r]=r;
|
||||
MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup, &ShmRanks[0]);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
//////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::ShmBarrier(void)
|
||||
{
|
||||
MPI_Barrier (ShmComm);
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Test the shared memory is working
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::SharedMemoryTest(void)
|
||||
{
|
||||
ShmBarrier();
|
||||
if ( ShmRank == 0 ) {
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
uint64_t * check = (uint64_t *) ShmCommBufs[r];
|
||||
check[0] = GlobalSharedMemory::WorldNode;
|
||||
check[1] = r;
|
||||
check[2] = 0x5A5A5A;
|
||||
}
|
||||
}
|
||||
ShmBarrier();
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
uint64_t * check = (uint64_t *) ShmCommBufs[r];
|
||||
|
||||
assert(check[0]==GlobalSharedMemory::WorldNode);
|
||||
assert(check[1]==r);
|
||||
assert(check[2]==0x5A5A5A);
|
||||
|
||||
}
|
||||
ShmBarrier();
|
||||
}
|
||||
|
||||
void *SharedMemory::ShmBuffer(int rank)
|
||||
{
|
||||
int gpeer = ShmRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
return ShmCommBufs[gpeer];
|
||||
}
|
||||
}
|
||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
static int count =0;
|
||||
int gpeer = ShmRanks[rank];
|
||||
assert(gpeer!=ShmRank); // never send to self
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{
|
||||
int MPI_is_finalised; MPI_Finalized(&MPI_is_finalised);
|
||||
if ( !MPI_is_finalised ) {
|
||||
MPI_Comm_free(&ShmComm);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
128
Grid/communicator/SharedMemoryNone.cc
Normal file
128
Grid/communicator/SharedMemoryNone.cc
Normal file
@ -0,0 +1,128 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/SharedMemory.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/*Construct from an MPI communicator*/
|
||||
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(_ShmSetup==0);
|
||||
WorldComm = 0;
|
||||
WorldRank = 0;
|
||||
WorldSize = 1;
|
||||
WorldShmComm = 0 ;
|
||||
WorldShmRank = 0 ;
|
||||
WorldShmSize = 1 ;
|
||||
WorldNodes = 1 ;
|
||||
WorldNode = 0 ;
|
||||
WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0;
|
||||
WorldShmCommBufs.resize(1);
|
||||
_ShmSetup=1;
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
|
||||
{
|
||||
optimal_comm = WorldComm;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hugetlbfs mapping intended, use anonymous mmap
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
{
|
||||
void * ShmCommBuf ;
|
||||
assert(_ShmSetup==1);
|
||||
assert(_ShmAlloc==0);
|
||||
int mmap_flag =0;
|
||||
#ifdef MAP_ANONYMOUS
|
||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
|
||||
#endif
|
||||
#ifdef MAP_ANON
|
||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
|
||||
#endif
|
||||
#ifdef MAP_HUGETLB
|
||||
if ( flags ) mmap_flag |= MAP_HUGETLB;
|
||||
#endif
|
||||
ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
|
||||
if (ShmCommBuf == (void *)MAP_FAILED) {
|
||||
perror("mmap failed ");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#ifdef MADV_HUGEPAGE
|
||||
if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE);
|
||||
#endif
|
||||
bzero(ShmCommBuf,bytes);
|
||||
WorldShmCommBufs[0] = ShmCommBuf;
|
||||
_ShmAllocBytes=bytes;
|
||||
_ShmAlloc=1;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Global shared functionality finished
|
||||
// Now move to per communicator functionality
|
||||
////////////////////////////////////////////////////////
|
||||
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
|
||||
{
|
||||
assert(GlobalSharedMemory::ShmAlloc()==1);
|
||||
ShmRanks.resize(1);
|
||||
ShmCommBufs.resize(1);
|
||||
ShmRanks[0] = 0;
|
||||
ShmRank = 0;
|
||||
ShmSize = 1;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Map ShmRank to WorldShmRank and use the right buffer
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0];
|
||||
heap_size = GlobalSharedMemory::ShmAllocBytes();
|
||||
ShmBufferFreeAll();
|
||||
return;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// On node barrier
|
||||
//////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::ShmBarrier(void){ return ; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Test the shared memory is working
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void SharedMemory::SharedMemoryTest(void) { return; }
|
||||
|
||||
void *SharedMemory::ShmBuffer(int rank)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
void *SharedMemory::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
SharedMemory::~SharedMemory()
|
||||
{};
|
||||
|
||||
}
|
52
Grid/cshift/Cshift.h
Normal file
52
Grid/cshift/Cshift.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cshift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_H_
|
||||
#define _GRID_CSHIFT_H_
|
||||
|
||||
#include <Grid/cshift/Cshift_common.h>
|
||||
|
||||
#ifdef GRID_COMMS_NONE
|
||||
#include <Grid/cshift/Cshift_none.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_MPI
|
||||
#include <Grid/cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_MPI3
|
||||
#include <Grid/cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_MPIT
|
||||
#include <Grid/cshift/Cshift_mpi.h>
|
||||
#endif
|
||||
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||
#endif
|
||||
#endif
|
391
Grid/cshift/Cshift_common.h
Normal file
391
Grid/cshift/Cshift_common.h
Normal file
@ -0,0 +1,391 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cshift/Cshift_common.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_COMMON_H_
|
||||
#define _GRID_CSHIFT_COMMON_H_
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there is no need to SIMD split
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj> void
|
||||
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask = 0x3;
|
||||
}
|
||||
|
||||
int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int ent = 0;
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int bo = n*e2;
|
||||
table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*stride;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb &cbmask ) {
|
||||
table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
buffer[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Gather for when there *is* need to SIMD split
|
||||
///////////////////////////////////////////////////////////////////
|
||||
template<class vobj> void
|
||||
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask = 0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int n1=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
if ( cbmask ==0x3){
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o = n*n1;
|
||||
int offset = b+n*e2;
|
||||
|
||||
vobj temp =rhs._odata[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||
// Test_cshift_red_black code.
|
||||
std::cout << " Dense packed buffer WARNING " <<std::endl;
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
|
||||
int o=n*n1;
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
int offset = b+n*e2;
|
||||
|
||||
if ( ocb & cbmask ) {
|
||||
vobj temp =rhs._odata[so+o+b];
|
||||
extract<vobj>(temp,pointers,offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there is no need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride=rhs._grid->_slice_stride[dimension];
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent =0;
|
||||
|
||||
if ( cbmask ==0x3 ) {
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int bo =n*rhs._grid->_slice_block[dimension];
|
||||
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
int bo=0;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*rhs._grid->_slice_stride[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||
if ( ocb & cbmask ) {
|
||||
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Scatter for when there *is* need to SIMD split
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
|
||||
if(cbmask ==0x3 ) {
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||
merge(rhs._odata[so+o+b],pointers,offset);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// Case of SIMD split AND checker dim cannot currently be hit, except in
|
||||
// Test_cshift_red_black code.
|
||||
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME
|
||||
std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<<std::endl;
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o = n*rhs._grid->_slice_stride[dimension];
|
||||
int offset = b+n*rhs._grid->_slice_block[dimension];
|
||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) {
|
||||
merge(rhs._odata[so+o+b],pointers,offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// local to node block strided copies
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
|
||||
{
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||
int e2=rhs._grid->_slice_block[dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
if(cbmask == 0x3 ){
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride+b;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||
if ( ocb&cbmask ) {
|
||||
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||
{
|
||||
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
if ( !rhs._grid->CheckerBoarded(dimension) ) {
|
||||
cbmask=0x3;
|
||||
}
|
||||
|
||||
int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane
|
||||
|
||||
int e1=rhs._grid->_slice_nblock[dimension];
|
||||
int e2=rhs._grid->_slice_block [dimension];
|
||||
int stride = rhs._grid->_slice_stride[dimension];
|
||||
|
||||
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||
int ent=0;
|
||||
|
||||
double t_tab,t_perm;
|
||||
if ( cbmask == 0x3 ) {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
} else {
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int o =n*stride;
|
||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||
}}
|
||||
}
|
||||
|
||||
parallel_for(int i=0;i<ent;i++){
|
||||
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Local to node Cshift
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
double t_local;
|
||||
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
Cshift_local(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_local(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid = rhs._grid;
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int gd = grid->_gdimensions[dimension];
|
||||
int ly = grid->_simd_layout[dimension];
|
||||
|
||||
// Map to always positive shift modulo global full dimension.
|
||||
shift = (shift+fd)%fd;
|
||||
|
||||
// the permute type
|
||||
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
|
||||
int permute_dim =grid->PermuteDim(dimension);
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
int permute_type_dist;
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int o = 0;
|
||||
int bo = x * grid->_ostride[dimension];
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
|
||||
int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
int sx = (x+sshift)%rd;
|
||||
|
||||
// wrap is whether sshift > rd.
|
||||
// num is sshift mod rd.
|
||||
//
|
||||
// shift 7
|
||||
//
|
||||
// XoXo YcYc
|
||||
// oXoX cYcY
|
||||
// XoXo YcYc
|
||||
// oXoX cYcY
|
||||
//
|
||||
// sshift --
|
||||
//
|
||||
// XX YY ; 3
|
||||
// XX YY ; 0
|
||||
// XX YY ; 3
|
||||
// XX YY ; 0
|
||||
//
|
||||
int permute_slice=0;
|
||||
if(permute_dim){
|
||||
int wrap = sshift/rd; wrap=wrap % ly;
|
||||
int num = sshift%rd;
|
||||
|
||||
if ( x< rd-num ) permute_slice=wrap;
|
||||
else permute_slice = (wrap+1)%ly;
|
||||
|
||||
if ( (ly>2) && (permute_slice) ) {
|
||||
assert(permute_type & RotateBit);
|
||||
permute_type_dist = permute_type|permute_slice;
|
||||
} else {
|
||||
permute_type_dist = permute_type;
|
||||
}
|
||||
}
|
||||
|
||||
if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
|
||||
else Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
262
Grid/cshift/Cshift_mpi.h
Normal file
262
Grid/cshift/Cshift_mpi.h
Normal file
@ -0,0 +1,262 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cshift/Cshift_mpi.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_MPI_H_
|
||||
#define _GRID_CSHIFT_MPI_H_
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
Lattice<vobj> ret(rhs._grid);
|
||||
|
||||
int fd = rhs._grid->_fdimensions[dimension];
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
|
||||
// Map to always positive shift modulo global full dimension.
|
||||
shift = (shift+fd)%fd;
|
||||
|
||||
ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
|
||||
|
||||
// the permute type
|
||||
int simd_layout = rhs._grid->_simd_layout[dimension];
|
||||
int comm_dim = rhs._grid->_processors[dimension] >1 ;
|
||||
int splice_dim = rhs._grid->_simd_layout[dimension]>1 && (comm_dim);
|
||||
|
||||
|
||||
if ( !comm_dim ) {
|
||||
//std::cout << "CSHIFT: Cshift_local" <<std::endl;
|
||||
Cshift_local(ret,rhs,dimension,shift); // Handles checkerboarding
|
||||
} else if ( splice_dim ) {
|
||||
//std::cout << "CSHIFT: Cshift_comms_simd call - splice_dim = " << splice_dim << " shift " << shift << " dimension = " << dimension << std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift);
|
||||
} else {
|
||||
//std::cout << "CSHIFT: Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
// std::cout << "Cshift_comms dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
// std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
// std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
int sshift[2];
|
||||
|
||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<<dimension<<"cb "<<rhs.checkerboard<<"shift "<<shift<<" sshift " << sshift[0]<<" "<<sshift[1]<<std::endl;
|
||||
if ( sshift[0] == sshift[1] ) {
|
||||
//std::cout << "Single pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
|
||||
} else {
|
||||
//std::cout << "Two pass Cshift_comms" <<std::endl;
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
|
||||
Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
GridBase *grid=rhs._grid;
|
||||
Lattice<vobj> temp(rhs._grid);
|
||||
|
||||
int fd = rhs._grid->_fdimensions[dimension];
|
||||
int rd = rhs._grid->_rdimensions[dimension];
|
||||
int pd = rhs._grid->_processors[dimension];
|
||||
int simd_layout = rhs._grid->_simd_layout[dimension];
|
||||
int comm_dim = rhs._grid->_processors[dimension] >1 ;
|
||||
assert(simd_layout==1);
|
||||
assert(comm_dim==1);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
||||
commVector<vobj> send_buf(buffer_size);
|
||||
commVector<vobj> recv_buf(buffer_size);
|
||||
|
||||
int cb= (cbmask==0x2)? Odd : Even;
|
||||
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
int sx = (x+sshift)%rd;
|
||||
int comm_proc = ((x+sshift)/rd)%pd;
|
||||
|
||||
if (comm_proc==0) {
|
||||
|
||||
Copy_plane(ret,rhs,dimension,x,sx,cbmask);
|
||||
|
||||
} else {
|
||||
|
||||
int words = send_buf.size();
|
||||
if (cbmask != 0x3) words=words>>1;
|
||||
|
||||
int bytes = words * sizeof(vobj);
|
||||
|
||||
Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask);
|
||||
|
||||
int rank = grid->_processor;
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf[0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf[0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
grid->Barrier();
|
||||
|
||||
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
|
||||
{
|
||||
GridBase *grid=rhs._grid;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
|
||||
int fd = grid->_fdimensions[dimension];
|
||||
int rd = grid->_rdimensions[dimension];
|
||||
int ld = grid->_ldimensions[dimension];
|
||||
int pd = grid->_processors[dimension];
|
||||
int simd_layout = grid->_simd_layout[dimension];
|
||||
int comm_dim = grid->_processors[dimension] >1 ;
|
||||
|
||||
//std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
|
||||
// << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
|
||||
// << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;
|
||||
|
||||
assert(comm_dim==1);
|
||||
assert(simd_layout==2);
|
||||
assert(shift>=0);
|
||||
assert(shift<fd);
|
||||
|
||||
int permute_type=grid->PermuteType(dimension);
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Simd direction uses an extract/merge pair
|
||||
///////////////////////////////////////////////
|
||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||
int words = sizeof(vobj)/sizeof(vector_type);
|
||||
|
||||
std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||
std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||
|
||||
int bytes = buffer_size*sizeof(scalar_object);
|
||||
|
||||
std::vector<scalar_object *> pointers(Nsimd); //
|
||||
std::vector<scalar_object *> rpointers(Nsimd); // received pointers
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Work out what to send where
|
||||
///////////////////////////////////////////
|
||||
int cb = (cbmask==0x2)? Odd : Even;
|
||||
int sshift= grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||
|
||||
// loop over outer coord planes orthog to dim
|
||||
for(int x=0;x<rd;x++){
|
||||
|
||||
// FIXME call local permute copy if none are offnode.
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
pointers[i] = &send_buf_extract[i][0];
|
||||
}
|
||||
int sx = (x+sshift)%rd;
|
||||
Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
|
||||
int inner_bit = (Nsimd>>(permute_type+1));
|
||||
int ic= (i&inner_bit)? 1:0;
|
||||
|
||||
int my_coor = rd*ic + x;
|
||||
int nbr_coor = my_coor+sshift;
|
||||
int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
|
||||
|
||||
int nbr_ic = (nbr_coor%ld)/rd; // inner coord of peer
|
||||
int nbr_ox = (nbr_coor%rd); // outer coord of peer
|
||||
int nbr_lane = (i&(~inner_bit));
|
||||
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
|
||||
if (nbr_ic) nbr_lane|=inner_bit;
|
||||
|
||||
assert (sx == nbr_ox);
|
||||
|
||||
if(nbr_proc){
|
||||
grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
|
||||
grid->SendToRecvFrom((void *)&send_buf_extract[nbr_lane][0],
|
||||
xmit_to_rank,
|
||||
(void *)&recv_buf_extract[i][0],
|
||||
recv_from_rank,
|
||||
bytes);
|
||||
grid->Barrier();
|
||||
rpointers[i] = &recv_buf_extract[i][0];
|
||||
} else {
|
||||
rpointers[i] = &send_buf_extract[nbr_lane][0];
|
||||
}
|
||||
|
||||
}
|
||||
Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
39
Grid/cshift/Cshift_none.h
Normal file
39
Grid/cshift/Cshift_none.h
Normal file
@ -0,0 +1,39 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/cshift/Cshift_none.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_CSHIFT_NONE_H_
|
||||
#define _GRID_CSHIFT_NONE_H_
|
||||
namespace Grid {
|
||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
|
||||
{
|
||||
Lattice<vobj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
|
||||
Cshift_local(ret,rhs,dimension,shift);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
14750
Grid/json/json.hpp
Normal file
14750
Grid/json/json.hpp
Normal file
File diff suppressed because it is too large
Load Diff
33
Grid/lattice/Lattice.h
Normal file
33
Grid/lattice/Lattice.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Lattice.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_H
|
||||
#define GRID_LATTICE_H
|
||||
|
||||
#include <Grid/lattice/Lattice_base.h>
|
||||
|
||||
#endif
|
466
Grid/lattice/Lattice_ET.h
Normal file
466
Grid/lattice/Lattice_ET.h
Normal file
@ -0,0 +1,466 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_ET.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ET_H
|
||||
#define GRID_LATTICE_ET_H
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
#include <typeinfo>
|
||||
#include <vector>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Predicated where support
|
||||
////////////////////////////////////////////////////
|
||||
template <class iobj, class vobj, class robj>
|
||||
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||
const robj &iffalse) {
|
||||
typename std::remove_const<vobj>::type ret;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
const int Nsimd = vobj::vector_type::Nsimd();
|
||||
const int words = sizeof(vobj) / sizeof(vector_type);
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals(Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
extract(iftrue, truevals);
|
||||
extract(iffalse, falsevals);
|
||||
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||
|
||||
for (int s = 0; s < Nsimd; s++) {
|
||||
if (mask[s]) falsevals[s] = truevals[s];
|
||||
}
|
||||
|
||||
merge(ret, falsevals);
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// recursive evaluation of expressions; Could
|
||||
// switch to generic approach with variadics, a la
|
||||
// Antonin's Lat Sim but the repack to variadic with popped
|
||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
||||
////////////////////////////////////////////
|
||||
|
||||
// leaf eval of lattice ; should enable if protect using traits
|
||||
|
||||
template <typename T>
|
||||
using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||
|
||||
template <typename T>
|
||||
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
|
||||
|
||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||
|
||||
//Specialization of getVectorType for lattices
|
||||
template<typename T>
|
||||
struct getVectorType<Lattice<T> >{
|
||||
typedef typename Lattice<T>::vector_object type;
|
||||
};
|
||||
|
||||
template<class sobj>
|
||||
inline sobj eval(const unsigned int ss, const sobj &arg)
|
||||
{
|
||||
return arg;
|
||||
}
|
||||
template <class lobj>
|
||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
|
||||
return arg._odata[ss];
|
||||
}
|
||||
|
||||
// handle nodes in syntax tree
|
||||
template <typename Op, typename T1>
|
||||
auto inline eval(
|
||||
const unsigned int ss,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)));
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
auto inline eval(
|
||||
const unsigned int ss,
|
||||
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)));
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
auto inline eval(const unsigned int ss,
|
||||
const LatticeTrinaryExpression<Op, T1, T2, T3>
|
||||
&expr) // eval three operands
|
||||
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)),
|
||||
eval(ss, std::get<2>(expr.second)))) {
|
||||
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||
eval(ss, std::get<1>(expr.second)),
|
||||
eval(ss, std::get<2>(expr.second)));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,
|
||||
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
if (grid) {
|
||||
conformable(grid, lat._grid);
|
||||
}
|
||||
grid = lat._grid;
|
||||
}
|
||||
template <class T1,
|
||||
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void GridFromExpression(GridBase *&grid,
|
||||
const T1 ¬lat) // non-lattice leaf
|
||||
{}
|
||||
template <typename Op, typename T1>
|
||||
inline void GridFromExpression(GridBase *&grid,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
inline void GridFromExpression(
|
||||
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
GridFromExpression(grid, std::get<1>(expr.second));
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void GridFromExpression(
|
||||
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||
GridFromExpression(grid, std::get<1>(expr.second));
|
||||
GridFromExpression(grid, std::get<2>(expr.second));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||
// tree recursion
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class T1,
|
||||
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||
{
|
||||
if ((cb == Odd) || (cb == Even)) {
|
||||
assert(cb == lat.checkerboard);
|
||||
}
|
||||
cb = lat.checkerboard;
|
||||
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||
}
|
||||
template <class T1,
|
||||
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||
inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf
|
||||
{
|
||||
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1>
|
||||
inline void CBFromExpression(int &cb,
|
||||
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
inline void CBFromExpression(int &cb,
|
||||
const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
CBFromExpression(cb, std::get<1>(expr.second));
|
||||
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||
}
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
inline void CBFromExpression(
|
||||
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||
CBFromExpression(cb, std::get<1>(expr.second));
|
||||
CBFromExpression(cb, std::get<2>(expr.second));
|
||||
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Unary operators and funcs
|
||||
////////////////////////////////////////////
|
||||
#define GridUnopClass(name, ret) \
|
||||
template <class arg> \
|
||||
struct name { \
|
||||
static auto inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||
};
|
||||
|
||||
GridUnopClass(UnarySub, -a);
|
||||
GridUnopClass(UnaryNot, Not(a));
|
||||
GridUnopClass(UnaryAdj, adj(a));
|
||||
GridUnopClass(UnaryConj, conjugate(a));
|
||||
GridUnopClass(UnaryTrace, trace(a));
|
||||
GridUnopClass(UnaryTranspose, transpose(a));
|
||||
GridUnopClass(UnaryTa, Ta(a));
|
||||
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||
GridUnopClass(UnaryReal, real(a));
|
||||
GridUnopClass(UnaryImag, imag(a));
|
||||
GridUnopClass(UnaryToReal, toReal(a));
|
||||
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||
GridUnopClass(UnaryTimesI, timesI(a));
|
||||
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||
GridUnopClass(UnaryAbs, abs(a));
|
||||
GridUnopClass(UnarySqrt, sqrt(a));
|
||||
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||
GridUnopClass(UnarySin, sin(a));
|
||||
GridUnopClass(UnaryCos, cos(a));
|
||||
GridUnopClass(UnaryAsin, asin(a));
|
||||
GridUnopClass(UnaryAcos, acos(a));
|
||||
GridUnopClass(UnaryLog, log(a));
|
||||
GridUnopClass(UnaryExp, exp(a));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Binary operators
|
||||
////////////////////////////////////////////
|
||||
#define GridBinOpClass(name, combination) \
|
||||
template <class left, class right> \
|
||||
struct name { \
|
||||
static auto inline func(const left &lhs, const right &rhs) \
|
||||
-> decltype(combination) const { \
|
||||
return combination; \
|
||||
} \
|
||||
}
|
||||
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||
GridBinOpClass(BinarySub, lhs - rhs);
|
||||
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||
|
||||
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Trinary conditional op
|
||||
////////////////////////////////////////////////////
|
||||
#define GridTrinOpClass(name, combination) \
|
||||
template <class predicate, class left, class right> \
|
||||
struct name { \
|
||||
static auto inline func(const predicate &pred, const left &lhs, \
|
||||
const right &rhs) -> decltype(combination) const { \
|
||||
return combination; \
|
||||
} \
|
||||
}
|
||||
|
||||
GridTrinOpClass(
|
||||
TrinaryWhere,
|
||||
(predicatedWhere<predicate, typename std::remove_reference<left>::type,
|
||||
typename std::remove_reference<right>::type>(pred, lhs,
|
||||
rhs)));
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Operator syntactical glue
|
||||
////////////////////////////////////////////
|
||||
|
||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
#define GRID_TRINOP(name) \
|
||||
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||
|
||||
#define GRID_DEF_UNOP(op, name) \
|
||||
template <typename T1, \
|
||||
typename std::enable_if<is_lattice<T1>::value || \
|
||||
is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &arg) \
|
||||
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
|
||||
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_LEFT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<is_lattice<T1>::value || \
|
||||
is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype( \
|
||||
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), \
|
||||
std::forward_as_tuple(lhs, rhs)))) { \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||
}
|
||||
|
||||
#define GRID_BINOP_RIGHT(op, name) \
|
||||
template <typename T1, typename T2, \
|
||||
typename std::enable_if<!is_lattice<T1>::value && \
|
||||
!is_lattice_expr<T1>::value, \
|
||||
T1>::type * = nullptr, \
|
||||
typename std::enable_if<is_lattice<T2>::value || \
|
||||
is_lattice_expr<T2>::value, \
|
||||
T2>::type * = nullptr> \
|
||||
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||
->decltype( \
|
||||
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), \
|
||||
std::forward_as_tuple(lhs, rhs)))) { \
|
||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||
}
|
||||
|
||||
#define GRID_DEF_BINOP(op, name) \
|
||||
GRID_BINOP_LEFT(op, name); \
|
||||
GRID_BINOP_RIGHT(op, name);
|
||||
|
||||
#define GRID_DEF_TRINOP(op, name) \
|
||||
template <typename T1, typename T2, typename T3> \
|
||||
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||
->decltype( \
|
||||
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||
const T3 &>(std::make_pair( \
|
||||
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
|
||||
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||
const T3 &>(std::make_pair( \
|
||||
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
|
||||
}
|
||||
////////////////////////
|
||||
// Operator definitions
|
||||
////////////////////////
|
||||
|
||||
GRID_DEF_UNOP(operator-, UnarySub);
|
||||
GRID_DEF_UNOP(Not, UnaryNot);
|
||||
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||
GRID_DEF_UNOP(real, UnaryReal);
|
||||
GRID_DEF_UNOP(imag, UnaryImag);
|
||||
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||
// abs-fabs-dabs-labs thing
|
||||
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||
GRID_DEF_UNOP(sin, UnarySin);
|
||||
GRID_DEF_UNOP(cos, UnaryCos);
|
||||
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||
GRID_DEF_UNOP(log, UnaryLog);
|
||||
GRID_DEF_UNOP(exp, UnaryExp);
|
||||
|
||||
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||
GRID_DEF_BINOP(operator-, BinarySub);
|
||||
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||
|
||||
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||
|
||||
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Closure convenience to force expression to evaluate
|
||||
/////////////////////////////////////////////////////////////
|
||||
template <class Op, class T1>
|
||||
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
|
||||
expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2>
|
||||
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second))))>
|
||||
ret(expr);
|
||||
return ret;
|
||||
}
|
||||
template <class Op, class T1, class T2, class T3>
|
||||
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second)),
|
||||
eval(0, std::get<2>(expr.second))))> {
|
||||
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||
eval(0, std::get<1>(expr.second)),
|
||||
eval(0, std::get<2>(expr.second))))>
|
||||
ret(expr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#undef GRID_UNOP
|
||||
#undef GRID_BINOP
|
||||
#undef GRID_TRINOP
|
||||
|
||||
#undef GRID_DEF_UNOP
|
||||
#undef GRID_DEF_BINOP
|
||||
#undef GRID_DEF_TRINOP
|
||||
}
|
||||
|
||||
#if 0
|
||||
using namespace Grid;
|
||||
|
||||
int main(int argc,char **argv){
|
||||
|
||||
Lattice<double> v1(16);
|
||||
Lattice<double> v2(16);
|
||||
Lattice<double> v3(16);
|
||||
|
||||
BinaryAdd<double,double> tmp;
|
||||
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
||||
expr(std::make_pair(tmp,
|
||||
std::forward_as_tuple(v1,v2)));
|
||||
tmp.func(eval(0,v1),eval(0,v2));
|
||||
|
||||
auto var = v1+v2;
|
||||
std::cout<<GridLogMessage<<typeid(var).name()<<std::endl;
|
||||
|
||||
v3=v1+v2;
|
||||
v3=v1+v2+v1*v2;
|
||||
};
|
||||
|
||||
void testit(Lattice<double> &v1,Lattice<double> &v2,Lattice<double> &v3)
|
||||
{
|
||||
v3=v1+v2+v1*v2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
255
Grid/lattice/Lattice_arith.h
Normal file
255
Grid/lattice/Lattice_arith.h
Normal file
@ -0,0 +1,255 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_arith.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_ARITH_H
|
||||
#define GRID_LATTICE_ARITH_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(ret,lhs);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
ret.checkerboard = lhs.checkerboard;
|
||||
conformable(lhs,ret);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs._odata[ss],&rhs);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs._odata[ss],&rhs);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// avoid copy back routines for mult, mac, sub, add
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mult(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mult(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
mac(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
mac(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
sub(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
sub(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> strong_inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
obj1 tmp;
|
||||
add(&tmp,&lhs,&rhs._odata[ss]);
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
add(&ret._odata[ss],&lhs,&rhs._odata[ss]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
ret.checkerboard = x.checkerboard;
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
parallel_for(int ss=0;ss<x._grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
#else
|
||||
ret._odata[ss]=a*x._odata[ss]+b*y._odata[ss];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> strong_inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
375
Grid/lattice/Lattice_base.h
Normal file
375
Grid/lattice/Lattice_base.h
Normal file
@ -0,0 +1,375 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_base.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_BASE_H
|
||||
#define GRID_LATTICE_BASE_H
|
||||
|
||||
#define STREAMING_STORES
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// TODO:
|
||||
// mac,real,imag
|
||||
|
||||
// Functionality:
|
||||
// -=,+=,*=,()
|
||||
// add,+,sub,-,mult,mac,*
|
||||
// adj,conjugate
|
||||
// real,imag
|
||||
// transpose,transposeIndex
|
||||
// trace,traceIndex
|
||||
// peekIndex
|
||||
// innerProduct,outerProduct,
|
||||
// localNorm2
|
||||
// localInnerProduct
|
||||
|
||||
extern int GridCshiftPermuteMap[4][16];
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Basic expressions used in Expression Template
|
||||
////////////////////////////////////////////////
|
||||
|
||||
class LatticeBase
|
||||
{
|
||||
public:
|
||||
virtual ~LatticeBase(void) = default;
|
||||
GridBase *_grid;
|
||||
};
|
||||
|
||||
class LatticeExpressionBase {};
|
||||
|
||||
template <typename Op, typename T1>
|
||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeUnaryExpression(const std::pair<Op,std::tuple<T1> > &arg): std::pair<Op,std::tuple<T1> >(arg) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename T1, typename T2>
|
||||
class LatticeBinaryExpression : public std::pair<Op,std::tuple<T1,T2> > , public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeBinaryExpression(const std::pair<Op,std::tuple<T1,T2> > &arg): std::pair<Op,std::tuple<T1,T2> >(arg) {};
|
||||
};
|
||||
|
||||
template <typename Op, typename T1, typename T2, typename T3>
|
||||
class LatticeTrinaryExpression :public std::pair<Op,std::tuple<T1,T2,T3> >, public LatticeExpressionBase {
|
||||
public:
|
||||
LatticeTrinaryExpression(const std::pair<Op,std::tuple<T1,T2,T3> > &arg): std::pair<Op,std::tuple<T1,T2,T3> >(arg) {};
|
||||
};
|
||||
|
||||
void inline conformable(GridBase *lhs,GridBase *rhs)
|
||||
{
|
||||
assert(lhs == rhs);
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
class Lattice : public LatticeBase
|
||||
{
|
||||
public:
|
||||
int checkerboard;
|
||||
Vector<vobj> _odata;
|
||||
|
||||
// to pthread need a computable loop where loop induction is not required
|
||||
int begin(void) { return 0;};
|
||||
int end(void) { return _odata.size(); }
|
||||
vobj & operator[](int i) { return _odata[i]; };
|
||||
const vobj & operator[](int i) const { return _odata[i]; };
|
||||
|
||||
public:
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef vobj vector_object;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Expression Template closure support
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
{
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
conformable(_grid,egrid);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
//vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
#else
|
||||
_odata[ss] = eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
//GridFromExpression is tricky to do
|
||||
template<class Op,class T1>
|
||||
Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2>
|
||||
Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
#ifdef STREAMING_STORES
|
||||
vobj tmp = eval(ss,expr);
|
||||
vstream(_odata[ss] ,tmp);
|
||||
#else
|
||||
_odata[ss]=eval(ss,expr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
template<class Op,class T1, class T2, class T3>
|
||||
Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
|
||||
_grid = nullptr;
|
||||
GridFromExpression(_grid,expr);
|
||||
assert(_grid!=nullptr);
|
||||
|
||||
int cb=-1;
|
||||
CBFromExpression(cb,expr);
|
||||
assert( (cb==Odd) || (cb==Even));
|
||||
checkerboard=cb;
|
||||
|
||||
_odata.resize(_grid->oSites());
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
vstream(_odata[ss] ,eval(ss,expr));
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Constructor requires "grid" passed.
|
||||
// what about a default grid?
|
||||
//////////////////////////////////////////////////////////////////
|
||||
Lattice(GridBase *grid) : _odata(grid->oSites()) {
|
||||
_grid = grid;
|
||||
// _odata.reserve(_grid->oSites());
|
||||
// _odata.resize(_grid->oSites());
|
||||
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
|
||||
assert((((uint64_t)&_odata[0])&0xF) ==0);
|
||||
checkerboard=0;
|
||||
}
|
||||
|
||||
Lattice(const Lattice& r){ // copy constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
Lattice(Lattice&& r){ // move constructor
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata=std::move(r._odata);
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
|
||||
{
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata =std::move(r._odata);
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
|
||||
_grid = r._grid;
|
||||
checkerboard = r.checkerboard;
|
||||
_odata.resize(_grid->oSites());// essential
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
if (_grid != grid) {
|
||||
_grid = grid;
|
||||
_odata.resize(grid->oSites());
|
||||
checkerboard = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||
*this = (*this)*r;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
|
||||
*this = (*this)-r;
|
||||
return *this;
|
||||
}
|
||||
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
|
||||
*this = (*this)+r;
|
||||
return *this;
|
||||
}
|
||||
}; // class Lattice
|
||||
|
||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||
std::vector<int> gcoor;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ss;
|
||||
for(int g=0;g<o._grid->_gsites;g++){
|
||||
o._grid->GlobalIndexToGlobalCoor(g,gcoor);
|
||||
peekSite(ss,o,gcoor);
|
||||
stream<<"[";
|
||||
for(int d=0;d<gcoor.size();d++){
|
||||
stream<<gcoor[d];
|
||||
if(d!=gcoor.size()-1) stream<<",";
|
||||
}
|
||||
stream<<"]\t";
|
||||
stream<<ss<<std::endl;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "Lattice_conformable.h"
|
||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||
#include "Lattice_ET.h"
|
||||
#else
|
||||
#include "Lattice_overload.h"
|
||||
#endif
|
||||
#include "Lattice_arith.h"
|
||||
#include "Lattice_trace.h"
|
||||
#include "Lattice_transpose.h"
|
||||
#include "Lattice_local.h"
|
||||
#include "Lattice_reduction.h"
|
||||
#include "Lattice_peekpoke.h"
|
||||
#include "Lattice_reality.h"
|
||||
#include "Lattice_comparison_utils.h"
|
||||
#include "Lattice_comparison.h"
|
||||
#include "Lattice_coordinate.h"
|
||||
#include "Lattice_where.h"
|
||||
#include "Lattice_rng.h"
|
||||
#include "Lattice_unary.h"
|
||||
#include "Lattice_transfer.h"
|
||||
|
||||
|
||||
#endif
|
169
Grid/lattice/Lattice_comparison.h
Normal file
169
Grid/lattice/Lattice_comparison.h
Normal file
@ -0,0 +1,169 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COMPARISON_H
|
||||
#define GRID_LATTICE_COMPARISON_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// relational operators
|
||||
//
|
||||
// Support <,>,<=,>=,==,!=
|
||||
//
|
||||
//Query supporting bitwise &, |, ^, !
|
||||
//Query supporting logical &&, ||,
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LLComparison(vfunctor op,const Lattice<lobj> &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare lattice to scalar
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> LSComparison(vfunctor op,const Lattice<lobj> &lhs,const robj &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// compare scalar to lattice
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template<class vfunctor,class lobj,class robj>
|
||||
inline Lattice<vInteger> SLComparison(vfunctor op,const lobj &lhs,const Lattice<robj> &rhs)
|
||||
{
|
||||
Lattice<vInteger> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=op(lhs._odata[ss],rhs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Map to functors
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Less than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator < (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vlt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Less than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator <= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vle<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// Greater than
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator > (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vgt<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// Greater than equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator >= (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vge<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
// equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator == (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(veq<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
|
||||
|
||||
// not equal
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const Lattice<robj> & rhs) {
|
||||
return LLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const Lattice<lobj> & lhs, const robj & rhs) {
|
||||
return LSComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
template<class lobj,class robj>
|
||||
inline Lattice<vInteger> operator != (const lobj & lhs, const Lattice<robj> & rhs) {
|
||||
return SLComparison(vne<lobj,robj>(),lhs,rhs);
|
||||
}
|
||||
}
|
||||
#endif
|
232
Grid/lattice/Lattice_comparison_utils.h
Normal file
232
Grid/lattice/Lattice_comparison_utils.h
Normal file
@ -0,0 +1,232 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_comparison_utils.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_COMPARISON_H
|
||||
#define GRID_COMPARISON_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////
|
||||
// This implementation is a bit poor.
|
||||
//
|
||||
// Only support relational logical operations (<, > etc)
|
||||
// on scalar objects. Therefore can strip any tensor structures.
|
||||
//
|
||||
// Should guard this with isGridTensor<> enable if?
|
||||
/////////////////////////////////////////
|
||||
//
|
||||
// Generic list of functors
|
||||
//
|
||||
template<class lobj,class robj> class veq {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vne {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vlt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vle {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vgt {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class vge {
|
||||
public:
|
||||
vInteger operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
// Generic list of functors
|
||||
template<class lobj,class robj> class seq {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) == (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sne {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) != (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class slt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) < (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sle {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) <= (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sgt {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) > (rhs);
|
||||
}
|
||||
};
|
||||
template<class lobj,class robj> class sge {
|
||||
public:
|
||||
Integer operator()(const lobj &lhs, const robj &rhs)
|
||||
{
|
||||
return (lhs) >= (rhs);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Integer and real get extra relational functions.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<scalar> vrhs(vsimd::Nsimd());
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(lhs,vlhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(vlhs[s],rhs);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class sfunctor, class vsimd,IfNotComplex<vsimd> = 0>
|
||||
inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs)
|
||||
{
|
||||
typedef typename vsimd::scalar_type scalar;
|
||||
std::vector<scalar> vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation
|
||||
std::vector<Integer> vpred(vsimd::Nsimd());
|
||||
vInteger ret;
|
||||
extract<vsimd,scalar>(rhs,vrhs);
|
||||
for(int s=0;s<vsimd::Nsimd();s++){
|
||||
vpred[s] = sop(lhs,vrhs[s]);
|
||||
}
|
||||
merge<vInteger,Integer>(ret,vpred);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \
|
||||
{\
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
return lhs._internal op rhs; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
40
Grid/lattice/Lattice_conformable.h
Normal file
40
Grid/lattice/Lattice_conformable.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_conformable.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_CONFORMABLE_H
|
||||
#define GRID_LATTICE_CONFORMABLE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class obj1,class obj2> void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
|
||||
{
|
||||
assert(lhs._grid == rhs._grid);
|
||||
assert(lhs.checkerboard == rhs.checkerboard);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
56
Grid/lattice/Lattice_coordinate.h
Normal file
56
Grid/lattice/Lattice_coordinate.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_coordinate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_COORDINATE_H
|
||||
#define GRID_LATTICE_COORDINATE_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
|
||||
{
|
||||
typedef typename iobj::scalar_type scalar_type;
|
||||
typedef typename iobj::vector_type vector_type;
|
||||
|
||||
GridBase *grid = l._grid;
|
||||
int Nsimd = grid->iSites();
|
||||
|
||||
std::vector<int> gcoor;
|
||||
std::vector<scalar_type> mergebuf(Nsimd);
|
||||
|
||||
vector_type vI;
|
||||
for(int o=0;o<grid->oSites();o++){
|
||||
for(int i=0;i<grid->iSites();i++){
|
||||
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
|
||||
mergebuf[i]=(Integer)gcoor[mu];
|
||||
}
|
||||
merge<vector_type,scalar_type>(vI,mergebuf);
|
||||
l._odata[o]=vI;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
75
Grid/lattice/Lattice_local.h
Normal file
75
Grid/lattice/Lattice_local.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_local.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
|
||||
#define GRID_LATTICE_LOCALREDUCTION_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// localInner, localNorm, outerProduct
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Non site, reduced locally reduced routines
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
// localNorm2,
|
||||
template<class vobj>
|
||||
inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// localInnerProduct
|
||||
template<class vobj>
|
||||
inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs) -> Lattice<typename vobj::tensor_reduced>
|
||||
{
|
||||
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// outerProduct Scalar x Scalar -> Scalar
|
||||
// Vector x Vector -> Matrix
|
||||
template<class ll,class rr>
|
||||
inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
138
Grid/lattice/Lattice_overload.h
Normal file
138
Grid/lattice/Lattice_overload.h
Normal file
@ -0,0 +1,138 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_overload.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_OVERLOAD_H
|
||||
#define GRID_LATTICE_OVERLOAD_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// unary negation
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> operator -(const Lattice<vobj> &r)
|
||||
{
|
||||
Lattice<vobj> ret(r._grid);
|
||||
parallel_for(int ss=0;ss<r._grid->oSites();ss++){
|
||||
vstream(ret._odata[ss], -r._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lattice BinOp Lattice,
|
||||
//NB mult performs conformable check. Do not reapply here for performance.
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
|
||||
mult(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs._odata[0])> ret(rhs._grid);
|
||||
add(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs._odata[0])> ret(rhs._grid);
|
||||
sub(ret,lhs,rhs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Scalar BinOp Lattice ;generate return type
|
||||
template<class left,class right>
|
||||
inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs*rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs+rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs+rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs+rhs._odata[ss];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs-rhs._odata[0])>
|
||||
{
|
||||
Lattice<decltype(lhs-rhs._odata[0])> ret(rhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss];
|
||||
vstream(ret._odata[ss],tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]*rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]+rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]+rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]+rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class left,class right>
|
||||
inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]-rhs)>
|
||||
{
|
||||
Lattice<decltype(lhs._odata[0]-rhs)> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites(); ss++){
|
||||
decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs;
|
||||
vstream(ret._odata[ss],tmp);
|
||||
// ret._odata[ss]=lhs._odata[ss]-rhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
205
Grid/lattice/Lattice_peekpoke.h
Normal file
205
Grid/lattice/Lattice_peekpoke.h
Normal file
@ -0,0 +1,205 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_peekpoke.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_PEEK_H
|
||||
#define GRID_LATTICE_PEEK_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Peeking and poking around
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Peek internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
template<int Index,class vobj>
|
||||
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
|
||||
{
|
||||
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
|
||||
ret.checkerboard=lhs.checkerboard;
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Poke internal indices of a Lattice object
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
|
||||
{
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
|
||||
}
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
void PokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
|
||||
{
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Poke a scalar object into the SIMD array
|
||||
//////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void pokeSite(const sobj &s,Lattice<vobj> &l,const std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
int rank,odx,idx;
|
||||
// Optional to broadcast from node 0.
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
grid->Broadcast(grid->BossRank(),s);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
|
||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
||||
if ( rank == grid->ThisRank() ) {
|
||||
extract(l._odata[odx],buf);
|
||||
buf[idx] = s;
|
||||
merge(l._odata[odx],buf);
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekSite(sobj &s,const Lattice<vobj> &l,const std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard == l._grid->CheckerBoard(site));
|
||||
|
||||
int rank,odx,idx;
|
||||
grid->GlobalCoorToRankIndex(rank,odx,idx,site);
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(l._odata[odx],buf);
|
||||
|
||||
s = buf[idx];
|
||||
|
||||
grid->Broadcast(rank,s);
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Peek a scalar object from the SIMD array
|
||||
//////////////////////////////////////////////////////////
|
||||
template<class vobj,class sobj>
|
||||
void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid = l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
pt[w] = vp[idx+w*Nsimd];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
template<class vobj,class sobj>
|
||||
void pokeLocalSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
|
||||
|
||||
GridBase *grid=l._grid;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||
|
||||
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||
int odx,idx;
|
||||
idx= grid->iIndex(site);
|
||||
odx= grid->oIndex(site);
|
||||
|
||||
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||
scalar_type * pt = (scalar_type *)&s;
|
||||
|
||||
for(int w=0;w<words;w++){
|
||||
vp[idx+w*Nsimd] = pt[w];
|
||||
}
|
||||
|
||||
return;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
57
Grid/lattice/Lattice_reality.h
Normal file
57
Grid/lattice/Lattice_reality.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_reality.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REALITY_H
|
||||
#define GRID_LATTICE_REALITY_H
|
||||
|
||||
|
||||
// FIXME .. this is the sector of the code
|
||||
// I am most worried about the directions
|
||||
// The choice of burying complex in the SIMD
|
||||
// is making the use of "real" and "imag" very cumbersome
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = adj(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
template<class vobj> inline Lattice<vobj> conjugate(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = conjugate(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
#endif
|
733
Grid/lattice/Lattice_reduction.h
Normal file
733
Grid/lattice/Lattice_reduction.h
Normal file
@ -0,0 +1,733 @@
|
||||
/*************************************************************************************
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
Source file: ./lib/lattice/Lattice_reduction.h
|
||||
Copyright (C) 2015
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_REDUCTION_H
|
||||
#define GRID_LATTICE_REDUCTION_H
|
||||
|
||||
#include <Grid/Grid_Eigen_Dense.h>
|
||||
|
||||
namespace Grid {
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
auto nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
// Double inner product
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
GridBase *grid = left._grid;
|
||||
const int pad = 8;
|
||||
|
||||
ComplexD inner;
|
||||
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
// All threads sum across SIMD; reduce serial work at end
|
||||
// one write per cacheline with streaming store
|
||||
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||
vstream(sumarray[thr*pad],tmp);
|
||||
}
|
||||
|
||||
inner=0.0;
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
inner = inner+sumarray[i*pad];
|
||||
}
|
||||
right._grid->GlobalSum(inner);
|
||||
return inner;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// Fast axpby_norm
|
||||
// z = a x + b y
|
||||
// return norm z
|
||||
/////////////////////////
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpy_norm_fast(Lattice<vobj> &z,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
sobj one(1.0);
|
||||
return axpby_norm_fast(z,a,one,x,y);
|
||||
}
|
||||
|
||||
template<class sobj,class vobj> strong_inline RealD
|
||||
axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
const int pad = 8;
|
||||
z.checkerboard = x.checkerboard;
|
||||
conformable(z,x);
|
||||
conformable(x,y);
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_typeD vector_type;
|
||||
RealD nrm;
|
||||
|
||||
GridBase *grid = x._grid;
|
||||
|
||||
Vector<RealD> sumarray(grid->SumArraySize()*pad);
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
// private to thread; sub summation
|
||||
decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vobj tmp = a*x._odata[ss]+b*y._odata[ss];
|
||||
vnrm = vnrm + innerProductD(tmp,tmp);
|
||||
vstream(z._odata[ss],tmp);
|
||||
}
|
||||
vstream(sumarray[thr*pad],real(Reduce(TensorRemove(vnrm)))) ;
|
||||
}
|
||||
|
||||
nrm = 0.0; // sum across threads; linear in thread count but fast
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
nrm = nrm+sumarray[i*pad];
|
||||
}
|
||||
z._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
||||
{
|
||||
GridBase *grid=arg._grid;
|
||||
int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
}
|
||||
|
||||
vobj vsum=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
sobj ssum=zero;
|
||||
|
||||
std::vector<sobj> buf(Nsimd);
|
||||
extract(vsum,buf);
|
||||
|
||||
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
|
||||
arg._grid->GlobalSum(ssum);
|
||||
|
||||
return ssum;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
|
||||
{
|
||||
///////////////////////////////////////////////////////
|
||||
// FIXME precision promoted summation
|
||||
// may be important for correlation functions
|
||||
// But easily avoided by using double precision fields
|
||||
///////////////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase *grid = Data._grid;
|
||||
assert(grid!=NULL);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
|
||||
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
// sum over reduced dimension planes, breaking out orthog dir
|
||||
// Parallel over orthog direction
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
lvSum[r]=lvSum[r]+Data._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
extract(lvSum[rt],extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx];
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
sobj gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=zero;
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;
|
||||
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
std::vector<scalar_type> lsSum;
|
||||
localSliceInnerProductVector(result, lhs, rhs, lsSum, orthogdim);
|
||||
globalSliceInnerProductVector(result, lhs, lsSum, orthogdim);
|
||||
// std::cout << GridLogMessage << "End mySliceInnerProductVector" << std::endl;
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
static void localSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, const Lattice<vobj> &rhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
// std::cout << GridLogMessage << "Start prep" << std::endl;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
// std::cout << GridLogMessage << "Start alloc" << std::endl;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
|
||||
// std::cout << GridLogMessage << "End alloc" << std::endl;
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
// std::cout << GridLogMessage << "End prep" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
|
||||
vector_type vv;
|
||||
parallel_for(int r=0;r<rd;r++)
|
||||
{
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss = so + n * stride + b;
|
||||
vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
|
||||
lvSum[r] = lvSum[r] + vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End parallel inner product" << std::endl;
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
|
||||
}
|
||||
template <class vobj>
|
||||
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
int fd = result.size();
|
||||
int ld = lsSum.size();
|
||||
// sum over nodes.
|
||||
std::vector<scalar_type> gsum;
|
||||
gsum.resize(fd, scalar_type(0.0));
|
||||
// std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum[t]=lsSum[lt];
|
||||
}
|
||||
}
|
||||
// std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
|
||||
// std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
|
||||
grid->GlobalSumVector(&gsum[0], fd);
|
||||
// std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
|
||||
|
||||
result = gsum;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
{
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
GridBase *grid = lhs._grid;
|
||||
assert(grid!=NULL);
|
||||
conformable(grid,rhs._grid);
|
||||
|
||||
const int Nd = grid->_ndimension;
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
assert(orthogdim >= 0);
|
||||
assert(orthogdim < Nd);
|
||||
|
||||
int fd=grid->_fdimensions[orthogdim];
|
||||
int ld=grid->_ldimensions[orthogdim];
|
||||
int rd=grid->_rdimensions[orthogdim];
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
|
||||
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
|
||||
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
|
||||
|
||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||
for(int r=0;r<rd;r++){
|
||||
lvSum[r]=zero;
|
||||
}
|
||||
|
||||
int e1= grid->_slice_nblock[orthogdim];
|
||||
int e2= grid->_slice_block [orthogdim];
|
||||
int stride=grid->_slice_stride[orthogdim];
|
||||
|
||||
parallel_for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
for(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
|
||||
lvSum[r]=lvSum[r]+vv;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sum across simd lanes in the plane, breaking out orthog dir.
|
||||
std::vector<int> icoor(Nd);
|
||||
for(int rt=0;rt<rd;rt++){
|
||||
|
||||
iScalar<vector_type> temp;
|
||||
temp._internal = lvSum[rt];
|
||||
extract(temp,extracted);
|
||||
|
||||
for(int idx=0;idx<Nsimd;idx++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,idx);
|
||||
|
||||
int ldx =rt+icoor[orthogdim]*rd;
|
||||
|
||||
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sum over nodes.
|
||||
scalar_type gsum;
|
||||
for(int t=0;t<fd;t++){
|
||||
int pt = t/ld; // processor plane
|
||||
int lt = t%ld;
|
||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||
gsum=lsSum[lt];
|
||||
} else {
|
||||
gsum=scalar_type(0.0);
|
||||
}
|
||||
|
||||
grid->GlobalSum(gsum);
|
||||
|
||||
result[t]=gsum;
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int orthogdim,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced tensor_reduced;
|
||||
|
||||
scalar_type zscale(scale);
|
||||
|
||||
GridBase *grid = X._grid;
|
||||
|
||||
int Nsimd =grid->Nsimd();
|
||||
int Nblock =grid->GlobalDimensions()[orthogdim];
|
||||
|
||||
int fd =grid->_fdimensions[orthogdim];
|
||||
int ld =grid->_ldimensions[orthogdim];
|
||||
int rd =grid->_rdimensions[orthogdim];
|
||||
|
||||
int e1 =grid->_slice_nblock[orthogdim];
|
||||
int e2 =grid->_slice_block [orthogdim];
|
||||
int stride =grid->_slice_stride[orthogdim];
|
||||
|
||||
std::vector<int> icoor;
|
||||
|
||||
for(int r=0;r<rd;r++){
|
||||
|
||||
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
|
||||
|
||||
vector_type av;
|
||||
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
grid->iCoorFromIindex(icoor,l);
|
||||
int ldx =r+icoor[orthogdim]*rd;
|
||||
scalar_type *as =(scalar_type *)&av;
|
||||
as[l] = scalar_type(a[ldx])*zscale;
|
||||
}
|
||||
|
||||
tensor_reduced at; at=av;
|
||||
|
||||
parallel_for_nest2(int n=0;n<e1;n++){
|
||||
for(int b=0;b<e2;b++){
|
||||
int ss= so+n*stride+b;
|
||||
R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
int nsimd = BlockSolverGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt_phys(0);
|
||||
std::vector<int> simd_phys(0);
|
||||
std::vector<int> mpi_phys(0);
|
||||
|
||||
for(int d=0;d<NN;d++){
|
||||
if( d!=Orthog ) {
|
||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
||||
}
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = Y[o+i*ostride];
|
||||
for(int j=0;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
};
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> s_x(Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
s_x[i] = X[o+i*ostride];
|
||||
}
|
||||
|
||||
vobj dot;
|
||||
for(int i=0;i<Nblock;i++){
|
||||
dot = s_x[0]*(scale*aa(0,i));
|
||||
for(int j=1;j<Nblock;j++){
|
||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
||||
}
|
||||
R[o+i*ostride]=dot;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
int stride=FullGrid->_slice_stride[Orthog];
|
||||
int block =FullGrid->_slice_block [Orthog];
|
||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
||||
int ostride=FullGrid->_ostride[Orthog];
|
||||
|
||||
typedef typename vobj::vector_typeD vector_typeD;
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
std::vector<vobj> Left(Nblock);
|
||||
std::vector<vobj> Right(Nblock);
|
||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
#pragma omp for collapse(2)
|
||||
for(int n=0;n<nblock;n++){
|
||||
for(int b=0;b<block;b++){
|
||||
|
||||
int o = n*stride + b;
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
Left [i] = lhs[o+i*ostride];
|
||||
Right[i] = rhs[o+i*ostride];
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
auto tmp = innerProduct(Left[i],Right[j]);
|
||||
auto rtmp = TensorRemove(tmp);
|
||||
mat_thread(i,j) += Reduce(rtmp);
|
||||
}}
|
||||
}}
|
||||
#pragma omp critical
|
||||
{
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
} /*END NAMESPACE GRID*/
|
||||
#endif
|
||||
|
||||
|
||||
|
520
Grid/lattice/Lattice_rng.h
Normal file
520
Grid/lattice/Lattice_rng.h
Normal file
@ -0,0 +1,520 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_rng.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_RNG_H
|
||||
#define GRID_LATTICE_RNG_H
|
||||
|
||||
#include <random>
|
||||
|
||||
#ifdef RNG_SITMO
|
||||
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
|
||||
#endif
|
||||
|
||||
#if defined(RNG_SITMO)
|
||||
#define RNG_FAST_DISCARD
|
||||
#else
|
||||
#undef RNG_FAST_DISCARD
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Allow the RNG state to be less dense than the fine grid
|
||||
//////////////////////////////////////////////////////////////
|
||||
inline int RNGfillable(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension;
|
||||
assert(lowerdims >= 0);
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
assert(fine->_simd_layout[d]==1);
|
||||
assert(fine->_processors[d]==1);
|
||||
}
|
||||
|
||||
int multiplicity=1;
|
||||
for(int d=0;d<lowerdims;d++){
|
||||
multiplicity=multiplicity*fine->_rdimensions[d];
|
||||
}
|
||||
// local and global volumes subdivide cleanly after SIMDization
|
||||
for(int d=0;d<rngdims;d++){
|
||||
int fd= d+lowerdims;
|
||||
assert(coarse->_processors[d] == fine->_processors[fd]);
|
||||
assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
|
||||
assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]);
|
||||
|
||||
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
|
||||
}
|
||||
return multiplicity;
|
||||
}
|
||||
|
||||
|
||||
// merge of April 11 2017
|
||||
// this function is necessary for the LS vectorised field
|
||||
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
int rngdims = coarse->_ndimension;
|
||||
|
||||
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
|
||||
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
|
||||
// assumes that the higher dimensions are not using more processors
|
||||
// all further divisions are local
|
||||
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
|
||||
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
|
||||
|
||||
// then divide the number of local sites
|
||||
// check that the total number of sims agree, meanse the iSites are the same
|
||||
assert(fine->Nsimd() == coarse->Nsimd());
|
||||
|
||||
// check that the two grids divide cleanly
|
||||
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
|
||||
|
||||
return fine->lSites() / coarse->lSites();
|
||||
}
|
||||
|
||||
// real scalars are one component
|
||||
template<class scalar,class distribution,class generator>
|
||||
void fillScalar(scalar &s,distribution &dist,generator & gen)
|
||||
{
|
||||
s=dist(gen);
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
|
||||
{
|
||||
s=ComplexF(dist(gen),dist(gen));
|
||||
}
|
||||
template<class distribution,class generator>
|
||||
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
|
||||
{
|
||||
s=ComplexD(dist(gen),dist(gen));
|
||||
}
|
||||
|
||||
class GridRNGbase {
|
||||
public:
|
||||
// One generator per site.
|
||||
// Uniform and Gaussian distributions from these generators.
|
||||
#ifdef RNG_RANLUX
|
||||
typedef std::ranlux48 RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 15;
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
typedef std::mt19937 RngEngine;
|
||||
typedef uint32_t RngStateType;
|
||||
static const int RngStateCount = std::mt19937::state_size;
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
typedef sitmo::prng_engine RngEngine;
|
||||
typedef uint64_t RngStateType;
|
||||
static const int RngStateCount = 13;
|
||||
#endif
|
||||
|
||||
std::vector<RngEngine> _generators;
|
||||
std::vector<std::uniform_real_distribution<RealD> > _uniform;
|
||||
std::vector<std::normal_distribution<RealD> > _gaussian;
|
||||
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
|
||||
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
|
||||
|
||||
///////////////////////
|
||||
// support for parallel init
|
||||
///////////////////////
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
static void Skip(RngEngine &eng,uint64_t site)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Skip by 2^40 elements between successive lattice sites
|
||||
// This goes by 10^12.
|
||||
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
|
||||
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
|
||||
// skip ahead.
|
||||
// For HMC unlikely to go at faster than a solve per second, and
|
||||
// tens of seconds per trajectory so this is clean in all reasonable cases,
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
//
|
||||
// Replace with 2^30 ; avoid problem on large volumes
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
const int shift = 30;
|
||||
|
||||
uint64_t skip = site;
|
||||
|
||||
skip = skip<<shift;
|
||||
|
||||
assert((skip >> shift)==site); // check for overflow
|
||||
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
#endif
|
||||
static RngEngine Reseed(RngEngine &eng)
|
||||
{
|
||||
std::vector<uint32_t> newseed;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
return Reseed(eng,newseed,uid);
|
||||
}
|
||||
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
|
||||
std::uniform_int_distribution<uint32_t> &uid)
|
||||
{
|
||||
const int reseeds=4;
|
||||
|
||||
newseed.resize(reseeds);
|
||||
for(int i=0;i<reseeds;i++){
|
||||
newseed[i] = uid(eng);
|
||||
}
|
||||
std::seed_seq sseq(newseed.begin(),newseed.end());
|
||||
return RngEngine(sseq);
|
||||
}
|
||||
|
||||
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
|
||||
saved.resize(RngStateCount);
|
||||
std::stringstream ss;
|
||||
ss<<eng;
|
||||
ss.seekg(0,ss.beg);
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss>>saved[i];
|
||||
}
|
||||
}
|
||||
void GetState(std::vector<RngStateType> & saved,int gen) {
|
||||
GetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
|
||||
assert(saved.size()==RngStateCount);
|
||||
std::stringstream ss;
|
||||
for(int i=0;i<RngStateCount;i++){
|
||||
ss<< saved[i]<<" ";
|
||||
}
|
||||
ss.seekg(0,ss.beg);
|
||||
ss>>eng;
|
||||
}
|
||||
void SetState(std::vector<RngStateType> & saved,int gen){
|
||||
SetState(saved,_generators[gen]);
|
||||
}
|
||||
void SetEngine(RngEngine &Eng, int gen){
|
||||
_generators[gen]=Eng;
|
||||
}
|
||||
void GetEngine(RngEngine &Eng, int gen){
|
||||
Eng=_generators[gen];
|
||||
}
|
||||
template<class source> void Seed(source &src, int gen)
|
||||
{
|
||||
_generators[gen] = RngEngine(src);
|
||||
}
|
||||
};
|
||||
|
||||
class GridSerialRNG : public GridRNGbase {
|
||||
public:
|
||||
|
||||
GridSerialRNG() : GridRNGbase() {
|
||||
_generators.resize(1);
|
||||
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename sobj::scalar_type scalar_type;
|
||||
|
||||
int words = sizeof(sobj)/sizeof(scalar_type);
|
||||
|
||||
scalar_type *buf = (scalar_type *) & l;
|
||||
|
||||
dist[0].reset();
|
||||
for(int idx=0;idx<words;idx++){
|
||||
fillScalar(buf[idx],dist[0],_generators[0]);
|
||||
}
|
||||
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
|
||||
};
|
||||
|
||||
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(ComplexD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealF &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(RealD &l,std::vector<distribution> &dist){
|
||||
dist[0].reset();
|
||||
fillScalar(l,dist[0],_generators[0]);
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
// vector fill
|
||||
template <class distribution> inline void fill(vComplexF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vComplexD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<2*vComplexD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealF &l,std::vector<distribution> &dist){
|
||||
RealF *pointer=(RealF *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealF::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
template <class distribution> inline void fill(vRealD &l,std::vector<distribution> &dist){
|
||||
RealD *pointer=(RealD *)&l;
|
||||
dist[0].reset();
|
||||
for(int i=0;i<vRealD::Nsimd();i++){
|
||||
fillScalar(pointer[i],dist[0],_generators[0]);
|
||||
}
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
|
||||
}
|
||||
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
std::seed_seq src(seeds.begin(),seeds.end());
|
||||
Seed(src,0);
|
||||
}
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising serial RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
};
|
||||
|
||||
class GridParallelRNG : public GridRNGbase {
|
||||
|
||||
double _time_counter;
|
||||
|
||||
public:
|
||||
GridBase *_grid;
|
||||
unsigned int _vol;
|
||||
|
||||
int generator_idx(int os,int is) {
|
||||
return is*_grid->oSites()+os;
|
||||
}
|
||||
|
||||
GridParallelRNG(GridBase *grid) : GridRNGbase() {
|
||||
_grid = grid;
|
||||
_vol =_grid->iSites()*_grid->oSites();
|
||||
|
||||
_generators.resize(_vol);
|
||||
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
|
||||
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
|
||||
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
|
||||
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
|
||||
}
|
||||
|
||||
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
double inner_time_counter = usecond();
|
||||
|
||||
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
|
||||
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
|
||||
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
|
||||
int words = sizeof(scalar_object) / sizeof(scalar_type);
|
||||
|
||||
parallel_for(int ss=0;ss<osites;ss++){
|
||||
std::vector<scalar_object> buf(Nsimd);
|
||||
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
|
||||
|
||||
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
|
||||
|
||||
for (int si = 0; si < Nsimd; si++) {
|
||||
|
||||
int gdx = generator_idx(ss, si); // index of generator state
|
||||
scalar_type *pointer = (scalar_type *)&buf[si];
|
||||
dist[gdx].reset();
|
||||
for (int idx = 0; idx < words; idx++)
|
||||
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
|
||||
}
|
||||
// merge into SIMD lanes, FIXME suboptimal implementation
|
||||
merge(l._odata[sm], buf);
|
||||
}
|
||||
}
|
||||
|
||||
_time_counter += usecond()- inner_time_counter;
|
||||
};
|
||||
|
||||
void SeedUniqueString(const std::string &s){
|
||||
std::vector<int> seeds;
|
||||
std::stringstream sha;
|
||||
seeds = GridChecksum::sha256_seeds(s);
|
||||
for(int i=0;i<seeds.size();i++) {
|
||||
sha << std::hex << seeds[i];
|
||||
}
|
||||
std::cout << GridLogMessage << "Intialising parallel RNG with unique string '"
|
||||
<< s << "'" << std::endl;
|
||||
std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl;
|
||||
SeedFixedIntegers(seeds);
|
||||
}
|
||||
void SeedFixedIntegers(const std::vector<int> &seeds){
|
||||
|
||||
// Everyone generates the same seed_seq based on input seeds
|
||||
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
|
||||
|
||||
std::seed_seq source(seeds.begin(),seeds.end());
|
||||
|
||||
RngEngine master_engine(source);
|
||||
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
////////////////////////////////////////////////
|
||||
// Skip ahead through a single stream.
|
||||
// Applicable to SITMO and other has based/crypto RNGs
|
||||
// Should be applicable to Mersenne Twister, but the C++11
|
||||
// MT implementation does not implement fast discard even though
|
||||
// in principle this is possible
|
||||
////////////////////////////////////////////////
|
||||
|
||||
// Everybody loops over global volume.
|
||||
parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){
|
||||
|
||||
// Where is it?
|
||||
int rank,o_idx,i_idx;
|
||||
std::vector<int> gcoor;
|
||||
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// If this is one of mine we take it
|
||||
if( rank == _grid->ThisRank() ){
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Machine and thread decomposition dependent seeding is efficient
|
||||
// and maximally parallel; but NOT reproducible from machine to machine.
|
||||
// Not ideal, but fastest way to reseed all nodes.
|
||||
////////////////////////////////////////////////////////////////
|
||||
{
|
||||
// Obtain one Reseed per processor
|
||||
int Nproc = _grid->ProcessorCount();
|
||||
std::vector<RngEngine> seeders(Nproc);
|
||||
int me= _grid->ThisRank();
|
||||
for(int p=0;p<Nproc;p++){
|
||||
seeders[p] = Reseed(master_engine);
|
||||
}
|
||||
master_engine = seeders[me];
|
||||
}
|
||||
|
||||
{
|
||||
// Obtain one reseeded generator per thread
|
||||
int Nthread = GridThread::GetThreads();
|
||||
std::vector<RngEngine> seeders(Nthread);
|
||||
for(int t=0;t<Nthread;t++){
|
||||
seeders[t] = Reseed(master_engine);
|
||||
}
|
||||
|
||||
parallel_for(int t=0;t<Nthread;t++) {
|
||||
// set up one per local site in threaded fashion
|
||||
std::vector<uint32_t> newseeds;
|
||||
std::uniform_int_distribution<uint32_t> uid;
|
||||
for(int l=0;l<_grid->lSites();l++) {
|
||||
if ( (l%Nthread)==t ) {
|
||||
_generators[l] = Reseed(seeders[t],newseeds,uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Report(){
|
||||
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Support for rigorous test of RNG's
|
||||
// Return uniform random uint32_t from requested site generator
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
uint32_t GlobalU01(int gsite){
|
||||
|
||||
uint32_t the_number;
|
||||
// who
|
||||
std::vector<int> gcoor;
|
||||
int rank,o_idx,i_idx;
|
||||
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
// draw
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
if( rank == _grid->ThisRank() ){
|
||||
the_number = _uid[l_idx](_generators[l_idx]);
|
||||
}
|
||||
|
||||
// share & return
|
||||
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
|
||||
return the_number;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
|
||||
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
|
||||
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
|
||||
|
||||
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
|
||||
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
|
||||
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
|
||||
|
||||
}
|
||||
#endif
|
67
Grid/lattice/Lattice_trace.h
Normal file
67
Grid/lattice/Lattice_trace.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_trace.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRACE_H
|
||||
#define GRID_LATTICE_TRACE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Tracing, transposing, peeking, poking
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline auto trace(const Lattice<vobj> &lhs)
|
||||
-> Lattice<decltype(trace(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = trace(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Trace Index level dependent operation
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
1085
Grid/lattice/Lattice_transfer.h
Normal file
1085
Grid/lattice/Lattice_transfer.h
Normal file
File diff suppressed because it is too large
Load Diff
63
Grid/lattice/Lattice_transpose.h
Normal file
63
Grid/lattice/Lattice_transpose.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_transpose.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_TRANSPOSE_H
|
||||
#define GRID_LATTICE_TRANSPOSE_H
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Transpose
|
||||
///////////////////////////////////////////////
|
||||
|
||||
namespace Grid {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
|
||||
Lattice<vobj> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transpose(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Index level dependent transpose
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto TransposeIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
|
||||
{
|
||||
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||
parallel_for(int ss=0;ss<lhs._grid->oSites();ss++){
|
||||
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
}
|
||||
#endif
|
84
Grid/lattice/Lattice_unary.h
Normal file
84
Grid/lattice/Lattice_unary.h
Normal file
@ -0,0 +1,84 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_unary.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_UNARY_H
|
||||
#define GRID_LATTICE_UNARY_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class obj> Lattice<obj> pow(const Lattice<obj> &rhs,RealD y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=pow(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class obj> Lattice<obj> mod(const Lattice<obj> &rhs,Integer y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=mod(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> div(const Lattice<obj> &rhs,Integer y){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=div(rhs._odata[ss],y);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
|
||||
Lattice<obj> ret(rhs._grid);
|
||||
ret.checkerboard = rhs.checkerboard;
|
||||
conformable(ret,rhs);
|
||||
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
|
||||
ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
86
Grid/lattice/Lattice_where.h
Normal file
86
Grid/lattice/Lattice_where.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/lattice/Lattice_where.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LATTICE_WHERE_H
|
||||
#define GRID_LATTICE_WHERE_H
|
||||
namespace Grid {
|
||||
// Must implement the predicate gating the
|
||||
// Must be able to reduce the predicate down to a single vInteger per site.
|
||||
// Must be able to require the type be iScalar x iScalar x ....
|
||||
// give a GetVtype method in iScalar
|
||||
// and blow away the tensor structures.
|
||||
//
|
||||
template<class vobj,class iobj>
|
||||
inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
conformable(iftrue,ret);
|
||||
|
||||
GridBase *grid=iftrue._grid;
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename iobj::vector_type mask_type;
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals (Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
|
||||
extract(iftrue._odata[ss] ,truevals);
|
||||
extract(iffalse._odata[ss] ,falsevals);
|
||||
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
if (mask[s]) falsevals[s]=truevals[s];
|
||||
}
|
||||
|
||||
merge(ret._odata[ss],falsevals);
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj,class iobj>
|
||||
inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
|
||||
{
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
|
||||
Lattice<vobj> ret(iftrue._grid);
|
||||
|
||||
where(ret,predicate,iftrue,iffalse);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif
|
116
Grid/log/Log.cc
Normal file
116
Grid/log/Log.cc
Normal file
@ -0,0 +1,116 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Log.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/util/CompilerCompatible.h>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <memory>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
std::string demangle(const char* name) {
|
||||
|
||||
int status = -4; // some arbitrary value to eliminate the compiler warning
|
||||
|
||||
// enable c++11 by passing the flag -std=c++11 to g++
|
||||
std::unique_ptr<char, void(*)(void*)> res {
|
||||
abi::__cxa_demangle(name, NULL, NULL, &status),
|
||||
std::free
|
||||
};
|
||||
|
||||
return (status==0) ? res.get() : name ;
|
||||
}
|
||||
|
||||
GridStopWatch Logger::GlobalStopWatch;
|
||||
int Logger::timestamp;
|
||||
std::ostream Logger::devnull(0);
|
||||
|
||||
void GridLogTimestamp(int on){
|
||||
Logger::Timestamp(on);
|
||||
}
|
||||
|
||||
Colours GridLogColours(0);
|
||||
GridLogger GridLogIRL (1, "IRL" , GridLogColours, "NORMAL");
|
||||
GridLogger GridLogSolver (1, "Solver", GridLogColours, "NORMAL");
|
||||
GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
|
||||
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
||||
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
||||
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
||||
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
||||
|
||||
void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
GridLogError.Active(0);
|
||||
GridLogWarning.Active(0);
|
||||
GridLogMessage.Active(1); // at least the messages should be always on
|
||||
GridLogIterative.Active(0);
|
||||
GridLogDebug.Active(0);
|
||||
GridLogPerformance.Active(0);
|
||||
GridLogIntegrator.Active(0);
|
||||
GridLogColours.Active(0);
|
||||
|
||||
for (int i = 0; i < logstreams.size(); i++) {
|
||||
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
|
||||
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
|
||||
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
|
||||
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||
if (logstreams[i] == std::string("Performance"))
|
||||
GridLogPerformance.Active(1);
|
||||
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
|
||||
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Verbose limiter on MPI tasks
|
||||
////////////////////////////////////////////////////////////
|
||||
void Grid_quiesce_nodes(void) {
|
||||
int me = 0;
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
#endif
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
me = shmem_my_pe();
|
||||
#endif
|
||||
if (me) {
|
||||
std::cout.setstate(std::ios::badbit);
|
||||
}
|
||||
}
|
||||
|
||||
void Grid_unquiesce_nodes(void) {
|
||||
#ifdef GRID_COMMS_MPI
|
||||
std::cout.clear();
|
||||
#endif
|
||||
}
|
||||
}
|
216
Grid/log/Log.h
Normal file
216
Grid/log/Log.h
Normal file
@ -0,0 +1,216 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Log.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <map>
|
||||
|
||||
#ifndef GRID_LOG_H
|
||||
#define GRID_LOG_H
|
||||
|
||||
#ifdef HAVE_EXECINFO_H
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class Colours{
|
||||
protected:
|
||||
bool is_active;
|
||||
public:
|
||||
std::map<std::string, std::string> colour;
|
||||
|
||||
Colours(bool activate=false){
|
||||
Active(activate);
|
||||
};
|
||||
|
||||
void Active(bool activate){
|
||||
is_active=activate;
|
||||
if (is_active){
|
||||
colour["BLACK"] ="\033[30m";
|
||||
colour["RED"] ="\033[31m";
|
||||
colour["GREEN"] ="\033[32m";
|
||||
colour["YELLOW"] ="\033[33m";
|
||||
colour["BLUE"] ="\033[34m";
|
||||
colour["PURPLE"] ="\033[35m";
|
||||
colour["CYAN"] ="\033[36m";
|
||||
colour["WHITE"] ="\033[37m";
|
||||
colour["NORMAL"] ="\033[0;39m";
|
||||
} else {
|
||||
colour["BLACK"] ="";
|
||||
colour["RED"] ="";
|
||||
colour["GREEN"] ="";
|
||||
colour["YELLOW"]="";
|
||||
colour["BLUE"] ="";
|
||||
colour["PURPLE"]="";
|
||||
colour["CYAN"] ="";
|
||||
colour["WHITE"] ="";
|
||||
colour["NORMAL"]="";
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
class Logger {
|
||||
protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
int timing_mode;
|
||||
int topWidth{-1}, chanWidth{-1};
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
|
||||
public:
|
||||
static GridStopWatch GlobalStopWatch;
|
||||
GridStopWatch LocalStopWatch;
|
||||
GridStopWatch *StopWatch;
|
||||
static std::ostream devnull;
|
||||
|
||||
std::string background() {return Painter.colour["NORMAL"];}
|
||||
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||
std::string colour() {return Painter.colour[COLOUR];}
|
||||
|
||||
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on),
|
||||
name(nm),
|
||||
topName(topNm),
|
||||
Painter(col_class),
|
||||
timing_mode(0),
|
||||
COLOUR(col)
|
||||
{
|
||||
StopWatch = & GlobalStopWatch;
|
||||
};
|
||||
|
||||
void Active(int on) {active = on;};
|
||||
int isActive(void) {return active;};
|
||||
static void Timestamp(int on) {timestamp = on;};
|
||||
void Reset(void) {
|
||||
StopWatch->Reset();
|
||||
StopWatch->Start();
|
||||
}
|
||||
void TimingMode(int on) {
|
||||
timing_mode = on;
|
||||
if(on) {
|
||||
StopWatch = &LocalStopWatch;
|
||||
Reset();
|
||||
}
|
||||
}
|
||||
void setTopWidth(const int w) {topWidth = w;}
|
||||
void setChanWidth(const int w) {chanWidth = w;}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
stream << log.background()<< std::left;
|
||||
if (log.topWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.topWidth);
|
||||
}
|
||||
stream << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left;
|
||||
if (log.chanWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.chanWidth);
|
||||
}
|
||||
stream << log.name << log.background() << " : ";
|
||||
if ( log.timestamp ) {
|
||||
log.StopWatch->Stop();
|
||||
GridTime now = log.StopWatch->Elapsed();
|
||||
if ( log.timing_mode==1 ) log.StopWatch->Reset();
|
||||
log.StopWatch->Start();
|
||||
stream << log.evidence()<< std::setw(6)<<now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class GridLogger: public Logger {
|
||||
public:
|
||||
GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"):
|
||||
Logger("Grid", on, nm, col_class, col_key){};
|
||||
};
|
||||
|
||||
void GridLogConfigure(std::vector<std::string> &logstreams);
|
||||
|
||||
extern GridLogger GridLogIRL;
|
||||
extern GridLogger GridLogSolver;
|
||||
extern GridLogger GridLogError;
|
||||
extern GridLogger GridLogWarning;
|
||||
extern GridLogger GridLogMessage;
|
||||
extern GridLogger GridLogDebug ;
|
||||
extern GridLogger GridLogPerformance;
|
||||
extern GridLogger GridLogIterative ;
|
||||
extern GridLogger GridLogIntegrator ;
|
||||
extern Colours GridLogColours;
|
||||
|
||||
std::string demangle(const char* name) ;
|
||||
|
||||
#define _NBACKTRACE (256)
|
||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
#define BACKTRACEFILE() {\
|
||||
char string[20]; \
|
||||
std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \
|
||||
std::FILE * fp = std::fopen(string,"w"); \
|
||||
BACKTRACEFP(fp)\
|
||||
std::fclose(fp); \
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_EXECINFO_H
|
||||
#define BACKTRACEFP(fp) { \
|
||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
||||
for (int i = 0; i < symbols; i++){\
|
||||
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, demangle(strings[i]).c_str()); std::fflush(fp); \
|
||||
}\
|
||||
}
|
||||
#else
|
||||
#define BACKTRACEFP(fp) { \
|
||||
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
|
||||
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
|
||||
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
|
||||
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
|
||||
}
|
||||
#endif
|
||||
|
||||
#define BACKTRACE() BACKTRACEFP(stdout)
|
||||
|
||||
|
||||
}
|
||||
#endif
|
729
Grid/parallelIO/BinaryIO.h
Normal file
729
Grid/parallelIO/BinaryIO.h
Normal file
@ -0,0 +1,729 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/BinaryIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu<guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BINARY_IO_H
|
||||
#define GRID_BINARY_IO_H
|
||||
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT)
|
||||
#define USE_MPI_IO
|
||||
#else
|
||||
#undef USE_MPI_IO
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ENDIAN_H
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Byte reversal garbage
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
inline uint32_t byte_reverse32(uint32_t f) {
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
return f;
|
||||
}
|
||||
inline uint64_t byte_reverse64(uint64_t f) {
|
||||
uint64_t g;
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
return g;
|
||||
}
|
||||
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
inline uint64_t Grid_ntohll(uint64_t A) { return A; }
|
||||
#else
|
||||
inline uint64_t Grid_ntohll(uint64_t A) {
|
||||
return byte_reverse64(A);
|
||||
}
|
||||
#endif
|
||||
|
||||
// A little helper
|
||||
inline void removeWhitespace(std::string &key)
|
||||
{
|
||||
key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end());
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Static class holding the parallel IO code
|
||||
// Could just use a namespace
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class BinaryIO {
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// more byte manipulation helpers
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vobj> static inline void Uint32Checksum(Lattice<vobj> &lat,uint32_t &nersc_csum)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
GridBase *grid = lat._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
unvectorizeToLexOrdArray(scalardata,lat);
|
||||
|
||||
NerscChecksum(grid,scalardata,nersc_csum);
|
||||
}
|
||||
|
||||
template <class fobj>
|
||||
static inline void NerscChecksum(GridBase *grid, std::vector<fobj> &fbuf, uint32_t &nersc_csum)
|
||||
{
|
||||
const uint64_t size32 = sizeof(fobj) / sizeof(uint32_t);
|
||||
|
||||
uint64_t lsites = grid->lSites();
|
||||
if (fbuf.size() == 1)
|
||||
{
|
||||
lsites = 1;
|
||||
}
|
||||
|
||||
PARALLEL_REGION
|
||||
{
|
||||
uint32_t nersc_csum_thr = 0;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for (uint64_t local_site = 0; local_site < lsites; local_site++)
|
||||
{
|
||||
uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
|
||||
for (uint64_t j = 0; j < size32; j++)
|
||||
{
|
||||
nersc_csum_thr = nersc_csum_thr + site_buf[j];
|
||||
}
|
||||
}
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
nersc_csum += nersc_csum_thr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class fobj> static inline void ScidacChecksum(GridBase *grid,std::vector<fobj> &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb)
|
||||
{
|
||||
const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t);
|
||||
|
||||
|
||||
int nd = grid->_ndimension;
|
||||
|
||||
uint64_t lsites =grid->lSites();
|
||||
if (fbuf.size()==1) {
|
||||
lsites=1;
|
||||
}
|
||||
std::vector<int> local_vol =grid->LocalDimensions();
|
||||
std::vector<int> local_start =grid->LocalStarts();
|
||||
std::vector<int> global_vol =grid->FullDimensions();
|
||||
|
||||
PARALLEL_REGION
|
||||
{
|
||||
std::vector<int> coor(nd);
|
||||
uint32_t scidac_csuma_thr=0;
|
||||
uint32_t scidac_csumb_thr=0;
|
||||
uint32_t site_crc=0;
|
||||
|
||||
PARALLEL_FOR_LOOP_INTERN
|
||||
for(uint64_t local_site=0;local_site<lsites;local_site++){
|
||||
|
||||
uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
|
||||
|
||||
/*
|
||||
* Scidac csum is rather more heavyweight
|
||||
* FIXME -- 128^3 x 256 x 16 will overflow.
|
||||
*/
|
||||
|
||||
int global_site;
|
||||
|
||||
Lexicographic::CoorFromIndex(coor,local_site,local_vol);
|
||||
|
||||
for(int d=0;d<nd;d++) {
|
||||
coor[d] = coor[d]+local_start[d];
|
||||
}
|
||||
|
||||
Lexicographic::IndexFromCoor(coor,global_site,global_vol);
|
||||
|
||||
uint32_t gsite29 = global_site%29;
|
||||
uint32_t gsite31 = global_site%31;
|
||||
|
||||
site_crc = crc32(0,(unsigned char *)site_buf,sizeof(fobj));
|
||||
// std::cout << "Site "<<local_site << " crc "<<std::hex<<site_crc<<std::dec<<std::endl;
|
||||
// std::cout << "Site "<<local_site << std::hex<<site_buf[0] <<site_buf[1]<<std::dec <<std::endl;
|
||||
scidac_csuma_thr ^= site_crc<<gsite29 | site_crc>>(32-gsite29);
|
||||
scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
|
||||
}
|
||||
|
||||
PARALLEL_CRITICAL
|
||||
{
|
||||
scidac_csuma^= scidac_csuma_thr;
|
||||
scidac_csumb^= scidac_csumb_thr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Network is big endian
|
||||
static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);}
|
||||
static inline void htobe64_v(void *file_object,uint32_t bytes){ be64toh_v(file_object,bytes);}
|
||||
static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);}
|
||||
static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);}
|
||||
|
||||
static inline void be32toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint32_t * f = (uint32_t *)file_object;
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f[i] = ntohl(f[i]);
|
||||
}
|
||||
}
|
||||
// LE must Swap and switch to host
|
||||
static inline void le32toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint32_t *fp = (uint32_t *)file_object;
|
||||
uint32_t f;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint32_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = ntohl(f);
|
||||
}
|
||||
}
|
||||
|
||||
// BE is same as network
|
||||
static inline void be64toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint64_t * f = (uint64_t *)file_object;
|
||||
uint64_t count = bytes/sizeof(uint64_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f[i] = Grid_ntohll(f[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// LE must swap and switch;
|
||||
static inline void le64toh_v(void *file_object,uint64_t bytes)
|
||||
{
|
||||
uint64_t *fp = (uint64_t *)file_object;
|
||||
uint64_t f,g;
|
||||
|
||||
uint64_t count = bytes/sizeof(uint64_t);
|
||||
parallel_for(uint64_t i=0;i<count;i++){
|
||||
f = fp[i];
|
||||
// got network order and the network to host
|
||||
g = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
g = g << 32;
|
||||
f = f >> 32;
|
||||
g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ;
|
||||
fp[i] = Grid_ntohll(g);
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Real action:
|
||||
// Read or Write distributed lexico array of ANY object to a specific location in file
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static const int BINARYIO_MASTER_APPEND = 0x10;
|
||||
static const int BINARYIO_UNORDERED = 0x08;
|
||||
static const int BINARYIO_LEXICOGRAPHIC = 0x04;
|
||||
static const int BINARYIO_READ = 0x02;
|
||||
static const int BINARYIO_WRITE = 0x01;
|
||||
|
||||
template<class word,class fobj>
|
||||
static inline void IOobject(word w,
|
||||
GridBase *grid,
|
||||
std::vector<fobj> &iodata,
|
||||
std::string file,
|
||||
uint64_t& offset,
|
||||
const std::string &format, int control,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
grid->Barrier();
|
||||
GridStopWatch timer;
|
||||
GridStopWatch bstimer;
|
||||
|
||||
nersc_csum=0;
|
||||
scidac_csuma=0;
|
||||
scidac_csumb=0;
|
||||
|
||||
int ndim = grid->Dimensions();
|
||||
int nrank = grid->ProcessorCount();
|
||||
int myrank = grid->ThisRank();
|
||||
|
||||
std::vector<int> psizes = grid->ProcessorGrid();
|
||||
std::vector<int> pcoor = grid->ThisProcessorCoor();
|
||||
std::vector<int> gLattice= grid->GlobalDimensions();
|
||||
std::vector<int> lLattice= grid->LocalDimensions();
|
||||
|
||||
std::vector<int> lStart(ndim);
|
||||
std::vector<int> gStart(ndim);
|
||||
|
||||
// Flatten the file
|
||||
uint64_t lsites = grid->lSites();
|
||||
if ( control & BINARYIO_MASTER_APPEND ) {
|
||||
assert(iodata.size()==1);
|
||||
} else {
|
||||
assert(lsites==iodata.size());
|
||||
}
|
||||
for(int d=0;d<ndim;d++){
|
||||
gStart[d] = lLattice[d]*pcoor[d];
|
||||
lStart[d] = 0;
|
||||
}
|
||||
|
||||
#ifdef USE_MPI_IO
|
||||
std::vector<int> distribs(ndim,MPI_DISTRIBUTE_BLOCK);
|
||||
std::vector<int> dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG);
|
||||
MPI_Datatype mpiObject;
|
||||
MPI_Datatype fileArray;
|
||||
MPI_Datatype localArray;
|
||||
MPI_Datatype mpiword;
|
||||
MPI_Offset disp = offset;
|
||||
MPI_File fh ;
|
||||
MPI_Status status;
|
||||
int numword;
|
||||
|
||||
if ( sizeof( word ) == sizeof(float ) ) {
|
||||
numword = sizeof(fobj)/sizeof(float);
|
||||
mpiword = MPI_FLOAT;
|
||||
} else {
|
||||
numword = sizeof(fobj)/sizeof(double);
|
||||
mpiword = MPI_DOUBLE;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Sobj in MPI phrasing
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int ierr;
|
||||
ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject); assert(ierr==0);
|
||||
ierr = MPI_Type_commit(&mpiObject);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// File global array data type
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray); assert(ierr==0);
|
||||
ierr=MPI_Type_commit(&fileArray); assert(ierr==0);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// local lattice array
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray); assert(ierr==0);
|
||||
ierr=MPI_Type_commit(&localArray); assert(ierr==0);
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Byte order
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Do the I/O
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
if ( control & BINARYIO_READ ) {
|
||||
|
||||
timer.Start();
|
||||
|
||||
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
|
||||
#ifdef USE_MPI_IO
|
||||
std::cout<< GridLogMessage<<"IOobject: MPI read I/O "<< file<< std::endl;
|
||||
ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0);
|
||||
ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0);
|
||||
ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0);
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
} else {
|
||||
std::cout << GridLogMessage <<"IOobject: C++ read I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes" << std::endl;
|
||||
std::ifstream fin;
|
||||
fin.open(file, std::ios::binary | std::ios::in);
|
||||
if (control & BINARYIO_MASTER_APPEND)
|
||||
{
|
||||
fin.seekg(-sizeof(fobj), fin.end);
|
||||
}
|
||||
else
|
||||
{
|
||||
fin.seekg(offset + myrank * lsites * sizeof(fobj));
|
||||
}
|
||||
fin.read((char *)&iodata[0], iodata.size() * sizeof(fobj));
|
||||
assert(fin.fail() == 0);
|
||||
fin.close();
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
bstimer.Start();
|
||||
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
|
||||
if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
NerscChecksum(grid,iodata,nersc_csum);
|
||||
bstimer.Stop();
|
||||
}
|
||||
|
||||
if ( control & BINARYIO_WRITE ) {
|
||||
|
||||
bstimer.Start();
|
||||
NerscChecksum(grid,iodata,nersc_csum);
|
||||
if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size());
|
||||
ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb);
|
||||
bstimer.Stop();
|
||||
|
||||
grid->Barrier();
|
||||
|
||||
timer.Start();
|
||||
if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) {
|
||||
#ifdef USE_MPI_IO
|
||||
std::cout << GridLogMessage <<"IOobject: MPI write I/O " << file << std::endl;
|
||||
ierr = MPI_File_open(grid->communicator, (char *)file.c_str(), MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
|
||||
// std::cout << GridLogMessage << "Checking for errors" << std::endl;
|
||||
if (ierr != MPI_SUCCESS)
|
||||
{
|
||||
char error_string[BUFSIZ];
|
||||
int length_of_error_string, error_class;
|
||||
|
||||
MPI_Error_class(ierr, &error_class);
|
||||
MPI_Error_string(error_class, error_string, &length_of_error_string);
|
||||
fprintf(stderr, "%3d: %s\n", myrank, error_string);
|
||||
MPI_Error_string(ierr, error_string, &length_of_error_string);
|
||||
fprintf(stderr, "%3d: %s\n", myrank, error_string);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1); //assert(ierr == 0);
|
||||
}
|
||||
|
||||
std::cout << GridLogDebug << "MPI write I/O set view " << file << std::endl;
|
||||
ierr = MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL);
|
||||
assert(ierr == 0);
|
||||
|
||||
std::cout << GridLogDebug << "MPI write I/O write all " << file << std::endl;
|
||||
ierr = MPI_File_write_all(fh, &iodata[0], 1, localArray, &status);
|
||||
assert(ierr == 0);
|
||||
|
||||
MPI_Offset os;
|
||||
MPI_File_get_position(fh, &os);
|
||||
MPI_File_get_byte_offset(fh, os, &disp);
|
||||
offset = disp;
|
||||
|
||||
|
||||
MPI_File_close(&fh);
|
||||
MPI_Type_free(&fileArray);
|
||||
MPI_Type_free(&localArray);
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
} else {
|
||||
|
||||
std::cout << GridLogMessage << "IOobject: C++ write I/O " << file << " : "
|
||||
<< iodata.size() * sizeof(fobj) << " bytes and offset " << offset << std::endl;
|
||||
|
||||
std::ofstream fout;
|
||||
fout.exceptions ( std::fstream::failbit | std::fstream::badbit );
|
||||
try {
|
||||
if (offset) { // Must already exist and contain data
|
||||
fout.open(file,std::ios::binary|std::ios::out|std::ios::in);
|
||||
} else { // Allow create
|
||||
fout.open(file,std::ios::binary|std::ios::out);
|
||||
}
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << GridLogError << "Error in opening the file " << file << " for output" <<std::endl;
|
||||
std::cout << GridLogError << "Exception description: " << exc.what() << std::endl;
|
||||
// std::cout << GridLogError << "Probable cause: wrong path, inaccessible location "<< std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ( control & BINARYIO_MASTER_APPEND ) {
|
||||
try {
|
||||
fout.seekp(0,fout.end);
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in seeking file end " << file << std::endl;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
fout.seekp(offset+myrank*lsites*sizeof(fobj));
|
||||
} catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in seeking file " << file <<" offset "<< offset << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
fout.write((char *)&iodata[0],iodata.size()*sizeof(fobj));//assert( fout.fail()==0);
|
||||
}
|
||||
catch (const std::fstream::failure& exc) {
|
||||
std::cout << "Exception in writing file " << file << std::endl;
|
||||
std::cout << GridLogError << "Exception description: "<< exc.what() << std::endl;
|
||||
#ifdef USE_MPI_IO
|
||||
MPI_Abort(MPI_COMM_WORLD,1);
|
||||
#else
|
||||
exit(1);
|
||||
#endif
|
||||
}
|
||||
offset = fout.tellp();
|
||||
fout.close();
|
||||
}
|
||||
timer.Stop();
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"IOobject: ";
|
||||
if ( control & BINARYIO_READ) std::cout << " read ";
|
||||
else std::cout << " write ";
|
||||
uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
|
||||
std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Safety check
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// if the data size is 1 we do not want to sum over the MPI ranks
|
||||
if (iodata.size() != 1){
|
||||
grid->Barrier();
|
||||
grid->GlobalSum(nersc_csum);
|
||||
grid->GlobalXOR(scidac_csuma);
|
||||
grid->GlobalXOR(scidac_csumb);
|
||||
grid->Barrier();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Read a Lattice of object
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline void readLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(iodata[x], scalardata[x]);
|
||||
|
||||
vectorizeFromLexOrdArray(scalardata,Umu);
|
||||
grid->Barrier();
|
||||
|
||||
timer.Stop();
|
||||
std::cout<<GridLogMessage<<"readLatticeObject: vectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Write a Lattice of object
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj,class fobj,class munger>
|
||||
static inline void writeLatticeObject(Lattice<vobj> &Umu,
|
||||
std::string file,
|
||||
munger munge,
|
||||
uint64_t offset,
|
||||
const std::string &format,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::Realified::scalar_type word; word w=0;
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
std::vector<sobj> scalardata(lsites);
|
||||
std::vector<fobj> iodata(lsites); // Munge, checksum, byte order in here
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Munge [ .e.g 3rd row recon ]
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
GridStopWatch timer; timer.Start();
|
||||
unvectorizeToLexOrdArray(scalardata,Umu);
|
||||
|
||||
parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
|
||||
|
||||
grid->Barrier();
|
||||
timer.Stop();
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
std::cout<<GridLogMessage<<"writeLatticeObject: unvectorize overhead "<<timer.Elapsed() <<std::endl;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Read a RNG; use IOobject and lexico map to an array of state
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static inline void readRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename GridSerialRNG::RngStateType RngStateType;
|
||||
const int RngStateCount = GridSerialRNG::RngStateCount;
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
typedef RngStateType word; word w=0;
|
||||
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp = 0;
|
||||
uint32_t scidac_csuma_tmp = 0;
|
||||
uint32_t scidac_csumb_tmp = 0;
|
||||
|
||||
GridStopWatch timer;
|
||||
|
||||
std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl;
|
||||
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
timer.Start();
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
|
||||
parallel.SetState(tmp,lidx);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
iodata.resize(1);
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND,
|
||||
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
|
||||
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin());
|
||||
serial.SetState(tmp,0);
|
||||
}
|
||||
|
||||
nersc_csum = nersc_csum + nersc_csum_tmp;
|
||||
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
|
||||
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
|
||||
|
||||
std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Write a RNG; lexico map to an array of state and use IOobject
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
static inline void writeRNG(GridSerialRNG &serial,
|
||||
GridParallelRNG ¶llel,
|
||||
std::string file,
|
||||
uint64_t offset,
|
||||
uint32_t &nersc_csum,
|
||||
uint32_t &scidac_csuma,
|
||||
uint32_t &scidac_csumb)
|
||||
{
|
||||
typedef typename GridSerialRNG::RngStateType RngStateType;
|
||||
typedef RngStateType word; word w=0;
|
||||
const int RngStateCount = GridSerialRNG::RngStateCount;
|
||||
typedef std::array<RngStateType,RngStateCount> RNGstate;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
uint64_t gsites = grid->gSites();
|
||||
uint64_t lsites = grid->lSites();
|
||||
|
||||
uint32_t nersc_csum_tmp;
|
||||
uint32_t scidac_csuma_tmp;
|
||||
uint32_t scidac_csumb_tmp;
|
||||
|
||||
GridStopWatch timer;
|
||||
std::string format = "IEEE32BIG";
|
||||
|
||||
std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl;
|
||||
|
||||
timer.Start();
|
||||
std::vector<RNGstate> iodata(lsites);
|
||||
parallel_for(uint64_t lidx=0;lidx<lsites;lidx++){
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
parallel.GetState(tmp,lidx);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
iodata.resize(1);
|
||||
{
|
||||
std::vector<RngStateType> tmp(RngStateCount);
|
||||
serial.GetState(tmp,0);
|
||||
std::copy(tmp.begin(),tmp.end(),iodata[0].begin());
|
||||
}
|
||||
IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND,
|
||||
nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp);
|
||||
|
||||
nersc_csum = nersc_csum + nersc_csum_tmp;
|
||||
scidac_csuma = scidac_csuma ^ scidac_csuma_tmp;
|
||||
scidac_csumb = scidac_csumb ^ scidac_csumb_tmp;
|
||||
|
||||
std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl;
|
||||
std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
868
Grid/parallelIO/IldgIO.h
Normal file
868
Grid/parallelIO/IldgIO.h
Normal file
@ -0,0 +1,868 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/IldgIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ILDG_IO_H
|
||||
#define GRID_ILDG_IO_H
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
|
||||
#include <pwd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//C-Lime is a must have for this functionality
|
||||
extern "C" {
|
||||
#include "lime.h"
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
/////////////////////////////////
|
||||
// Encode word types as strings
|
||||
/////////////////////////////////
|
||||
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
|
||||
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
|
||||
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
|
||||
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Encode a generic tensor as a string
|
||||
/////////////////////////////////////////
|
||||
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
|
||||
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
int _ColourN = indexRank<ColourIndex,vobj>();
|
||||
int _ColourScalar = isScalar<ColourIndex,vobj>();
|
||||
int _ColourVector = isVector<ColourIndex,vobj>();
|
||||
int _ColourMatrix = isMatrix<ColourIndex,vobj>();
|
||||
|
||||
int _SpinN = indexRank<SpinIndex,vobj>();
|
||||
int _SpinScalar = isScalar<SpinIndex,vobj>();
|
||||
int _SpinVector = isVector<SpinIndex,vobj>();
|
||||
int _SpinMatrix = isMatrix<SpinIndex,vobj>();
|
||||
|
||||
int _LorentzN = indexRank<LorentzIndex,vobj>();
|
||||
int _LorentzScalar = isScalar<LorentzIndex,vobj>();
|
||||
int _LorentzVector = isVector<LorentzIndex,vobj>();
|
||||
int _LorentzMatrix = isMatrix<LorentzIndex,vobj>();
|
||||
|
||||
std::stringstream stream;
|
||||
|
||||
stream << "GRID_";
|
||||
stream << ScidacWordMnemonic<stype>();
|
||||
|
||||
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
|
||||
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
|
||||
|
||||
if ( _SpinVector ) stream << "_SpinVector"<<_SpinN;
|
||||
if ( _SpinMatrix ) stream << "_SpinMatrix"<<_SpinN;
|
||||
|
||||
if ( _ColourVector ) stream << "_ColourVector"<<_ColourN;
|
||||
if ( _ColourMatrix ) stream << "_ColourMatrix"<<_ColourN;
|
||||
|
||||
if ( _ColourScalar && _LorentzScalar && _SpinScalar ) stream << "_Complex";
|
||||
|
||||
|
||||
typesize = sizeof(typename vobj::scalar_type);
|
||||
|
||||
if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
|
||||
else typesize*= _ColourN;
|
||||
|
||||
if ( _SpinMatrix ) typesize*= _SpinN*_SpinN;
|
||||
else typesize*= _SpinN;
|
||||
|
||||
colors = _ColourN;
|
||||
spins = _SpinN;
|
||||
datacount = _LorentzN;
|
||||
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
|
||||
return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Helper to fill out metadata
|
||||
////////////////////////////////////////////////////////////
|
||||
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||
FieldMetaData &header,
|
||||
scidacRecord & _scidacRecord,
|
||||
scidacFile & _scidacFile)
|
||||
{
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
|
||||
/////////////////////////////////////
|
||||
// Pull Grid's metadata
|
||||
/////////////////////////////////////
|
||||
PrepareMetaData(field,header);
|
||||
|
||||
/////////////////////////////////////
|
||||
// Scidac Private File structure
|
||||
/////////////////////////////////////
|
||||
_scidacFile = scidacFile(field._grid);
|
||||
|
||||
/////////////////////////////////////
|
||||
// Scidac Private Record structure
|
||||
/////////////////////////////////////
|
||||
scidacRecord sr;
|
||||
sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
|
||||
sr.date = header.creation_date;
|
||||
sr.precision = ScidacWordMnemonic<stype>();
|
||||
sr.recordtype = GRID_IO_FIELD;
|
||||
|
||||
_scidacRecord = sr;
|
||||
|
||||
// std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Scidac checksum
|
||||
///////////////////////////////////////////////////////
|
||||
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
|
||||
{
|
||||
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
|
||||
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
|
||||
if ( scidac_csuma !=scidac_checksuma) return 0;
|
||||
if ( scidac_csumb !=scidac_checksumb) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Lime, ILDG and Scidac I/O classes
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
class GridLimeReader : public BinaryIO {
|
||||
public:
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
FILE *File;
|
||||
LimeReader *LimeR;
|
||||
std::string filename;
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Open the file
|
||||
/////////////////////////////////////////////
|
||||
void open(const std::string &_filename)
|
||||
{
|
||||
filename= _filename;
|
||||
File = fopen(filename.c_str(), "r");
|
||||
if (File == nullptr)
|
||||
{
|
||||
std::cerr << "cannot open file '" << filename << "'" << std::endl;
|
||||
abort();
|
||||
}
|
||||
LimeR = limeCreateReader(File);
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void){
|
||||
fclose(File);
|
||||
// limeDestroyReader(LimeR);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Read a generic lattice field and verify checksum
|
||||
////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
scidacChecksum scidacChecksum_;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
|
||||
std::string format = getFormatString<vobj>();
|
||||
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
uint64_t file_bytes =limeReaderBytes(LimeR);
|
||||
|
||||
// std::cout << GridLogMessage << limeReaderType(LimeR) << " "<< file_bytes <<" bytes "<<std::endl;
|
||||
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <<std::endl;
|
||||
|
||||
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
|
||||
|
||||
// std::cout << "R sizeof(sobj)= " <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "R Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "R Payload expected " <<PayloadSize<<std::endl;
|
||||
// std::cout << "R file size " <<file_bytes <<std::endl;
|
||||
|
||||
assert(PayloadSize == file_bytes);// Must match or user error
|
||||
|
||||
uint64_t offset= ftello(File);
|
||||
// std::cout << " ReadLatticeObject from offset "<<offset << std::endl;
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Insist checksum is next record
|
||||
/////////////////////////////////////////////
|
||||
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
|
||||
/////////////////////////////////////////////
|
||||
// Verify checksums
|
||||
/////////////////////////////////////////////
|
||||
assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Read a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
template<class serialisable_object>
|
||||
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
|
||||
{
|
||||
// should this be a do while; can we miss a first record??
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeObject seeking "<< record_name <<" found record :" <<limeReaderType(LimeR) <<std::endl;
|
||||
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
|
||||
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches ! " << record_name <<std::endl;
|
||||
std::vector<char> xmlc(nbytes+1,'\0');
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< " readLimeObject matches XML " << &xmlc[0] <<std::endl;
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,object_name,object);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
|
||||
class GridLimeWriter : public BinaryIO
|
||||
{
|
||||
public:
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// FIXME: format for RNG? Now just binary out instead
|
||||
// FIXME: collective calls or not ?
|
||||
// : must know if I am the I/O boss
|
||||
///////////////////////////////////////////////////
|
||||
FILE *File;
|
||||
LimeWriter *LimeW;
|
||||
std::string filename;
|
||||
bool boss_node;
|
||||
GridLimeWriter( bool isboss = true) {
|
||||
boss_node = isboss;
|
||||
}
|
||||
void open(const std::string &_filename) {
|
||||
filename= _filename;
|
||||
if ( boss_node ) {
|
||||
File = fopen(filename.c_str(), "w");
|
||||
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
|
||||
}
|
||||
}
|
||||
/////////////////////////////////////////////
|
||||
// Close the file
|
||||
/////////////////////////////////////////////
|
||||
void close(void) {
|
||||
if ( boss_node ) {
|
||||
fclose(File);
|
||||
}
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
///////////////////////////////////////////////////////
|
||||
// Lime utility functions
|
||||
///////////////////////////////////////////////////////
|
||||
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
LimeRecordHeader *h;
|
||||
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
|
||||
assert(limeWriteRecordHeader(h, LimeW) >= 0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
return LIME_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////
|
||||
// Write a generic serialisable object
|
||||
////////////////////////////////////////////
|
||||
void writeLimeObject(int MB,int ME,XmlWriter &writer,std::string object_name,std::string record_name)
|
||||
{
|
||||
if ( boss_node ) {
|
||||
std::string xmlstring = writer.docString();
|
||||
|
||||
// std::cout << "WriteLimeObject" << record_name <<std::endl;
|
||||
uint64_t nbytes = xmlstring.size();
|
||||
// std::cout << " xmlstring "<< nbytes<< " " << xmlstring <<std::endl;
|
||||
int err;
|
||||
LimeRecordHeader *h = limeCreateHeader(MB, ME,const_cast<char *>(record_name.c_str()), nbytes);
|
||||
assert(h!= NULL);
|
||||
|
||||
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
|
||||
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
limeDestroyHeader(h);
|
||||
}
|
||||
}
|
||||
|
||||
template<class serialisable_object>
|
||||
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name, const unsigned int scientificPrec = 0)
|
||||
{
|
||||
XmlWriter WR("","");
|
||||
|
||||
if (scientificPrec)
|
||||
{
|
||||
WR.scientificFormat(true);
|
||||
WR.setPrecision(scientificPrec);
|
||||
}
|
||||
write(WR,object_name,object);
|
||||
writeLimeObject(MB, ME, WR, object_name, record_name);
|
||||
}
|
||||
////////////////////////////////////////////////////
|
||||
// Write a generic lattice field and csum
|
||||
// This routine is Collectively called by all nodes
|
||||
// in communicator used by the field._grid
|
||||
////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// NB: FILE and iostream are jointly writing disjoint sequences in the
|
||||
// the same file through different file handles (integer units).
|
||||
//
|
||||
// These are both buffered, so why I think this code is right is as follows.
|
||||
//
|
||||
// i) write record header to FILE *File, telegraphing the size; flush
|
||||
// ii) ftello reads the offset from FILE *File .
|
||||
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
|
||||
// Closes iostream and flushes.
|
||||
// iv) fseek on FILE * to end of this disjoint section.
|
||||
// v) Continue writing scidac record.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
assert(boss_node == field._grid->IsBoss() );
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Create record header
|
||||
////////////////////////////////////////////
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
int err;
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
uint64_t PayloadSize = sizeof(sobj) * grid->_gsites;
|
||||
if ( boss_node ) {
|
||||
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
|
||||
fflush(File);
|
||||
}
|
||||
|
||||
// std::cout << "W sizeof(sobj)" <<sizeof(sobj)<<std::endl;
|
||||
// std::cout << "W Gsites " <<field._grid->_gsites<<std::endl;
|
||||
// std::cout << "W Payload expected " <<PayloadSize<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Check all nodes agree on file position
|
||||
////////////////////////////////////////////////
|
||||
uint64_t offset1;
|
||||
if ( boss_node ) {
|
||||
offset1 = ftello(File);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset1,sizeof(offset1));
|
||||
|
||||
///////////////////////////////////////////
|
||||
// The above is collective. Write by other means into the binary record
|
||||
///////////////////////////////////////////
|
||||
std::string format = getFormatString<vobj>();
|
||||
BinarySimpleMunger<sobj,sobj> munge;
|
||||
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset1, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Wind forward and close the record
|
||||
///////////////////////////////////////////
|
||||
if ( boss_node ) {
|
||||
fseek(File,0,SEEK_END);
|
||||
uint64_t offset2 = ftello(File); // std::cout << " now at offset "<<offset2 << std::endl;
|
||||
assert( (offset2-offset1) == PayloadSize);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Check MPI-2 I/O did what we expect to file
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
if ( boss_node ) {
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Write checksum element, propagaing forward from the BinaryIO
|
||||
// Always pair a checksum with a binary object, and close message
|
||||
////////////////////////////////////////
|
||||
scidacChecksum checksum;
|
||||
std::stringstream streama; streama << std::hex << scidac_csuma;
|
||||
std::stringstream streamb; streamb << std::hex << scidac_csumb;
|
||||
checksum.suma= streama.str();
|
||||
checksum.sumb= streamb.str();
|
||||
if ( boss_node ) {
|
||||
writeLimeObject(0,1,checksum,std::string("scidacChecksum"),std::string(SCIDAC_CHECKSUM));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ScidacWriter : public GridLimeWriter {
|
||||
public:
|
||||
|
||||
ScidacWriter(bool isboss =true ) : GridLimeWriter(isboss) { };
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord,
|
||||
const unsigned int recordScientificPrec = 0)
|
||||
{
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
ScidacMetaData(field,header,_scidacRecord,_scidacFile);
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
if ( this->boss_node ) {
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML), recordScientificPrec);
|
||||
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
}
|
||||
// Collective call
|
||||
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ScidacReader : public GridLimeReader {
|
||||
public:
|
||||
|
||||
template<class SerialisableUserFile>
|
||||
void readScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
|
||||
{
|
||||
scidacFile _scidacFile(grid);
|
||||
readLimeObject(_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
readLimeObject(_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Write generic lattice field in scidac format
|
||||
////////////////////////////////////////////////
|
||||
template <class vobj, class userRecord>
|
||||
void readScidacFieldRecord(Lattice<vobj> &field,userRecord &_userRecord)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
GridBase * grid = field._grid;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
readLimeObject(header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
readLimeObject(_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
readLimeObject(_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
readLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));
|
||||
}
|
||||
void skipPastBinaryRecord(void) {
|
||||
std::string rec_name(ILDG_BINARY_DATA);
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
|
||||
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
void skipPastObjectRecord(std::string rec_name) {
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
void skipScidacFieldRecord() {
|
||||
skipPastObjectRecord(std::string(GRID_FORMAT));
|
||||
skipPastObjectRecord(std::string(SCIDAC_RECORD_XML));
|
||||
skipPastObjectRecord(std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
skipPastBinaryRecord();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class IldgWriter : public ScidacWriter {
|
||||
public:
|
||||
|
||||
IldgWriter(bool isboss) : ScidacWriter(isboss) {};
|
||||
|
||||
///////////////////////////////////
|
||||
// A little helper
|
||||
///////////////////////////////////
|
||||
void writeLimeIldgLFN(std::string &LFN)
|
||||
{
|
||||
uint64_t PayloadSize = LFN.size();
|
||||
int err;
|
||||
createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
|
||||
err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
|
||||
err=limeWriterCloseRecord(LimeW); assert(err>=0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Special ILDG operations ; gauge configs only.
|
||||
// Don't require scidac records EXCEPT checksum
|
||||
// Use Grid MetaData object if present.
|
||||
////////////////////////////////////////////////////////////////
|
||||
template <class vsimd>
|
||||
void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
|
||||
{
|
||||
GridBase * grid = Umu._grid;
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
////////////////////////////////////////
|
||||
// fill the Grid header
|
||||
////////////////////////////////////////
|
||||
FieldMetaData header;
|
||||
scidacRecord _scidacRecord;
|
||||
scidacFile _scidacFile;
|
||||
|
||||
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
|
||||
|
||||
std::string format = header.floating_point;
|
||||
header.ensemble_id = description;
|
||||
header.ensemble_label = description;
|
||||
header.sequence_number = sequence;
|
||||
header.ildg_lfn = LFN;
|
||||
|
||||
assert ( (format == std::string("IEEE32BIG"))
|
||||
||(format == std::string("IEEE64BIG")) );
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Fill ILDG header data struct
|
||||
//////////////////////////////////////////////////////
|
||||
ildgFormat ildgfmt ;
|
||||
ildgfmt.field = std::string("su3gauge");
|
||||
|
||||
if ( format == std::string("IEEE32BIG") ) {
|
||||
ildgfmt.precision = 32;
|
||||
} else {
|
||||
ildgfmt.precision = 64;
|
||||
}
|
||||
ildgfmt.version = 1.0;
|
||||
ildgfmt.lx = header.dimension[0];
|
||||
ildgfmt.ly = header.dimension[1];
|
||||
ildgfmt.lz = header.dimension[2];
|
||||
ildgfmt.lt = header.dimension[3];
|
||||
assert(header.nd==4);
|
||||
assert(header.nd==header.dimension.size());
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Fill the USQCD info field
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
usqcdInfo info;
|
||||
info.version=1.0;
|
||||
info.plaq = header.plaquette;
|
||||
info.linktr = header.link_trace;
|
||||
|
||||
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
|
||||
//////////////////////////////////////////////
|
||||
// Fill the Lime file record by record
|
||||
//////////////////////////////////////////////
|
||||
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
|
||||
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
|
||||
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
|
||||
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
|
||||
writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
|
||||
writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec
|
||||
writeLimeIldgLFN(header.ildg_lfn); // rec
|
||||
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
|
||||
// limeDestroyWriter(LimeW);
|
||||
}
|
||||
};
|
||||
|
||||
class IldgReader : public GridLimeReader {
|
||||
public:
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Read either Grid/SciDAC/ILDG configuration
|
||||
// Don't require scidac records EXCEPT checksum
|
||||
// Use Grid MetaData object if present.
|
||||
// Else use ILDG MetaData object if present.
|
||||
// Else use SciDAC MetaData object if present.
|
||||
////////////////////////////////////////////////////////////////
|
||||
template <class vsimd>
|
||||
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
|
||||
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
typedef typename GaugeField::vector_object vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
typedef LorentzColourMatrixF fobj;
|
||||
typedef LorentzColourMatrixD dobj;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::vector<int> dims = Umu._grid->FullDimensions();
|
||||
|
||||
assert(dims.size()==4);
|
||||
|
||||
// Metadata holders
|
||||
ildgFormat ildgFormat_ ;
|
||||
std::string ildgLFN_ ;
|
||||
scidacChecksum scidacChecksum_;
|
||||
usqcdInfo usqcdInfo_ ;
|
||||
|
||||
// track what we read from file
|
||||
int found_ildgFormat =0;
|
||||
int found_ildgLFN =0;
|
||||
int found_scidacChecksum=0;
|
||||
int found_usqcdInfo =0;
|
||||
int found_ildgBinary =0;
|
||||
int found_FieldMetaData =0;
|
||||
|
||||
uint32_t nersc_csum;
|
||||
uint32_t scidac_csuma;
|
||||
uint32_t scidac_csumb;
|
||||
|
||||
// Binary format
|
||||
std::string format;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Loop over all records
|
||||
// -- Order is poorly guaranteed except ILDG header preceeds binary section.
|
||||
// -- Run like an event loop.
|
||||
// -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing
|
||||
// that Scidac.
|
||||
// -- Insist on Scidac checksum record.
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
|
||||
|
||||
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// If not BINARY_DATA read a string and parse
|
||||
//////////////////////////////////////////////////////////////////
|
||||
if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) {
|
||||
|
||||
// Copy out the string
|
||||
std::vector<char> xmlc(nbytes+1,'\0');
|
||||
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
|
||||
// std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// ILDG format record
|
||||
|
||||
std::string xmlstring(&xmlc[0]);
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"ildgFormat",ildgFormat_);
|
||||
|
||||
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
|
||||
if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
|
||||
|
||||
assert( ildgFormat_.lx == dims[0]);
|
||||
assert( ildgFormat_.ly == dims[1]);
|
||||
assert( ildgFormat_.lz == dims[2]);
|
||||
assert( ildgFormat_.lt == dims[3]);
|
||||
|
||||
found_ildgFormat = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
|
||||
FieldMetaData_.ildg_lfn = xmlstring;
|
||||
found_ildgLFN = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(ILDG_FORMAT)) ) {
|
||||
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"FieldMetaData",FieldMetaData_);
|
||||
|
||||
format = FieldMetaData_.floating_point;
|
||||
|
||||
assert(FieldMetaData_.dimension[0] == dims[0]);
|
||||
assert(FieldMetaData_.dimension[1] == dims[1]);
|
||||
assert(FieldMetaData_.dimension[2] == dims[2]);
|
||||
assert(FieldMetaData_.dimension[3] == dims[3]);
|
||||
|
||||
found_FieldMetaData = 1;
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
|
||||
// is it a USQCD info field
|
||||
if ( xmlstring.find(std::string("usqcdInfo")) != std::string::npos ) {
|
||||
// std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"usqcdInfo",usqcdInfo_);
|
||||
found_usqcdInfo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
|
||||
XmlReader RD(xmlstring, true, "");
|
||||
read(RD,"scidacChecksum",scidacChecksum_);
|
||||
found_scidacChecksum = 1;
|
||||
}
|
||||
|
||||
} else {
|
||||
/////////////////////////////////
|
||||
// Binary data
|
||||
/////////////////////////////////
|
||||
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
|
||||
uint64_t offset= ftello(File);
|
||||
if ( format == std::string("IEEE64BIG") ) {
|
||||
GaugeSimpleMunger<dobj, sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
} else {
|
||||
GaugeSimpleMunger<fobj, sobj> munge;
|
||||
BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
|
||||
found_ildgBinary = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Minimally must find binary segment and checksum
|
||||
// Since this is an ILDG reader require ILDG format
|
||||
//////////////////////////////////////////////////////
|
||||
assert(found_ildgBinary);
|
||||
assert(found_ildgFormat);
|
||||
assert(found_scidacChecksum);
|
||||
|
||||
// Must find something with the lattice dimensions
|
||||
assert(found_FieldMetaData||found_ildgFormat);
|
||||
|
||||
if ( found_FieldMetaData ) {
|
||||
|
||||
std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
|
||||
|
||||
} else {
|
||||
|
||||
assert(found_ildgFormat);
|
||||
assert ( ildgFormat_.field == std::string("su3gauge") );
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Populate our Grid metadata as best we can
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::ostringstream vers; vers << ildgFormat_.version;
|
||||
FieldMetaData_.hdr_version = vers.str();
|
||||
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
|
||||
|
||||
FieldMetaData_.nd=4;
|
||||
FieldMetaData_.dimension.resize(4);
|
||||
|
||||
FieldMetaData_.dimension[0] = ildgFormat_.lx ;
|
||||
FieldMetaData_.dimension[1] = ildgFormat_.ly ;
|
||||
FieldMetaData_.dimension[2] = ildgFormat_.lz ;
|
||||
FieldMetaData_.dimension[3] = ildgFormat_.lt ;
|
||||
|
||||
if ( found_usqcdInfo ) {
|
||||
FieldMetaData_.plaquette = usqcdInfo_.plaq;
|
||||
FieldMetaData_.link_trace= usqcdInfo_.linktr;
|
||||
std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
|
||||
std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
|
||||
std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
|
||||
} else {
|
||||
FieldMetaData_.plaquette = 0.0;
|
||||
FieldMetaData_.link_trace= 0.0;
|
||||
std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Really really want to mandate a scidac checksum
|
||||
////////////////////////////////////////////////////////////
|
||||
if ( found_scidacChecksum ) {
|
||||
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
|
||||
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
|
||||
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
|
||||
assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
|
||||
assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
|
||||
std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
|
||||
assert(0); // Can I insist always checksum ?
|
||||
}
|
||||
|
||||
if ( found_FieldMetaData || found_usqcdInfo ) {
|
||||
FieldMetaData checker;
|
||||
GaugeStatistics(Umu,checker);
|
||||
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
|
||||
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
|
||||
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
//HAVE_LIME
|
||||
#endif
|
||||
|
||||
#endif
|
237
Grid/parallelIO/IldgIOtypes.h
Normal file
237
Grid/parallelIO/IldgIOtypes.h
Normal file
@ -0,0 +1,237 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/IldgIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ILDGTYPES_IO_H
|
||||
#define GRID_ILDGTYPES_IO_H
|
||||
|
||||
#ifdef HAVE_LIME
|
||||
extern "C" { // for linkage
|
||||
#include "lime.h"
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Data representation of records that enter ILDG and SciDac formats
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define GRID_FORMAT "grid-format"
|
||||
#define ILDG_FORMAT "ildg-format"
|
||||
#define ILDG_BINARY_DATA "ildg-binary-data"
|
||||
#define ILDG_DATA_LFN "ildg-data-lfn"
|
||||
#define SCIDAC_CHECKSUM "scidac-checksum"
|
||||
#define SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml"
|
||||
#define SCIDAC_FILE_XML "scidac-file-xml"
|
||||
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
|
||||
#define SCIDAC_RECORD_XML "scidac-record-xml"
|
||||
#define SCIDAC_BINARY_DATA "scidac-binary-data"
|
||||
// Unused SCIDAC records names; could move to support this functionality
|
||||
#define SCIDAC_SITELIST "scidac-sitelist"
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat
|
||||
const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat
|
||||
const int GRID_IO_FIELD = 0; // hardcode lift from QIO compat
|
||||
const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// QIO uses mandatory "private" records fixed format
|
||||
// Private is in principle "opaque" however it can't be changed now because that would break existing
|
||||
// file compatability, so should be correct to assume the undocumented but defacto file structure.
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct emptyUserRecord : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(emptyUserRecord,int,dummy);
|
||||
emptyUserRecord() { dummy=0; };
|
||||
};
|
||||
|
||||
////////////////////////
|
||||
// Scidac private file xml
|
||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
|
||||
////////////////////////
|
||||
struct scidacFile : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
|
||||
double, version,
|
||||
int, spacetime,
|
||||
std::string, dims, // must convert to int
|
||||
int, volfmt);
|
||||
|
||||
std::vector<int> getDimensions(void) {
|
||||
std::stringstream stream(dims);
|
||||
std::vector<int> dimensions;
|
||||
int n;
|
||||
while(stream >> n){
|
||||
dimensions.push_back(n);
|
||||
}
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
void setDimensions(std::vector<int> dimensions) {
|
||||
char delimiter = ' ';
|
||||
std::stringstream stream;
|
||||
for(int i=0;i<dimensions.size();i++){
|
||||
stream << dimensions[i];
|
||||
if ( i != dimensions.size()-1) {
|
||||
stream << delimiter <<std::endl;
|
||||
}
|
||||
}
|
||||
dims = stream.str();
|
||||
}
|
||||
|
||||
// Constructor provides Grid
|
||||
scidacFile() =default; // default constructor
|
||||
scidacFile(GridBase * grid){
|
||||
version = 1.0;
|
||||
spacetime = grid->_ndimension;
|
||||
setDimensions(grid->FullDimensions());
|
||||
volfmt = GRID_IO_SINGLEFILE;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// scidac-private-record-xml : example
|
||||
// <scidacRecord>
|
||||
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
|
||||
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
|
||||
// <typesize>144</typesize><datacount>4</datacount>
|
||||
// </scidacRecord>
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct scidacRecord : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
|
||||
double, version,
|
||||
std::string, date,
|
||||
int, recordtype,
|
||||
std::string, datatype,
|
||||
std::string, precision,
|
||||
int, colors,
|
||||
int, spins,
|
||||
int, typesize,
|
||||
int, datacount);
|
||||
|
||||
scidacRecord()
|
||||
: version(1.0), recordtype(0), colors(0), spins(0), typesize(0), datacount(0)
|
||||
{}
|
||||
};
|
||||
|
||||
////////////////////////
|
||||
// ILDG format
|
||||
////////////////////////
|
||||
struct ildgFormat : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
|
||||
double, version,
|
||||
std::string, field,
|
||||
int, precision,
|
||||
int, lx,
|
||||
int, ly,
|
||||
int, lz,
|
||||
int, lt);
|
||||
ildgFormat() { version=1.0; };
|
||||
};
|
||||
////////////////////////
|
||||
// USQCD info
|
||||
////////////////////////
|
||||
struct usqcdInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
|
||||
double, version,
|
||||
double, plaq,
|
||||
double, linktr,
|
||||
std::string, info);
|
||||
usqcdInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
////////////////////////
|
||||
// Scidac Checksum
|
||||
////////////////////////
|
||||
struct scidacChecksum : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
|
||||
double, version,
|
||||
std::string, suma,
|
||||
std::string, sumb);
|
||||
scidacChecksum() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Type: scidac-file-xml <title>MILC ILDG archival gauge configuration</title>
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Type:
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////
|
||||
// Scidac private file xml
|
||||
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
|
||||
////////////////////////
|
||||
|
||||
#if 0
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
struct usqcdPropFile : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
|
||||
double, version,
|
||||
std::string, type,
|
||||
std::string, info);
|
||||
usqcdPropFile() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
struct usqcdSourceInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
|
||||
double, version,
|
||||
std::string, info);
|
||||
usqcdSourceInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
struct usqcdPropInfo : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
|
||||
double, version,
|
||||
int, spin,
|
||||
int, color,
|
||||
std::string, info);
|
||||
usqcdPropInfo() {
|
||||
version=1.0;
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
327
Grid/parallelIO/MetaData.h
Normal file
327
Grid/parallelIO/MetaData.h
Normal file
@ -0,0 +1,327 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <unistd.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pwd.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Precision mapping
|
||||
///////////////////////////////////////////////////////
|
||||
template<class vobj> static std::string getFormatString (void)
|
||||
{
|
||||
std::string format;
|
||||
typedef typename getPrecision<vobj>::real_scalar_type stype;
|
||||
if ( sizeof(stype) == sizeof(float) ) {
|
||||
format = std::string("IEEE32BIG");
|
||||
}
|
||||
if ( sizeof(stype) == sizeof(double) ) {
|
||||
format = std::string("IEEE64BIG");
|
||||
}
|
||||
return format;
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// header specification/interpretation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class FieldMetaData : Serializable {
|
||||
public:
|
||||
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
|
||||
int, nd,
|
||||
std::vector<int>, dimension,
|
||||
std::vector<std::string>, boundary,
|
||||
int, data_start,
|
||||
std::string, hdr_version,
|
||||
std::string, storage_format,
|
||||
double, link_trace,
|
||||
double, plaquette,
|
||||
uint32_t, checksum,
|
||||
uint32_t, scidac_checksuma,
|
||||
uint32_t, scidac_checksumb,
|
||||
unsigned int, sequence_number,
|
||||
std::string, data_type,
|
||||
std::string, ensemble_id,
|
||||
std::string, ensemble_label,
|
||||
std::string, ildg_lfn,
|
||||
std::string, creator,
|
||||
std::string, creator_hardware,
|
||||
std::string, creation_date,
|
||||
std::string, archive_date,
|
||||
std::string, floating_point);
|
||||
// WARNING: non-initialised values might lead to twisted parallel IO
|
||||
// issues, std::string are fine because they initliase to size 0
|
||||
// as per C++ standard.
|
||||
FieldMetaData(void)
|
||||
: nd(4), dimension(4,0), boundary(4, ""), data_start(0),
|
||||
link_trace(0.), plaquette(0.), checksum(0),
|
||||
scidac_checksuma(0), scidac_checksumb(0), sequence_number(0)
|
||||
{}
|
||||
};
|
||||
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Bit and Physical Checksumming and QA of data
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
|
||||
{
|
||||
int nd = grid->_ndimension;
|
||||
header.nd = nd;
|
||||
header.dimension.resize(nd);
|
||||
header.boundary.resize(nd);
|
||||
header.data_start = 0;
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.dimension[d] = grid->_fdimensions[d];
|
||||
}
|
||||
for(int d=0;d<nd;d++) {
|
||||
header.boundary[d] = std::string("PERIODIC");
|
||||
}
|
||||
}
|
||||
|
||||
inline void MachineCharacteristics(FieldMetaData &header)
|
||||
{
|
||||
// Who
|
||||
struct passwd *pw = getpwuid (getuid());
|
||||
if (pw) header.creator = std::string(pw->pw_name);
|
||||
|
||||
// When
|
||||
std::time_t t = std::time(nullptr);
|
||||
std::tm tm_ = *std::localtime(&t);
|
||||
std::ostringstream oss;
|
||||
// oss << std::put_time(&tm_, "%c %Z");
|
||||
header.creation_date = oss.str();
|
||||
header.archive_date = header.creation_date;
|
||||
|
||||
// What
|
||||
struct utsname name; uname(&name);
|
||||
header.creator_hardware = std::string(name.nodename)+"-";
|
||||
header.creator_hardware+= std::string(name.machine)+"-";
|
||||
header.creator_hardware+= std::string(name.sysname)+"-";
|
||||
header.creator_hardware+= std::string(name.release);
|
||||
}
|
||||
|
||||
#define dump_meta_data(field, s) \
|
||||
s << "BEGIN_HEADER" << std::endl; \
|
||||
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
|
||||
s << "DATATYPE = " << field.data_type << std::endl; \
|
||||
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
|
||||
} \
|
||||
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
|
||||
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
|
||||
for(int i=0;i<4;i++){ \
|
||||
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
|
||||
} \
|
||||
\
|
||||
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
|
||||
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
|
||||
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
|
||||
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
|
||||
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
|
||||
s << "CREATOR = " << field.creator << std::endl; \
|
||||
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
|
||||
s << "CREATION_DATE = " << field.creation_date << std::endl; \
|
||||
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
|
||||
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
|
||||
s << "END_HEADER" << std::endl;
|
||||
|
||||
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vobj>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
|
||||
{
|
||||
// How to convert data precision etc...
|
||||
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
|
||||
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
|
||||
}
|
||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
|
||||
{
|
||||
// How to convert data precision etc...
|
||||
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
|
||||
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
|
||||
}
|
||||
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
|
||||
{
|
||||
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vLorentzColourMatrixF>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
GaugeStatistics(field,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
|
||||
{
|
||||
GridBase *grid = field._grid;
|
||||
std::string format = getFormatString<vLorentzColourMatrixD>();
|
||||
header.floating_point = format;
|
||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||
GridMetaData(grid,header);
|
||||
GaugeStatistics(field,header);
|
||||
MachineCharacteristics(header);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Utilities ; these are QCD aware
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
inline void reconstruct3(LorentzColourMatrix & cm)
|
||||
{
|
||||
const int x=0;
|
||||
const int y=1;
|
||||
const int z=2;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
|
||||
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
|
||||
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Some data types for intermediate storage
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
|
||||
|
||||
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
|
||||
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
|
||||
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Simple classes for precision conversion
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleUnmunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *out_buffer = (fobj_stype *)&out;
|
||||
sobj_stype *in_buffer = (sobj_stype *)∈
|
||||
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct BinarySimpleMunger {
|
||||
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
|
||||
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
|
||||
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
// take word by word and transform accoding to the status
|
||||
fobj_stype *in_buffer = (fobj_stype *)∈
|
||||
sobj_stype *out_buffer = (sobj_stype *)&out;
|
||||
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
|
||||
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
|
||||
assert(fobj_words == sobj_words);
|
||||
|
||||
for (unsigned int word = 0; word < sobj_words; word++)
|
||||
out_buffer[word] = in_buffer[word]; // type conversion on the fly
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct GaugeSimpleMunger{
|
||||
void operator()(fobj &in, sobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class fobj, class sobj>
|
||||
struct GaugeSimpleUnmunger {
|
||||
|
||||
void operator()(sobj &in, fobj &out) {
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
for (int i = 0; i < Nc; i++) {
|
||||
for (int j = 0; j < Nc; j++) {
|
||||
out(mu)()(i, j) = in(mu)()(i, j);
|
||||
}}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2munger{
|
||||
void operator() (fobj &in,sobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)()(i,j) = in(mu)(i)(j);
|
||||
}}
|
||||
}
|
||||
reconstruct3(out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class fobj,class sobj>
|
||||
struct Gauge3x2unmunger{
|
||||
void operator() (sobj &in,fobj &out){
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
for(int i=0;i<2;i++){
|
||||
for(int j=0;j<3;j++){
|
||||
out(mu)(i)(j) = in(mu)()(i,j);
|
||||
}}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
363
Grid/parallelIO/NerscIO.h
Normal file
363
Grid/parallelIO/NerscIO.h
Normal file
@ -0,0 +1,363 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/parallelIO/NerscIO.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Matt Spraggs <matthew.spraggs@gmail.com>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NERSC_IO_H
|
||||
#define GRID_NERSC_IO_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Write and read from fstream; comput header offset for payload
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class NerscIO : public BinaryIO {
|
||||
public:
|
||||
|
||||
static inline void truncate(std::string file){
|
||||
std::ofstream fout(file,std::ios::out);
|
||||
}
|
||||
|
||||
static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
|
||||
{
|
||||
std::ofstream fout(file,std::ios::out|std::ios::in);
|
||||
fout.seekp(0,std::ios::beg);
|
||||
dump_meta_data(field, fout);
|
||||
field.data_start = fout.tellp();
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
// for the header-reader
|
||||
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field)
|
||||
{
|
||||
uint64_t offset=0;
|
||||
std::map<std::string,std::string> header;
|
||||
std::string line;
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// read the header
|
||||
//////////////////////////////////////////////////
|
||||
std::ifstream fin(file);
|
||||
|
||||
getline(fin,line); // read one line and insist is
|
||||
|
||||
removeWhitespace(line);
|
||||
std::cout << GridLogMessage << "* " << line << std::endl;
|
||||
|
||||
assert(line==std::string("BEGIN_HEADER"));
|
||||
|
||||
do {
|
||||
getline(fin,line); // read one line
|
||||
std::cout << GridLogMessage << "* "<<line<< std::endl;
|
||||
int eq = line.find("=");
|
||||
if(eq >0) {
|
||||
std::string key=line.substr(0,eq);
|
||||
std::string val=line.substr(eq+1);
|
||||
removeWhitespace(key);
|
||||
removeWhitespace(val);
|
||||
|
||||
header[key] = val;
|
||||
}
|
||||
} while( line.find("END_HEADER") == std::string::npos );
|
||||
|
||||
field.data_start = fin.tellg();
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// chomp the values
|
||||
//////////////////////////////////////////////////
|
||||
field.hdr_version = header["HDR_VERSION"];
|
||||
field.data_type = header["DATATYPE"];
|
||||
field.storage_format = header["STORAGE_FORMAT"];
|
||||
|
||||
field.dimension[0] = std::stol(header["DIMENSION_1"]);
|
||||
field.dimension[1] = std::stol(header["DIMENSION_2"]);
|
||||
field.dimension[2] = std::stol(header["DIMENSION_3"]);
|
||||
field.dimension[3] = std::stol(header["DIMENSION_4"]);
|
||||
|
||||
assert(grid->_ndimension == 4);
|
||||
for(int d=0;d<4;d++){
|
||||
assert(grid->_fdimensions[d]==field.dimension[d]);
|
||||
}
|
||||
|
||||
field.link_trace = std::stod(header["LINK_TRACE"]);
|
||||
field.plaquette = std::stod(header["PLAQUETTE"]);
|
||||
|
||||
field.boundary[0] = header["BOUNDARY_1"];
|
||||
field.boundary[1] = header["BOUNDARY_2"];
|
||||
field.boundary[2] = header["BOUNDARY_3"];
|
||||
field.boundary[3] = header["BOUNDARY_4"];
|
||||
|
||||
field.checksum = std::stoul(header["CHECKSUM"],0,16);
|
||||
field.ensemble_id = header["ENSEMBLE_ID"];
|
||||
field.ensemble_label = header["ENSEMBLE_LABEL"];
|
||||
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
|
||||
field.creator = header["CREATOR"];
|
||||
field.creator_hardware = header["CREATOR_HARDWARE"];
|
||||
field.creation_date = header["CREATION_DATE"];
|
||||
field.archive_date = header["ARCHIVE_DATE"];
|
||||
field.floating_point = header["FLOATING_POINT"];
|
||||
|
||||
return field.data_start;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Now the meat: the object readers
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class vsimd>
|
||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
||||
FieldMetaData& header,
|
||||
std::string file)
|
||||
{
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
uint64_t offset = readHeader(file,Umu._grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
|
||||
int ieee32big = (format == std::string("IEEE32BIG"));
|
||||
int ieee32 = (format == std::string("IEEE32"));
|
||||
int ieee64big = (format == std::string("IEEE64BIG"));
|
||||
int ieee64 = (format == std::string("IEEE64"));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
||||
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
|
||||
if ( ieee32 || ieee32big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
if ( ieee64 || ieee64big ) {
|
||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
}
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
GaugeStatistics(Umu,clone);
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
|
||||
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
|
||||
<<" header "<<header.plaquette<<std::endl;
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
|
||||
<<" header "<<header.link_trace<<std::endl;
|
||||
|
||||
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
|
||||
std::cout << " Plaquette mismatch "<<std::endl;
|
||||
std::cout << Umu[0]<<std::endl;
|
||||
std::cout << Umu[1]<<std::endl;
|
||||
}
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << " checksum mismatch " << std::endl;
|
||||
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
|
||||
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
|
||||
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
|
||||
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||
}
|
||||
|
||||
template<class vsimd>
|
||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
||||
std::string file,
|
||||
int two_row,
|
||||
int bits32)
|
||||
{
|
||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
||||
|
||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
||||
FieldMetaData header;
|
||||
///////////////////////////////////////////
|
||||
// Following should become arguments
|
||||
///////////////////////////////////////////
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
typedef LorentzColourMatrixD fobj3D;
|
||||
typedef LorentzColour2x3D fobj2D;
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
GaugeStatistics(Umu,header);
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
// Sod it -- always write 3x3 double
|
||||
header.floating_point = std::string("IEEE64BIG");
|
||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||
nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<" plaq "<< header.plaquette <<std::endl;
|
||||
|
||||
}
|
||||
///////////////////////////////
|
||||
// RNG state
|
||||
///////////////////////////////
|
||||
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
// Following should become arguments
|
||||
FieldMetaData header;
|
||||
header.sequence_number = 1;
|
||||
header.ensemble_id = "UKQCD";
|
||||
header.ensemble_label = "DWF";
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
GridMetaData(grid,header);
|
||||
assert(header.nd==4);
|
||||
header.link_trace=0.0;
|
||||
header.plaquette=0.0;
|
||||
MachineCharacteristics(header);
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("RANLUX48");
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
header.floating_point = std::string("UINT32");
|
||||
header.data_type = std::string("MT19937");
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
header.floating_point = std::string("UINT64");
|
||||
header.data_type = std::string("SITMO");
|
||||
#endif
|
||||
|
||||
if ( grid->IsBoss() ) {
|
||||
truncate(file);
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
header.checksum = nersc_csum;
|
||||
if ( grid->IsBoss() ) {
|
||||
offset = writeHeader(header,file);
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage
|
||||
<<"Written NERSC RNG STATE "<<file<< " checksum "
|
||||
<<std::hex<<header.checksum
|
||||
<<std::dec<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file)
|
||||
{
|
||||
typedef typename GridParallelRNG::RngStateType RngStateType;
|
||||
|
||||
GridBase *grid = parallel._grid;
|
||||
|
||||
uint64_t offset = readHeader(file,grid,header);
|
||||
|
||||
FieldMetaData clone(header);
|
||||
|
||||
std::string format(header.floating_point);
|
||||
std::string data_type(header.data_type);
|
||||
|
||||
#ifdef RNG_RANLUX
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("RANLUX48"));
|
||||
#endif
|
||||
#ifdef RNG_MT19937
|
||||
assert(format == std::string("UINT32"));
|
||||
assert(data_type == std::string("MT19937"));
|
||||
#endif
|
||||
#ifdef RNG_SITMO
|
||||
assert(format == std::string("UINT64"));
|
||||
assert(data_type == std::string("SITMO"));
|
||||
#endif
|
||||
|
||||
// depending on datatype, set up munger;
|
||||
// munger is a function of <floating point, Real, data_type>
|
||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||
BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
|
||||
|
||||
if ( nersc_csum != header.checksum ) {
|
||||
std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
|
||||
exit(0);
|
||||
}
|
||||
assert(nersc_csum == header.checksum );
|
||||
|
||||
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
#endif
|
75
Grid/perfmon/PerfCount.cc
Normal file
75
Grid/perfmon/PerfCount.cc
Normal file
@ -0,0 +1,75 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/PerfCount.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||
#define RawConfig(A,B) (A<<8|B)
|
||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||
#ifdef __linux__
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." , INSTRUCTIONS},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." , CACHE_REFERENCES},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." , CPUCYCLES },
|
||||
// 4
|
||||
#ifdef KNL
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||
// 11
|
||||
#else
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS....",INSTRUCTIONS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS.....",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS...",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS..",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||
// 11
|
||||
#endif
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS.......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS.....",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS....",L1D_READ_ACCESS},
|
||||
//15
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS...",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......",INSTRUCTIONS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS....",INSTRUCTIONS}
|
||||
//19
|
||||
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
|
||||
#endif
|
||||
};
|
||||
}
|
245
Grid/perfmon/PerfCount.h
Normal file
245
Grid/perfmon/PerfCount.h
Normal file
@ -0,0 +1,245 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/PerfCount.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PERFCOUNT_H
|
||||
#define GRID_PERFCOUNT_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <syscall.h>
|
||||
#include <linux/perf_event.h>
|
||||
#else
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#ifdef __linux__
|
||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags)
|
||||
{
|
||||
int ret=0;
|
||||
|
||||
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
|
||||
group_fd, flags);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TIMERS_OFF
|
||||
|
||||
|
||||
inline uint64_t cyclecount(void){
|
||||
return 0;
|
||||
}
|
||||
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
|
||||
#define __SSC_STOP __SSC_MARK(0x110)
|
||||
#define __SSC_START __SSC_MARK(0x111)
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define __SSC_MARK(mark)
|
||||
#define __SSC_STOP
|
||||
#define __SSC_START
|
||||
|
||||
/*
|
||||
* cycle counters arch dependent
|
||||
*/
|
||||
|
||||
#ifdef __bgq__
|
||||
inline uint64_t cyclecount(void){
|
||||
uint64_t tmp;
|
||||
asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) );
|
||||
return tmp;
|
||||
}
|
||||
#elif defined __x86_64__
|
||||
inline uint64_t cyclecount(void){
|
||||
return __rdtsc();
|
||||
// unsigned int dummy;
|
||||
// return __rdtscp(&dummy);
|
||||
}
|
||||
#else
|
||||
|
||||
inline uint64_t cyclecount(void){
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
class PerformanceCounter {
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
public:
|
||||
uint32_t type;
|
||||
uint64_t config;
|
||||
const char *name;
|
||||
int normalisation;
|
||||
} PerformanceCounterConfig;
|
||||
|
||||
static const PerformanceCounterConfig PerformanceCounterConfigs [];
|
||||
|
||||
public:
|
||||
|
||||
enum PerformanceCounterType {
|
||||
CACHE_REFERENCES=0,
|
||||
CACHE_MISSES=1,
|
||||
CPUCYCLES=2,
|
||||
INSTRUCTIONS=3,
|
||||
L1D_READ_ACCESS=4,
|
||||
PERFORMANCE_COUNTER_NUM_TYPES=19
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
int PCT;
|
||||
|
||||
long long count;
|
||||
long long cycles;
|
||||
int fd;
|
||||
int cyclefd;
|
||||
unsigned long long elapsed;
|
||||
uint64_t begin;
|
||||
|
||||
static int NumTypes(void){
|
||||
return PERFORMANCE_COUNTER_NUM_TYPES;
|
||||
}
|
||||
|
||||
PerformanceCounter(int _pct) {
|
||||
#ifdef __linux__
|
||||
assert(_pct>=0);
|
||||
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
|
||||
fd=-1;
|
||||
cyclefd=-1;
|
||||
count=0;
|
||||
cycles=0;
|
||||
PCT =_pct;
|
||||
Open();
|
||||
#endif
|
||||
}
|
||||
void Open(void)
|
||||
{
|
||||
#ifdef __linux__
|
||||
struct perf_event_attr pe;
|
||||
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
|
||||
pe.disabled = 1;
|
||||
pe.exclude_kernel = 1;
|
||||
pe.exclude_hv = 1;
|
||||
pe.inherit = 1;
|
||||
|
||||
pe.type = PerformanceCounterConfigs[PCT].type;
|
||||
pe.config= PerformanceCounterConfigs[PCT].config;
|
||||
const char * name = PerformanceCounterConfigs[PCT].name;
|
||||
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
int norm = PerformanceCounterConfigs[PCT].normalisation;
|
||||
pe.type = PerformanceCounterConfigs[norm].type;
|
||||
pe.config= PerformanceCounterConfigs[norm].config;
|
||||
name = PerformanceCounterConfigs[norm].name;
|
||||
cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (cyclefd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Start(void)
|
||||
{
|
||||
#ifdef __linux__
|
||||
if ( fd!= -1) {
|
||||
::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||
::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
}
|
||||
begin =cyclecount();
|
||||
#else
|
||||
begin = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Stop(void) {
|
||||
count=0;
|
||||
cycles=0;
|
||||
#ifdef __linux__
|
||||
ssize_t ign;
|
||||
if ( fd!= -1) {
|
||||
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
ign=::read(fd, &count, sizeof(long long));
|
||||
ign+=::read(cyclefd, &cycles, sizeof(long long));
|
||||
assert(ign=2*sizeof(long long));
|
||||
}
|
||||
elapsed = cyclecount() - begin;
|
||||
#else
|
||||
elapsed = 0;
|
||||
#endif
|
||||
|
||||
}
|
||||
void Report(void) {
|
||||
#ifdef __linux__
|
||||
int N = PerformanceCounterConfigs[PCT].normalisation;
|
||||
const char * sn = PerformanceCounterConfigs[N].name ;
|
||||
const char * sc = PerformanceCounterConfigs[PCT].name;
|
||||
std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles,
|
||||
sc, count, sc,sn, (double)count/(double)cycles);
|
||||
#else
|
||||
std::printf("%llu cycles \n", elapsed );
|
||||
#endif
|
||||
}
|
||||
|
||||
~PerformanceCounter()
|
||||
{
|
||||
#ifdef __linux__
|
||||
::close(fd); ::close(cyclefd);
|
||||
#endif
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
245
Grid/perfmon/Stat.cc
Normal file
245
Grid/perfmon/Stat.cc
Normal file
@ -0,0 +1,245 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/perfmon/Stat.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
bool PmuStat::pmu_initialized=false;
|
||||
|
||||
|
||||
void PmuStat::init(const char *regname)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
name = regname;
|
||||
if (!pmu_initialized)
|
||||
{
|
||||
std::cout<<"initialising pmu"<<std::endl;
|
||||
pmu_initialized = true;
|
||||
pmu_init();
|
||||
}
|
||||
clear();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::clear(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
count = 0;
|
||||
tregion = 0;
|
||||
pmc0 = 0;
|
||||
pmc1 = 0;
|
||||
inst = 0;
|
||||
cyc = 0;
|
||||
ref = 0;
|
||||
tcycles = 0;
|
||||
reads = 0;
|
||||
writes = 0;
|
||||
#endif
|
||||
}
|
||||
void PmuStat::print(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
std::cout <<"Reg "<<std::string(name)<<":\n";
|
||||
std::cout <<" region "<<tregion<<std::endl;
|
||||
std::cout <<" cycles "<<tcycles<<std::endl;
|
||||
std::cout <<" inst "<<inst <<std::endl;
|
||||
std::cout <<" cyc "<<cyc <<std::endl;
|
||||
std::cout <<" ref "<<ref <<std::endl;
|
||||
std::cout <<" pmc0 "<<pmc0 <<std::endl;
|
||||
std::cout <<" pmc1 "<<pmc1 <<std::endl;
|
||||
std::cout <<" count "<<count <<std::endl;
|
||||
std::cout <<" reads "<<reads <<std::endl;
|
||||
std::cout <<" writes "<<writes <<std::endl;
|
||||
#endif
|
||||
}
|
||||
void PmuStat::start(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
pmu_start();
|
||||
++count;
|
||||
xmemctrs(&mrstart, &mwstart);
|
||||
tstart = __rdtsc();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::enter(int t)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
counters[0][t] = __rdpmc(0);
|
||||
counters[1][t] = __rdpmc(1);
|
||||
counters[2][t] = __rdpmc((1<<30)|0);
|
||||
counters[3][t] = __rdpmc((1<<30)|1);
|
||||
counters[4][t] = __rdpmc((1<<30)|2);
|
||||
counters[5][t] = __rdtsc();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::exit(int t)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
counters[0][t] = __rdpmc(0) - counters[0][t];
|
||||
counters[1][t] = __rdpmc(1) - counters[1][t];
|
||||
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
|
||||
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
|
||||
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
|
||||
counters[5][t] = __rdtsc() - counters[5][t];
|
||||
#endif
|
||||
}
|
||||
void PmuStat::accum(int nthreads)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
tend = __rdtsc();
|
||||
xmemctrs(&mrend, &mwend);
|
||||
pmu_stop();
|
||||
for (int t = 0; t < nthreads; ++t) {
|
||||
pmc0 += counters[0][t];
|
||||
pmc1 += counters[1][t];
|
||||
inst += counters[2][t];
|
||||
cyc += counters[3][t];
|
||||
ref += counters[4][t];
|
||||
tcycles += counters[5][t];
|
||||
}
|
||||
uint64_t region = tend - tstart;
|
||||
tregion += region;
|
||||
uint64_t mreads = mrend - mrstart;
|
||||
reads += mreads;
|
||||
uint64_t mwrites = mwend - mwstart;
|
||||
writes += mwrites;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void PmuStat::pmu_fini(void) {}
|
||||
void PmuStat::pmu_start(void) {};
|
||||
void PmuStat::pmu_stop(void) {};
|
||||
void PmuStat::pmu_init(void)
|
||||
{
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
KNLsetup();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
|
||||
{
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
ctrs c;
|
||||
KNLreadctrs(c);
|
||||
uint64_t emr = 0, emw = 0;
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
emr += c.edcrd[i];
|
||||
emw += c.edcwr[i];
|
||||
}
|
||||
*mr = emr;
|
||||
*mw = emw;
|
||||
#else
|
||||
*mr = *mw = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
|
||||
struct knl_gbl_ PmuStat::gbl;
|
||||
|
||||
#define PMU_MEM
|
||||
|
||||
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
|
||||
{
|
||||
char fname[1024];
|
||||
snprintf(fname, sizeof(fname), "%s/type", ename);
|
||||
FILE *fp = fopen(fname, "r");
|
||||
if (fp == 0) {
|
||||
::printf("open %s", fname);
|
||||
::exit(0);
|
||||
}
|
||||
int type;
|
||||
int ret = fscanf(fp, "%d", &type);
|
||||
assert(ret == 1);
|
||||
fclose(fp);
|
||||
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
|
||||
|
||||
struct perf_event_attr hw = {};
|
||||
hw.size = sizeof(hw);
|
||||
hw.type = type;
|
||||
// see /sys/devices/uncore_*/format/*
|
||||
// All of the events we are interested in are configured the same way, but
|
||||
// that isn't always true. Proper code would parse the format files
|
||||
hw.config = event | (umask << 8);
|
||||
//hw.read_format = PERF_FORMAT_GROUP;
|
||||
// unfortunately the above only works within a single PMU; might
|
||||
// as well just read them one at a time
|
||||
int cpu = 0;
|
||||
fd = perf_event_open(&hw, -1, cpu, -1, 0);
|
||||
if (fd == -1) {
|
||||
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
|
||||
::exit(0);
|
||||
} else {
|
||||
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PmuStat::KNLsetup(void){
|
||||
|
||||
int ret;
|
||||
char fname[1024];
|
||||
|
||||
// MC RPQ inserts and WPQ inserts (reads & writes)
|
||||
for (int mc = 0; mc < NMC; ++mc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
|
||||
// RPQ Inserts
|
||||
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
|
||||
// WPQ Inserts
|
||||
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
|
||||
}
|
||||
// EDC RPQ inserts and WPQ inserts
|
||||
for (int edc=0; edc < NEDC; ++edc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
|
||||
// RPQ inserts
|
||||
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
|
||||
// WPQ inserts
|
||||
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
|
||||
}
|
||||
// EDC HitE, HitM, MissE, MissM
|
||||
for (int edc=0; edc < NEDC; ++edc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
|
||||
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
|
||||
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
|
||||
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
|
||||
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t PmuStat::KNLreadctr(int fd)
|
||||
{
|
||||
uint64_t data;
|
||||
size_t s = ::read(fd, &data, sizeof(data));
|
||||
if (s != sizeof(uint64_t)){
|
||||
::printf("read counter %lu", s);
|
||||
::exit(0);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
void PmuStat::KNLreadctrs(ctrs &c)
|
||||
{
|
||||
for (int i = 0; i < NMC; ++i)
|
||||
{
|
||||
c.mcrd[i] = KNLreadctr(gbl.mc_rd[i]);
|
||||
c.mcwr[i] = KNLreadctr(gbl.mc_wr[i]);
|
||||
}
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
c.edcrd[i] = KNLreadctr(gbl.edc_rd[i]);
|
||||
c.edcwr[i] = KNLreadctr(gbl.edc_wr[i]);
|
||||
}
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
c.edchite[i] = KNLreadctr(gbl.edc_hite[i]);
|
||||
c.edchitm[i] = KNLreadctr(gbl.edc_hitm[i]);
|
||||
c.edcmisse[i] = KNLreadctr(gbl.edc_misse[i]);
|
||||
c.edcmissm[i] = KNLreadctr(gbl.edc_missm[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
104
Grid/perfmon/Stat.h
Normal file
104
Grid/perfmon/Stat.h
Normal file
@ -0,0 +1,104 @@
|
||||
#ifndef _GRID_STAT_H
|
||||
#define _GRID_STAT_H
|
||||
|
||||
#ifdef AVX512
|
||||
#define _KNIGHTS_LANDING_ROOTONLY
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Extra KNL counters from MCDRAM
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
#define NMC 6
|
||||
#define NEDC 8
|
||||
struct ctrs
|
||||
{
|
||||
uint64_t mcrd[NMC];
|
||||
uint64_t mcwr[NMC];
|
||||
uint64_t edcrd[NEDC];
|
||||
uint64_t edcwr[NEDC];
|
||||
uint64_t edchite[NEDC];
|
||||
uint64_t edchitm[NEDC];
|
||||
uint64_t edcmisse[NEDC];
|
||||
uint64_t edcmissm[NEDC];
|
||||
};
|
||||
// Peter/Azusa:
|
||||
// Our modification of a code provided by Larry Meadows from Intel
|
||||
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
|
||||
// so is already public and in the linux kernel for KNL.
|
||||
struct knl_gbl_
|
||||
{
|
||||
int mc_rd[NMC];
|
||||
int mc_wr[NMC];
|
||||
int edc_rd[NEDC];
|
||||
int edc_wr[NEDC];
|
||||
int edc_hite[NEDC];
|
||||
int edc_hitm[NEDC];
|
||||
int edc_misse[NEDC];
|
||||
int edc_missm[NEDC];
|
||||
};
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class PmuStat
|
||||
{
|
||||
uint64_t counters[8][256];
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static struct knl_gbl_ gbl;
|
||||
#endif
|
||||
const char *name;
|
||||
|
||||
uint64_t reads; // memory reads
|
||||
uint64_t writes; // memory writes
|
||||
uint64_t mrstart; // memory read counter at start of parallel region
|
||||
uint64_t mrend; // memory read counter at end of parallel region
|
||||
uint64_t mwstart; // memory write counter at start of parallel region
|
||||
uint64_t mwend; // memory write counter at end of parallel region
|
||||
|
||||
// cumulative counters
|
||||
uint64_t count; // number of invocations
|
||||
uint64_t tregion; // total time in parallel region (from thread 0)
|
||||
uint64_t tcycles; // total cycles inside parallel region
|
||||
uint64_t inst, ref, cyc; // fixed counters
|
||||
uint64_t pmc0, pmc1;// pmu
|
||||
// add memory counters here
|
||||
// temp variables
|
||||
uint64_t tstart; // tsc at start of parallel region
|
||||
uint64_t tend; // tsc at end of parallel region
|
||||
// map for ctrs values
|
||||
// 0 pmc0 start
|
||||
// 1 pmc0 end
|
||||
// 2 pmc1 start
|
||||
// 3 pmc1 end
|
||||
// 4 tsc start
|
||||
// 5 tsc end
|
||||
static bool pmu_initialized;
|
||||
public:
|
||||
static bool is_init(void){ return pmu_initialized;}
|
||||
static void pmu_init(void);
|
||||
static void pmu_fini(void);
|
||||
static void pmu_start(void);
|
||||
static void pmu_stop(void);
|
||||
void accum(int nthreads);
|
||||
static void xmemctrs(uint64_t *mr, uint64_t *mw);
|
||||
void start(void);
|
||||
void enter(int t);
|
||||
void exit(int t);
|
||||
void print(void);
|
||||
void init(const char *regname);
|
||||
void clear(void);
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static void KNLsetup(void);
|
||||
static uint64_t KNLreadctr(int fd);
|
||||
static void KNLreadctrs(ctrs &c);
|
||||
static void KNLevsetup(const char *ename, int &fd, int event, int umask);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
111
Grid/perfmon/Timer.h
Normal file
111
Grid/perfmon/Timer.h
Normal file
@ -0,0 +1,111 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Timer.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_TIME_H
|
||||
#define GRID_TIME_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
// Dress the output; use std::chrono
|
||||
|
||||
// C++11 time facilities better?
|
||||
inline double usecond(void) {
|
||||
struct timeval tv;
|
||||
#ifdef TIMERS_ON
|
||||
gettimeofday(&tv,NULL);
|
||||
#endif
|
||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
||||
}
|
||||
|
||||
typedef std::chrono::system_clock GridClock;
|
||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||
typedef std::chrono::milliseconds GridMillisecs;
|
||||
typedef std::chrono::microseconds GridTime;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
|
||||
{
|
||||
stream << time.count()<<" ms";
|
||||
return stream;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::microseconds & time)
|
||||
{
|
||||
stream << time.count()<<" usec";
|
||||
return stream;
|
||||
}
|
||||
|
||||
class GridStopWatch {
|
||||
private:
|
||||
bool running;
|
||||
GridTimePoint start;
|
||||
GridUsecs accumulator;
|
||||
public:
|
||||
GridStopWatch () {
|
||||
Reset();
|
||||
}
|
||||
void Start(void) {
|
||||
assert(running == false);
|
||||
#ifdef TIMERS_ON
|
||||
start = GridClock::now();
|
||||
#endif
|
||||
running = true;
|
||||
}
|
||||
void Stop(void) {
|
||||
assert(running == true);
|
||||
#ifdef TIMERS_ON
|
||||
accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start);
|
||||
#endif
|
||||
running = false;
|
||||
};
|
||||
void Reset(void){
|
||||
running = false;
|
||||
#ifdef TIMERS_ON
|
||||
start = GridClock::now();
|
||||
#endif
|
||||
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
||||
}
|
||||
GridTime Elapsed(void) {
|
||||
assert(running == false);
|
||||
return std::chrono::duration_cast<GridTime>( accumulator );
|
||||
}
|
||||
uint64_t useconds(void){
|
||||
assert(running == false);
|
||||
return (uint64_t) accumulator.count();
|
||||
}
|
||||
bool isRunning(void){
|
||||
return running;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
74
Grid/pugixml/pugiconfig.hpp
Normal file
74
Grid/pugixml/pugiconfig.hpp
Normal file
@ -0,0 +1,74 @@
|
||||
/**
|
||||
* pugixml parser - version 1.9
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
* of this file.
|
||||
*
|
||||
* This work is based on the pugxml parser, which is:
|
||||
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
|
||||
*/
|
||||
|
||||
#ifndef HEADER_PUGICONFIG_HPP
|
||||
#define HEADER_PUGICONFIG_HPP
|
||||
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to enable compact mode
|
||||
// #define PUGIXML_COMPACT
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
// Uncomment this to disable STL
|
||||
// #define PUGIXML_NO_STL
|
||||
|
||||
// Uncomment this to disable exceptions
|
||||
// #define PUGIXML_NO_EXCEPTIONS
|
||||
|
||||
// Set this to control attributes for public classes/functions, i.e.:
|
||||
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
|
||||
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
|
||||
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
|
||||
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
|
||||
|
||||
// Tune these constants to adjust memory-related behavior
|
||||
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
|
||||
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
|
||||
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
|
||||
|
||||
// Uncomment this to switch to header-only version
|
||||
// #define PUGIXML_HEADER_ONLY
|
||||
|
||||
// Uncomment this to enable long long support
|
||||
// #define PUGIXML_HAS_LONG_LONG
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
12796
Grid/pugixml/pugixml.cc
Normal file
12796
Grid/pugixml/pugixml.cc
Normal file
File diff suppressed because it is too large
Load Diff
1461
Grid/pugixml/pugixml.h
Normal file
1461
Grid/pugixml/pugixml.h
Normal file
File diff suppressed because it is too large
Load Diff
52
Grid/pugixml/readme.txt
Normal file
52
Grid/pugixml/readme.txt
Normal file
@ -0,0 +1,52 @@
|
||||
pugixml 1.9 - an XML processing library
|
||||
|
||||
Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
Report bugs and download new versions at http://pugixml.org/
|
||||
|
||||
This is the distribution of pugixml, which is a C++ XML processing library,
|
||||
which consists of a DOM-like interface with rich traversal/modification
|
||||
capabilities, an extremely fast XML parser which constructs the DOM tree from
|
||||
an XML file/buffer, and an XPath 1.0 implementation for complex data-driven
|
||||
tree queries. Full Unicode support is also available, with Unicode interface
|
||||
variants and conversions between different Unicode encodings (which happen
|
||||
automatically during parsing/saving).
|
||||
|
||||
The distribution contains the following folders:
|
||||
|
||||
contrib/ - various contributions to pugixml
|
||||
|
||||
docs/ - documentation
|
||||
docs/samples - pugixml usage examples
|
||||
docs/quickstart.html - quick start guide
|
||||
docs/manual.html - complete manual
|
||||
|
||||
scripts/ - project files for IDE/build systems
|
||||
|
||||
src/ - header and source files
|
||||
|
||||
readme.txt - this file.
|
||||
|
||||
This library is distributed under the MIT License:
|
||||
|
||||
Copyright (c) 2006-2018 Arseny Kapoulkine
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of this software and associated documentation
|
||||
files (the "Software"), to deal in the Software without
|
||||
restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
124
Grid/qcd/LatticeTheories.h
Normal file
124
Grid/qcd/LatticeTheories.h
Normal file
@ -0,0 +1,124 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/QCD.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LT_H
|
||||
#define GRID_LT_H
|
||||
namespace Grid{
|
||||
|
||||
// First steps in the complete generalization of the Physics part
|
||||
// Design not final
|
||||
namespace LatticeTheories {
|
||||
|
||||
template <int Dimensions>
|
||||
struct LatticeTheory {
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nds = Dimensions * 2; // double stored field
|
||||
template <typename vtype>
|
||||
using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
};
|
||||
|
||||
template <int Dimensions, int Colours>
|
||||
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
|
||||
static const int Nds = Dimensions * 2;
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nc = Colours;
|
||||
|
||||
template <typename vtype>
|
||||
using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;
|
||||
template <typename vtype>
|
||||
using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;
|
||||
template <typename vtype>
|
||||
using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
|
||||
template <typename vtype>
|
||||
using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
};
|
||||
|
||||
template <int Dimensions, int Colours, int Spin>
|
||||
struct FermionicLatticeGaugeTheory
|
||||
: public LatticeGaugeTheory<Dimensions, Colours> {
|
||||
static const int Nd = Dimensions;
|
||||
static const int Nds = Dimensions * 2;
|
||||
static const int Nc = Colours;
|
||||
static const int Ns = Spin;
|
||||
|
||||
template <typename vtype>
|
||||
using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template <typename vtype>
|
||||
using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
// These 2 only if Spin is a multiple of 2
|
||||
static const int Nhs = Spin / 2;
|
||||
template <typename vtype>
|
||||
using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template <typename vtype>
|
||||
using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
|
||||
//tests
|
||||
typedef iColourMatrix<Complex> ColourMatrix;
|
||||
typedef iColourMatrix<ComplexF> ColourMatrixF;
|
||||
typedef iColourMatrix<ComplexD> ColourMatrixD;
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Examples, not complete now.
|
||||
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
|
||||
static const int Xp = 0;
|
||||
static const int Yp = 1;
|
||||
static const int Zp = 2;
|
||||
static const int Tp = 3;
|
||||
static const int Xm = 4;
|
||||
static const int Ym = 5;
|
||||
static const int Zm = 6;
|
||||
static const int Tm = 7;
|
||||
|
||||
typedef FermionicLatticeGaugeTheory FLGT;
|
||||
|
||||
typedef FLGT::iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef FLGT::iSpinMatrix<ComplexF > SpinMatrixF;
|
||||
typedef FLGT::iSpinMatrix<ComplexD > SpinMatrixD;
|
||||
|
||||
};
|
||||
struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill
|
||||
};
|
||||
|
||||
template <int Dimensions>
|
||||
struct Scalar : public LatticeTheory<Dimensions> {};
|
||||
|
||||
}; // LatticeTheories
|
||||
|
||||
} // Grid
|
||||
|
||||
#endif
|
511
Grid/qcd/QCD.h
Normal file
511
Grid/qcd/QCD.h
Normal file
@ -0,0 +1,511 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/QCD.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_BASE_H
|
||||
#define GRID_QCD_BASE_H
|
||||
namespace Grid{
|
||||
namespace QCD {
|
||||
|
||||
static const int Xdir = 0;
|
||||
static const int Ydir = 1;
|
||||
static const int Zdir = 2;
|
||||
static const int Tdir = 3;
|
||||
|
||||
|
||||
static const int Xp = 0;
|
||||
static const int Yp = 1;
|
||||
static const int Zp = 2;
|
||||
static const int Tp = 3;
|
||||
static const int Xm = 4;
|
||||
static const int Ym = 5;
|
||||
static const int Zm = 6;
|
||||
static const int Tm = 7;
|
||||
|
||||
static const int Nc=3;
|
||||
static const int Ns=4;
|
||||
static const int Nd=4;
|
||||
static const int Nhs=2; // half spinor
|
||||
static const int Nds=8; // double stored gauge field
|
||||
static const int Ngp=2; // gparity index range
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// QCD iMatrix types
|
||||
// Index conventions: Lorentz x Spin x Colour
|
||||
// note: static const int or constexpr will work for type deductions
|
||||
// with the intel compiler (up to version 17)
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
#define ColourIndex 2
|
||||
#define SpinIndex 1
|
||||
#define LorentzIndex 0
|
||||
|
||||
// Also should make these a named enum type
|
||||
static const int DaggerNo=0;
|
||||
static const int DaggerYes=1;
|
||||
static const int InverseNo=0;
|
||||
static const int InverseYes=1;
|
||||
|
||||
// Useful traits is this a spin index
|
||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||
|
||||
const int SpinorIndex = 2;
|
||||
template<typename T> struct isSpinor {
|
||||
static const bool value = (SpinorIndex==T::TensorLevel);
|
||||
};
|
||||
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||
|
||||
// ChrisK very keen to add extra space for Gparity doubling.
|
||||
//
|
||||
// Also add domain wall index, in a way where Wilson operator
|
||||
// naturally distributes across the 5th dimensions.
|
||||
//
|
||||
// That probably makes for GridRedBlack4dCartesian grid.
|
||||
|
||||
// s,sp,c,spc,lc
|
||||
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
|
||||
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
|
||||
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
|
||||
template<typename vtype> using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds > ;
|
||||
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
|
||||
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
|
||||
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
|
||||
// Spin matrix
|
||||
typedef iSpinMatrix<Complex > SpinMatrix;
|
||||
typedef iSpinMatrix<ComplexF > SpinMatrixF;
|
||||
typedef iSpinMatrix<ComplexD > SpinMatrixD;
|
||||
|
||||
typedef iSpinMatrix<vComplex > vSpinMatrix;
|
||||
typedef iSpinMatrix<vComplexF> vSpinMatrixF;
|
||||
typedef iSpinMatrix<vComplexD> vSpinMatrixD;
|
||||
|
||||
// Colour Matrix
|
||||
typedef iColourMatrix<Complex > ColourMatrix;
|
||||
typedef iColourMatrix<ComplexF > ColourMatrixF;
|
||||
typedef iColourMatrix<ComplexD > ColourMatrixD;
|
||||
|
||||
typedef iColourMatrix<vComplex > vColourMatrix;
|
||||
typedef iColourMatrix<vComplexF> vColourMatrixF;
|
||||
typedef iColourMatrix<vComplexD> vColourMatrixD;
|
||||
|
||||
// SpinColour matrix
|
||||
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
|
||||
typedef iSpinColourMatrix<ComplexF > SpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<ComplexD > SpinColourMatrixD;
|
||||
|
||||
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
|
||||
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
|
||||
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
|
||||
|
||||
// LorentzColour
|
||||
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
|
||||
typedef iLorentzColourMatrix<ComplexF > LorentzColourMatrixF;
|
||||
typedef iLorentzColourMatrix<ComplexD > LorentzColourMatrixD;
|
||||
|
||||
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
|
||||
typedef iLorentzColourMatrix<vComplexF> vLorentzColourMatrixF;
|
||||
typedef iLorentzColourMatrix<vComplexD> vLorentzColourMatrixD;
|
||||
|
||||
// DoubleStored gauge field
|
||||
typedef iDoubleStoredColourMatrix<Complex > DoubleStoredColourMatrix;
|
||||
typedef iDoubleStoredColourMatrix<ComplexF > DoubleStoredColourMatrixF;
|
||||
typedef iDoubleStoredColourMatrix<ComplexD > DoubleStoredColourMatrixD;
|
||||
|
||||
typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
|
||||
typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
|
||||
typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
|
||||
|
||||
// Spin vector
|
||||
typedef iSpinVector<Complex > SpinVector;
|
||||
typedef iSpinVector<ComplexF> SpinVectorF;
|
||||
typedef iSpinVector<ComplexD> SpinVectorD;
|
||||
|
||||
typedef iSpinVector<vComplex > vSpinVector;
|
||||
typedef iSpinVector<vComplexF> vSpinVectorF;
|
||||
typedef iSpinVector<vComplexD> vSpinVectorD;
|
||||
|
||||
// Colour vector
|
||||
typedef iColourVector<Complex > ColourVector;
|
||||
typedef iColourVector<ComplexF> ColourVectorF;
|
||||
typedef iColourVector<ComplexD> ColourVectorD;
|
||||
|
||||
typedef iColourVector<vComplex > vColourVector;
|
||||
typedef iColourVector<vComplexF> vColourVectorF;
|
||||
typedef iColourVector<vComplexD> vColourVectorD;
|
||||
|
||||
// SpinColourVector
|
||||
typedef iSpinColourVector<Complex > SpinColourVector;
|
||||
typedef iSpinColourVector<ComplexF> SpinColourVectorF;
|
||||
typedef iSpinColourVector<ComplexD> SpinColourVectorD;
|
||||
|
||||
typedef iSpinColourVector<vComplex > vSpinColourVector;
|
||||
typedef iSpinColourVector<vComplexF> vSpinColourVectorF;
|
||||
typedef iSpinColourVector<vComplexD> vSpinColourVectorD;
|
||||
|
||||
// HalfSpin vector
|
||||
typedef iHalfSpinVector<Complex > HalfSpinVector;
|
||||
typedef iHalfSpinVector<ComplexF> HalfSpinVectorF;
|
||||
typedef iHalfSpinVector<ComplexD> HalfSpinVectorD;
|
||||
|
||||
typedef iHalfSpinVector<vComplex > vHalfSpinVector;
|
||||
typedef iHalfSpinVector<vComplexF> vHalfSpinVectorF;
|
||||
typedef iHalfSpinVector<vComplexD> vHalfSpinVectorD;
|
||||
|
||||
// HalfSpinColour vector
|
||||
typedef iHalfSpinColourVector<Complex > HalfSpinColourVector;
|
||||
typedef iHalfSpinColourVector<ComplexF> HalfSpinColourVectorF;
|
||||
typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
|
||||
|
||||
typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
|
||||
typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
|
||||
typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
|
||||
|
||||
// singlets
|
||||
typedef iSinglet<Complex > TComplex; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexF> TComplexF; // FIXME This is painful. Tensor singlet complex type.
|
||||
typedef iSinglet<ComplexD> TComplexD; // FIXME This is painful. Tensor singlet complex type.
|
||||
|
||||
typedef iSinglet<vComplex > vTComplex ; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexF> vTComplexF; // what if we don't know the tensor structure
|
||||
typedef iSinglet<vComplexD> vTComplexD; // what if we don't know the tensor structure
|
||||
|
||||
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealF> TRealF; // Shouldn't need these; can I make it work without?
|
||||
typedef iSinglet<RealD> TRealD; // Shouldn't need these; can I make it work without?
|
||||
|
||||
typedef iSinglet<vReal > vTReal;
|
||||
typedef iSinglet<vRealF> vTRealF;
|
||||
typedef iSinglet<vRealD> vTRealD;
|
||||
|
||||
typedef iSinglet<vInteger> vTInteger;
|
||||
typedef iSinglet<Integer > TInteger;
|
||||
|
||||
|
||||
// Lattices of these
|
||||
typedef Lattice<vColourMatrix> LatticeColourMatrix;
|
||||
typedef Lattice<vColourMatrixF> LatticeColourMatrixF;
|
||||
typedef Lattice<vColourMatrixD> LatticeColourMatrixD;
|
||||
|
||||
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
|
||||
typedef Lattice<vSpinMatrixF> LatticeSpinMatrixF;
|
||||
typedef Lattice<vSpinMatrixD> LatticeSpinMatrixD;
|
||||
|
||||
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
|
||||
typedef Lattice<vSpinColourMatrixF> LatticeSpinColourMatrixF;
|
||||
typedef Lattice<vSpinColourMatrixD> LatticeSpinColourMatrixD;
|
||||
|
||||
|
||||
typedef Lattice<vLorentzColourMatrix> LatticeLorentzColourMatrix;
|
||||
typedef Lattice<vLorentzColourMatrixF> LatticeLorentzColourMatrixF;
|
||||
typedef Lattice<vLorentzColourMatrixD> LatticeLorentzColourMatrixD;
|
||||
|
||||
// DoubleStored gauge field
|
||||
typedef Lattice<vDoubleStoredColourMatrix> LatticeDoubleStoredColourMatrix;
|
||||
typedef Lattice<vDoubleStoredColourMatrixF> LatticeDoubleStoredColourMatrixF;
|
||||
typedef Lattice<vDoubleStoredColourMatrixD> LatticeDoubleStoredColourMatrixD;
|
||||
|
||||
typedef Lattice<vSpinVector> LatticeSpinVector;
|
||||
typedef Lattice<vSpinVectorF> LatticeSpinVectorF;
|
||||
typedef Lattice<vSpinVectorD> LatticeSpinVectorD;
|
||||
|
||||
typedef Lattice<vColourVector> LatticeColourVector;
|
||||
typedef Lattice<vColourVectorF> LatticeColourVectorF;
|
||||
typedef Lattice<vColourVectorD> LatticeColourVectorD;
|
||||
|
||||
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
|
||||
typedef Lattice<vSpinColourVectorF> LatticeSpinColourVectorF;
|
||||
typedef Lattice<vSpinColourVectorD> LatticeSpinColourVectorD;
|
||||
|
||||
typedef Lattice<vHalfSpinVector> LatticeHalfSpinVector;
|
||||
typedef Lattice<vHalfSpinVectorF> LatticeHalfSpinVectorF;
|
||||
typedef Lattice<vHalfSpinVectorD> LatticeHalfSpinVectorD;
|
||||
|
||||
typedef Lattice<vHalfSpinColourVector> LatticeHalfSpinColourVector;
|
||||
typedef Lattice<vHalfSpinColourVectorF> LatticeHalfSpinColourVectorF;
|
||||
typedef Lattice<vHalfSpinColourVectorD> LatticeHalfSpinColourVectorD;
|
||||
|
||||
typedef Lattice<vTReal> LatticeReal;
|
||||
typedef Lattice<vTRealF> LatticeRealF;
|
||||
typedef Lattice<vTRealD> LatticeRealD;
|
||||
|
||||
typedef Lattice<vTComplex> LatticeComplex;
|
||||
typedef Lattice<vTComplexF> LatticeComplexF;
|
||||
typedef Lattice<vTComplexD> LatticeComplexD;
|
||||
|
||||
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
|
||||
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Physical names for things
|
||||
///////////////////////////////////////////
|
||||
typedef LatticeHalfSpinColourVector LatticeHalfFermion;
|
||||
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF;
|
||||
typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD;
|
||||
|
||||
typedef LatticeSpinColourVector LatticeFermion;
|
||||
typedef LatticeSpinColourVectorF LatticeFermionF;
|
||||
typedef LatticeSpinColourVectorD LatticeFermionD;
|
||||
|
||||
typedef LatticeSpinColourMatrix LatticePropagator;
|
||||
typedef LatticeSpinColourMatrixF LatticePropagatorF;
|
||||
typedef LatticeSpinColourMatrixD LatticePropagatorD;
|
||||
|
||||
typedef LatticeLorentzColourMatrix LatticeGaugeField;
|
||||
typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF;
|
||||
typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD;
|
||||
|
||||
typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField;
|
||||
typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF;
|
||||
typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD;
|
||||
|
||||
template<class GF> using LorentzScalar = Lattice<iScalar<typename GF::vector_object::element> >;
|
||||
|
||||
// Uhgg... typing this hurt ;)
|
||||
// (my keyboard got burning hot when I typed this, must be the anti-Fermion)
|
||||
typedef Lattice<vColourVector> LatticeStaggeredFermion;
|
||||
typedef Lattice<vColourVectorF> LatticeStaggeredFermionF;
|
||||
typedef Lattice<vColourVectorD> LatticeStaggeredFermionD;
|
||||
|
||||
typedef Lattice<vColourMatrix> LatticeStaggeredPropagator;
|
||||
typedef Lattice<vColourMatrixF> LatticeStaggeredPropagatorF;
|
||||
typedef Lattice<vColourMatrixD> LatticeStaggeredPropagatorD;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Peek and Poke named after physics attributes
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//spin
|
||||
template<class vobj> auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<SpinIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekSpin(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<SpinIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<SpinIndex>(rhs,i,j);
|
||||
}
|
||||
//colour
|
||||
template<class vobj> auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||
}
|
||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<ColourIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekColour(const Lattice<vobj> &rhs,int i,int j) -> decltype(PeekIndex<ColourIndex>(rhs,0,0))
|
||||
{
|
||||
return PeekIndex<ColourIndex>(rhs,i,j);
|
||||
}
|
||||
//lorentz
|
||||
template<class vobj> auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<LorentzIndex>(rhs,i);
|
||||
}
|
||||
template<class vobj> auto peekLorentz(const Lattice<vobj> &rhs,int i) -> decltype(PeekIndex<LorentzIndex>(rhs,0))
|
||||
{
|
||||
return PeekIndex<LorentzIndex>(rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Poke lattice
|
||||
//////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
void pokeColour(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<ColourIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeColour(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
|
||||
int i,int j)
|
||||
{
|
||||
PokeIndex<ColourIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeSpin(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<SpinIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeSpin(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
|
||||
int i,int j)
|
||||
{
|
||||
PokeIndex<SpinIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
template<class vobj>
|
||||
void pokeLorentz(Lattice<vobj> &lhs,
|
||||
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
|
||||
int i)
|
||||
{
|
||||
PokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Poke scalars
|
||||
//////////////////////////////////////////////
|
||||
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<SpinIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0,0)) & rhs,int i,int j)
|
||||
{
|
||||
pokeIndex<SpinIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
|
||||
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<ColourIndex>(lhs,rhs,i);
|
||||
}
|
||||
template<class vobj> void pokeColour(vobj &lhs,const decltype(peekIndex<ColourIndex>(lhs,0,0)) & rhs,int i,int j)
|
||||
{
|
||||
pokeIndex<ColourIndex>(lhs,rhs,i,j);
|
||||
}
|
||||
|
||||
template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<LorentzIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fermion <-> propagator assignements
|
||||
//////////////////////////////////////////////
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(pjs, peekColour(fj, i), i, c);
|
||||
}
|
||||
pokeSpin(p, pjs, j, s);
|
||||
}
|
||||
}
|
||||
|
||||
//template <class Prop, class Ferm>
|
||||
template <class Fimpl>
|
||||
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Fimpl::Dimension; ++i)
|
||||
{
|
||||
pokeColour(fj, peekColour(pjs, i, c), i);
|
||||
}
|
||||
pokeSpin(f, fj, j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// transpose array and scalar
|
||||
//////////////////////////////////////////////
|
||||
template<int Index,class vobj> inline Lattice<vobj> transposeSpin(const Lattice<vobj> &lhs){
|
||||
return transposeIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline Lattice<vobj> transposeColour(const Lattice<vobj> &lhs){
|
||||
return transposeIndex<ColourIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline vobj transposeSpin(const vobj &lhs){
|
||||
return transposeIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj> inline vobj transposeColour(const vobj &lhs){
|
||||
return transposeIndex<ColourIndex>(lhs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Trace lattice and non-lattice
|
||||
//////////////////////////////////////////
|
||||
template<int Index,class vobj>
|
||||
inline auto traceSpin(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs._odata[0]))>
|
||||
{
|
||||
return traceIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceColour(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs._odata[0]))>
|
||||
{
|
||||
return traceIndex<ColourIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceSpin(const vobj &lhs) -> Lattice<decltype(traceIndex<SpinIndex>(lhs))>
|
||||
{
|
||||
return traceIndex<SpinIndex>(lhs);
|
||||
}
|
||||
template<int Index,class vobj>
|
||||
inline auto traceColour(const vobj &lhs) -> Lattice<decltype(traceIndex<ColourIndex>(lhs))>
|
||||
{
|
||||
return traceIndex<ColourIndex>(lhs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Current types
|
||||
//////////////////////////////////////////
|
||||
GRID_SERIALIZABLE_ENUM(Current, undef,
|
||||
Vector, 0,
|
||||
Axial, 1,
|
||||
Tadpole, 2);
|
||||
|
||||
} //namespace QCD
|
||||
} // Grid
|
||||
|
||||
|
||||
|
||||
#endif
|
50
Grid/qcd/action/Action.h
Normal file
50
Grid/qcd/action/Action.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/Actions.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_QCD_ACTION_H
|
||||
#define GRID_QCD_ACTION_H
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Abstract base interface
|
||||
////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/ActionCore.h>
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Fermion actions; prevent coupling fermion.cc files to other headers
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
#include <Grid/qcd/action/fermion/Fermion.h>
|
||||
////////////////////////////////////////
|
||||
// Pseudo fermion combinations for HMC
|
||||
////////////////////////////////////////
|
||||
#include <Grid/qcd/action/pseudofermion/PseudoFermion.h>
|
||||
|
||||
#endif
|
56
Grid/qcd/action/ActionBase.h
Normal file
56
Grid/qcd/action/ActionBase.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionBase.h
|
||||
|
||||
Copyright (C) 2015-2016
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef ACTION_BASE_H
|
||||
#define ACTION_BASE_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
template <class GaugeField >
|
||||
class Action
|
||||
{
|
||||
|
||||
public:
|
||||
bool is_smeared = false;
|
||||
// Heatbath?
|
||||
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||
virtual std::string action_name() = 0; // return the action name
|
||||
virtual std::string LogParameters() = 0; // prints action parameters
|
||||
virtual ~Action(){}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ACTION_BASE_H
|
61
Grid/qcd/action/ActionCore.h
Normal file
61
Grid/qcd/action/ActionCore.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionCore.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: neo <cossu@post.kek.jp>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef QCD_ACTION_CORE
|
||||
#define QCD_ACTION_CORE
|
||||
|
||||
#include <Grid/qcd/action/ActionBase.h>
|
||||
#include <Grid/qcd/action/ActionSet.h>
|
||||
#include <Grid/qcd/action/ActionParams.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Gauge Actions
|
||||
////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/gauge/Gauge.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Fermion prereqs
|
||||
////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/fermion/FermionCore.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Scalar Actions
|
||||
////////////////////////////////////////////
|
||||
#include <Grid/qcd/action/scalar/Scalar.h>
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Utility functions
|
||||
////////////////////////////////////////////
|
||||
#include <Grid/qcd/utils/Metric.h>
|
||||
#include <Grid/qcd/utils/CovariantLaplacian.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
92
Grid/qcd/action/ActionParams.h
Normal file
92
Grid/qcd/action/ActionParams.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/qcd/action/ActionParams.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef GRID_QCD_ACTION_PARAMS_H
|
||||
#define GRID_QCD_ACTION_PARAMS_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
// These can move into a params header and be given MacroMagic serialisation
|
||||
struct GparityWilsonImplParams {
|
||||
bool overlapCommsCompute;
|
||||
std::vector<int> twists;
|
||||
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
|
||||
};
|
||||
|
||||
struct WilsonImplParams {
|
||||
bool overlapCommsCompute;
|
||||
std::vector<Complex> boundary_phases;
|
||||
WilsonImplParams() : overlapCommsCompute(false) {
|
||||
boundary_phases.resize(Nd, 1.0);
|
||||
};
|
||||
WilsonImplParams(const std::vector<Complex> phi)
|
||||
: boundary_phases(phi), overlapCommsCompute(false) {}
|
||||
};
|
||||
|
||||
struct StaggeredImplParams {
|
||||
StaggeredImplParams() {};
|
||||
};
|
||||
|
||||
struct OneFlavourRationalParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
|
||||
RealD, lo,
|
||||
RealD, hi,
|
||||
int, MaxIter,
|
||||
RealD, tolerance,
|
||||
int, degree,
|
||||
int, precision);
|
||||
|
||||
// MaxIter and tolerance, vectors??
|
||||
|
||||
// constructor
|
||||
OneFlavourRationalParams( RealD _lo = 0.0,
|
||||
RealD _hi = 1.0,
|
||||
int _maxit = 1000,
|
||||
RealD tol = 1.0e-8,
|
||||
int _degree = 10,
|
||||
int _precision = 64)
|
||||
: lo(_lo),
|
||||
hi(_hi),
|
||||
MaxIter(_maxit),
|
||||
tolerance(tol),
|
||||
degree(_degree),
|
||||
precision(_precision){};
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user