mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet
This commit is contained in:
commit
5c85774ee3
@ -109,6 +109,9 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||||
|
|
||||||
|
precisionChangeWorkspace pc_wk_sp_to_dp(DoublePrecGrid, SinglePrecGrid);
|
||||||
|
precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, DoublePrecGrid);
|
||||||
|
|
||||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||||
//Compute double precision rsd and also new RHS vector.
|
//Compute double precision rsd and also new RHS vector.
|
||||||
Linop_d.HermOp(sol_d, tmp_d);
|
Linop_d.HermOp(sol_d, tmp_d);
|
||||||
@ -123,7 +126,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||||
|
|
||||||
PrecChangeTimer.Start();
|
PrecChangeTimer.Start();
|
||||||
precisionChange(src_f, src_d);
|
precisionChange(src_f, src_d, pc_wk_dp_to_sp);
|
||||||
PrecChangeTimer.Stop();
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
sol_f = Zero();
|
sol_f = Zero();
|
||||||
@ -142,7 +145,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
//Convert sol back to double and add to double prec solution
|
//Convert sol back to double and add to double prec solution
|
||||||
PrecChangeTimer.Start();
|
PrecChangeTimer.Start();
|
||||||
precisionChange(tmp_d, sol_f);
|
precisionChange(tmp_d, sol_f, pc_wk_sp_to_dp);
|
||||||
PrecChangeTimer.Stop();
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||||
|
373
Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h
Normal file
373
Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision.
//The primary residual and search direction are kept in double precision, while the shifted search directions
//and the accumulated solutions are kept in single precision and converted to double precision at the end.
//For safety, a final mixed-precision CG is applied to any shift whose true residual has not reached its target.
|
||||||
|
|
||||||
|
//PB Pure single, then double fixup
|
||||||
|
|
||||||
|
template<class FieldD, class FieldF,
|
||||||
|
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||||
|
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||||
|
class ConjugateGradientMultiShiftMixedPrecCleanup : public OperatorMultiFunction<FieldD>,
|
||||||
|
public OperatorFunction<FieldD>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
using OperatorFunction<FieldD>::operator();
|
||||||
|
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxIterationsMshift;
|
||||||
|
Integer MaxIterations;
|
||||||
|
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||||
|
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||||
|
int verbose;
|
||||||
|
MultiShiftFunction shifts;
|
||||||
|
std::vector<RealD> TrueResidualShift;
|
||||||
|
|
||||||
|
int ReliableUpdateFreq; //number of iterations between reliable updates
|
||||||
|
|
||||||
|
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||||
|
LinearOperatorBase<FieldF> &Linop_f; //single precision
|
||||||
|
|
||||||
|
ConjugateGradientMultiShiftMixedPrecCleanup(Integer maxit, const MultiShiftFunction &_shifts,
|
||||||
|
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
||||||
|
int _ReliableUpdateFreq) :
|
||||||
|
MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq),
|
||||||
|
MaxIterations(20000)
|
||||||
|
{
|
||||||
|
verbose=1;
|
||||||
|
IterationsToCompleteShift.resize(_shifts.order);
|
||||||
|
TrueResidualShift.resize(_shifts.order);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
|
||||||
|
{
|
||||||
|
GridBase *grid = src.Grid();
|
||||||
|
int nshift = shifts.order;
|
||||||
|
std::vector<FieldD> results(nshift,grid);
|
||||||
|
(*this)(Linop,src,results,psi);
|
||||||
|
}
|
||||||
|
void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
|
||||||
|
{
|
||||||
|
int nshift = shifts.order;
|
||||||
|
|
||||||
|
(*this)(Linop,src,results);
|
||||||
|
|
||||||
|
psi = shifts.norm*src;
|
||||||
|
for(int i=0;i<nshift;i++){
|
||||||
|
psi = psi + shifts.residues[i]*results[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||||
|
{
|
||||||
|
GRID_TRACE("ConjugateGradientMultiShiftMixedPrecCleanup");
|
||||||
|
GridBase *DoublePrecGrid = src_d.Grid();
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convenience references to the info stored in "MultiShiftFunction"
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
int nshift = shifts.order;
|
||||||
|
|
||||||
|
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||||
|
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||||
|
std::vector<RealD> alpha(nshift,1.0);
|
||||||
|
|
||||||
|
//Double precision search directions
|
||||||
|
FieldD p_d(DoublePrecGrid);
|
||||||
|
std::vector<FieldF> ps_f (nshift, SinglePrecGrid);// Search directions (single precision)
|
||||||
|
std::vector<FieldF> psi_f(nshift, SinglePrecGrid);// solutions (single precision)
|
||||||
|
|
||||||
|
FieldD tmp_d(DoublePrecGrid);
|
||||||
|
FieldD r_d(DoublePrecGrid);
|
||||||
|
FieldF r_f(SinglePrecGrid);
|
||||||
|
FieldD mmp_d(DoublePrecGrid);
|
||||||
|
|
||||||
|
assert(psi_d.size()==nshift);
|
||||||
|
assert(mass.size()==nshift);
|
||||||
|
assert(mresidual.size()==nshift);
|
||||||
|
|
||||||
|
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||||
|
RealD bs[nshift];
|
||||||
|
RealD rsq[nshift];
|
||||||
|
RealD rsqf[nshift];
|
||||||
|
RealD z[nshift][2];
|
||||||
|
int converged[nshift];
|
||||||
|
|
||||||
|
const int primary =0;
|
||||||
|
|
||||||
|
//Primary shift fields CG iteration
|
||||||
|
RealD a,b,c,d;
|
||||||
|
RealD cp,bp,qq; //prev
|
||||||
|
|
||||||
|
// Matrix mult fields
|
||||||
|
FieldF p_f(SinglePrecGrid);
|
||||||
|
FieldF mmp_f(SinglePrecGrid);
|
||||||
|
|
||||||
|
// Check lightest mass
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
assert( mass[s]>= mass[primary] );
|
||||||
|
converged[s]=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wire guess to zero
|
||||||
|
// Residuals "r" are src
|
||||||
|
// First search direction "p" is also src
|
||||||
|
cp = norm2(src_d);
|
||||||
|
|
||||||
|
// Handle trivial case of zero src.
|
||||||
|
if( cp == 0. ){
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
psi_d[s] = Zero();
|
||||||
|
psi_f[s] = Zero();
|
||||||
|
IterationsToCompleteShift[s] = 1;
|
||||||
|
TrueResidualShift[s] = 0.;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||||
|
rsqf[s] =rsq[s];
|
||||||
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
|
||||||
|
// ps_d[s] = src_d;
|
||||||
|
precisionChangeFast(ps_f[s],src_d);
|
||||||
|
}
|
||||||
|
// r and p for primary
|
||||||
|
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
|
||||||
|
r_d = p_d;
|
||||||
|
|
||||||
|
//MdagM+m[0]
|
||||||
|
precisionChangeFast(p_f,p_d);
|
||||||
|
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||||
|
precisionChangeFast(tmp_d,mmp_f);
|
||||||
|
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||||
|
tmp_d = tmp_d - mmp_d;
|
||||||
|
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||||
|
// assert(norm2(tmp_d)< 1.0e-4);
|
||||||
|
|
||||||
|
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||||
|
RealD rn = norm2(p_d);
|
||||||
|
d += rn*mass[0];
|
||||||
|
|
||||||
|
b = -cp /d;
|
||||||
|
|
||||||
|
// Set up the various shift variables
|
||||||
|
int iz=0;
|
||||||
|
z[0][1-iz] = 1.0;
|
||||||
|
z[0][iz] = 1.0;
|
||||||
|
bs[0] = b;
|
||||||
|
for(int s=1;s<nshift;s++){
|
||||||
|
z[s][1-iz] = 1.0;
|
||||||
|
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||||
|
bs[s] = b*z[s][iz];
|
||||||
|
}
|
||||||
|
|
||||||
|
// r += b[0] A.p[0]
|
||||||
|
// c= norm(r)
|
||||||
|
c=axpy_norm(r_d,b,mmp_d,r_d);
|
||||||
|
|
||||||
|
for(int s=0;s<nshift;s++) {
|
||||||
|
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
|
||||||
|
precisionChangeFast(psi_f[s],psi_d[s]);
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////
|
||||||
|
// Timers
|
||||||
|
///////////////////////////////////////
|
||||||
|
GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
|
||||||
|
|
||||||
|
SolverTimer.Start();
|
||||||
|
|
||||||
|
// Iteration loop
|
||||||
|
int k;
|
||||||
|
|
||||||
|
for (k=1;k<=MaxIterationsMshift;k++){
|
||||||
|
|
||||||
|
a = c /cp;
|
||||||
|
AXPYTimer.Start();
|
||||||
|
axpy(p_d,a,p_d,r_d);
|
||||||
|
AXPYTimer.Stop();
|
||||||
|
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChangeFast(r_f, r_d);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
AXPYTimer.Start();
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
if ( ! converged[s] ) {
|
||||||
|
if (s==0){
|
||||||
|
axpy(ps_f[s],a,ps_f[s],r_f);
|
||||||
|
} else{
|
||||||
|
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||||
|
axpby(ps_f[s],z[s][iz],as,r_f,ps_f[s]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AXPYTimer.Stop();
|
||||||
|
|
||||||
|
cp=c;
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
MatrixTimer.Start();
|
||||||
|
Linop_f.HermOp(p_f,mmp_f);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
d=real(innerProduct(p_d,mmp_d));
|
||||||
|
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||||
|
RealD rn = norm2(p_d);
|
||||||
|
d += rn*mass[0];
|
||||||
|
|
||||||
|
bp=b;
|
||||||
|
b=-cp/d;
|
||||||
|
|
||||||
|
// Toggle the recurrence history
|
||||||
|
bs[0] = b;
|
||||||
|
iz = 1-iz;
|
||||||
|
ShiftTimer.Start();
|
||||||
|
for(int s=1;s<nshift;s++){
|
||||||
|
if((!converged[s])){
|
||||||
|
RealD z0 = z[s][1-iz];
|
||||||
|
RealD z1 = z[s][iz];
|
||||||
|
z[s][iz] = z0*z1*bp
|
||||||
|
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||||
|
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ShiftTimer.Stop();
|
||||||
|
|
||||||
|
//Update single precision solutions
|
||||||
|
AXPYTimer.Start();
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
int ss = s;
|
||||||
|
if( (!converged[s]) ) {
|
||||||
|
axpy(psi_f[ss],-bs[s]*alpha[s],ps_f[s],psi_f[ss]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
c = axpy_norm(r_d,b,mmp_d,r_d);
|
||||||
|
AXPYTimer.Stop();
|
||||||
|
|
||||||
|
// Convergence checks
|
||||||
|
int all_converged = 1;
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
|
||||||
|
if ( (!converged[s]) ){
|
||||||
|
IterationsToCompleteShift[s] = k;
|
||||||
|
|
||||||
|
RealD css = c * z[s][iz]* z[s][iz];
|
||||||
|
|
||||||
|
if(css<rsqf[s]){
|
||||||
|
if ( ! converged[s] )
|
||||||
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||||
|
converged[s]=1;
|
||||||
|
} else {
|
||||||
|
all_converged=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( all_converged || k == MaxIterationsMshift-1){
|
||||||
|
|
||||||
|
SolverTimer.Stop();
|
||||||
|
|
||||||
|
for(int s=0;s<nshift;s++){
|
||||||
|
precisionChangeFast(psi_d[s],psi_f[s]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( all_converged ){
|
||||||
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: All shifts have converged iteration "<<k<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: Checking solutions"<<std::endl;
|
||||||
|
} else {
|
||||||
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrecCleanup: Not all shifts have converged iteration "<<k<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check answers
|
||||||
|
for(int s=0; s < nshift; s++) {
|
||||||
|
Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
|
||||||
|
axpy(tmp_d,mass[s],psi_d[s],mmp_d);
|
||||||
|
axpy(r_d,-alpha[s],src_d,tmp_d);
|
||||||
|
RealD rn = norm2(r_d);
|
||||||
|
RealD cn = norm2(src_d);
|
||||||
|
TrueResidualShift[s] = std::sqrt(rn/cn);
|
||||||
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
|
||||||
|
|
||||||
|
//If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
|
||||||
|
if(rn >= rsq[s]){
|
||||||
|
CleanupTimer.Start();
|
||||||
|
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: performing cleanup step for shift " << s << std::endl;
|
||||||
|
|
||||||
|
//Setup linear operators for final cleanup
|
||||||
|
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
|
||||||
|
ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
|
||||||
|
|
||||||
|
MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d);
|
||||||
|
cg(src_d, psi_d[s]);
|
||||||
|
|
||||||
|
TrueResidualShift[s] = cg.TrueResidual;
|
||||||
|
CleanupTimer.Stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrecCleanup: Time Breakdown for body"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tSolver " << SolverTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
|
||||||
|
|
||||||
|
IterationsToComplete = k;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
|
@ -81,6 +81,7 @@ public:
|
|||||||
using OperatorFunction<FieldD>::operator();
|
using OperatorFunction<FieldD>::operator();
|
||||||
|
|
||||||
RealD Tolerance;
|
RealD Tolerance;
|
||||||
|
Integer MaxIterationsMshift;
|
||||||
Integer MaxIterations;
|
Integer MaxIterations;
|
||||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||||
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
std::vector<int> IterationsToCompleteShift; // Iterations for this shift
|
||||||
@ -95,9 +96,9 @@ public:
|
|||||||
|
|
||||||
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
|
ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
|
||||||
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
|
||||||
int _ReliableUpdateFreq
|
int _ReliableUpdateFreq) :
|
||||||
) :
|
MaxIterationsMshift(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq),
|
||||||
MaxIterations(maxit), shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
|
MaxIterations(20000)
|
||||||
{
|
{
|
||||||
verbose=1;
|
verbose=1;
|
||||||
IterationsToCompleteShift.resize(_shifts.order);
|
IterationsToCompleteShift.resize(_shifts.order);
|
||||||
@ -130,6 +131,9 @@ public:
|
|||||||
GRID_TRACE("ConjugateGradientMultiShiftMixedPrec");
|
GRID_TRACE("ConjugateGradientMultiShiftMixedPrec");
|
||||||
GridBase *DoublePrecGrid = src_d.Grid();
|
GridBase *DoublePrecGrid = src_d.Grid();
|
||||||
|
|
||||||
|
precisionChangeWorkspace pc_wk_s_to_d(DoublePrecGrid,SinglePrecGrid);
|
||||||
|
precisionChangeWorkspace pc_wk_d_to_s(SinglePrecGrid,DoublePrecGrid);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Convenience references to the info stored in "MultiShiftFunction"
|
// Convenience references to the info stored in "MultiShiftFunction"
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -200,10 +204,10 @@ public:
|
|||||||
r_d = p_d;
|
r_d = p_d;
|
||||||
|
|
||||||
//MdagM+m[0]
|
//MdagM+m[0]
|
||||||
precisionChangeFast(p_f,p_d);
|
precisionChange(p_f, p_d, pc_wk_d_to_s);
|
||||||
|
|
||||||
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||||
precisionChangeFast(tmp_d,mmp_f);
|
precisionChange(tmp_d, mmp_f, pc_wk_s_to_d);
|
||||||
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||||
tmp_d = tmp_d - mmp_d;
|
tmp_d = tmp_d - mmp_d;
|
||||||
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||||
@ -244,7 +248,7 @@ public:
|
|||||||
// Iteration loop
|
// Iteration loop
|
||||||
int k;
|
int k;
|
||||||
|
|
||||||
for (k=1;k<=MaxIterations;k++){
|
for (k=1;k<=MaxIterationsMshift;k++){
|
||||||
|
|
||||||
a = c /cp;
|
a = c /cp;
|
||||||
AXPYTimer.Start();
|
AXPYTimer.Start();
|
||||||
@ -263,7 +267,7 @@ public:
|
|||||||
AXPYTimer.Stop();
|
AXPYTimer.Stop();
|
||||||
|
|
||||||
PrecChangeTimer.Start();
|
PrecChangeTimer.Start();
|
||||||
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
|
precisionChange(p_f, p_d, pc_wk_d_to_s); //get back single prec search direction for linop
|
||||||
PrecChangeTimer.Stop();
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
cp=c;
|
cp=c;
|
||||||
@ -272,7 +276,7 @@ public:
|
|||||||
MatrixTimer.Stop();
|
MatrixTimer.Stop();
|
||||||
|
|
||||||
PrecChangeTimer.Start();
|
PrecChangeTimer.Start();
|
||||||
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
|
precisionChange(mmp_d, mmp_f, pc_wk_s_to_d); // From Float to Double
|
||||||
PrecChangeTimer.Stop();
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
AXPYTimer.Start();
|
AXPYTimer.Start();
|
||||||
@ -350,11 +354,16 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( all_converged ){
|
if ( all_converged || k == MaxIterationsMshift-1){
|
||||||
|
|
||||||
SolverTimer.Stop();
|
SolverTimer.Stop();
|
||||||
|
|
||||||
|
if ( all_converged ){
|
||||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
|
||||||
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
|
||||||
|
} else {
|
||||||
|
std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Not all shifts have converged iteration "<<k<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
// Check answers
|
// Check answers
|
||||||
for(int s=0; s < nshift; s++) {
|
for(int s=0; s < nshift; s++) {
|
||||||
@ -397,11 +406,9 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
// ugly hack
|
|
||||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||||
// assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -48,7 +48,7 @@ public:
|
|||||||
LinearOperatorBase<FieldF> &Linop_f;
|
LinearOperatorBase<FieldF> &Linop_f;
|
||||||
LinearOperatorBase<FieldD> &Linop_d;
|
LinearOperatorBase<FieldD> &Linop_d;
|
||||||
GridBase* SinglePrecGrid;
|
GridBase* SinglePrecGrid;
|
||||||
RealD Delta; //reliable update parameter
|
RealD Delta; //reliable update parameter. A reliable update is performed when the residual drops by a factor of Delta relative to its value at the last update
|
||||||
|
|
||||||
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
||||||
LinearOperatorBase<FieldF> *Linop_fallback;
|
LinearOperatorBase<FieldF> *Linop_fallback;
|
||||||
@ -65,7 +65,9 @@ public:
|
|||||||
ErrorOnNoConverge(err_on_no_conv),
|
ErrorOnNoConverge(err_on_no_conv),
|
||||||
DoFinalCleanup(true),
|
DoFinalCleanup(true),
|
||||||
Linop_fallback(NULL)
|
Linop_fallback(NULL)
|
||||||
{};
|
{
|
||||||
|
assert(Delta > 0. && Delta < 1. && "Expect 0 < Delta < 1");
|
||||||
|
};
|
||||||
|
|
||||||
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
||||||
Linop_fallback = &_Linop_fallback;
|
Linop_fallback = &_Linop_fallback;
|
||||||
@ -116,9 +118,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
//Single prec initialization
|
//Single prec initialization
|
||||||
|
precisionChangeWorkspace pc_wk_sp_to_dp(src.Grid(), SinglePrecGrid);
|
||||||
|
precisionChangeWorkspace pc_wk_dp_to_sp(SinglePrecGrid, src.Grid());
|
||||||
|
|
||||||
FieldF r_f(SinglePrecGrid);
|
FieldF r_f(SinglePrecGrid);
|
||||||
r_f.Checkerboard() = r.Checkerboard();
|
r_f.Checkerboard() = r.Checkerboard();
|
||||||
precisionChange(r_f, r);
|
precisionChange(r_f, r, pc_wk_dp_to_sp);
|
||||||
|
|
||||||
FieldF psi_f(r_f);
|
FieldF psi_f(r_f);
|
||||||
psi_f = Zero();
|
psi_f = Zero();
|
||||||
@ -134,6 +139,7 @@ public:
|
|||||||
GridStopWatch LinalgTimer;
|
GridStopWatch LinalgTimer;
|
||||||
GridStopWatch MatrixTimer;
|
GridStopWatch MatrixTimer;
|
||||||
GridStopWatch SolverTimer;
|
GridStopWatch SolverTimer;
|
||||||
|
GridStopWatch PrecChangeTimer;
|
||||||
|
|
||||||
SolverTimer.Start();
|
SolverTimer.Start();
|
||||||
int k = 0;
|
int k = 0;
|
||||||
@ -173,7 +179,9 @@ public:
|
|||||||
// Stopping condition
|
// Stopping condition
|
||||||
if (cp <= rsq) {
|
if (cp <= rsq) {
|
||||||
//Although not written in the paper, I assume that I have to add on the final solution
|
//Although not written in the paper, I assume that I have to add on the final solution
|
||||||
precisionChange(mmp, psi_f);
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(mmp, psi_f, pc_wk_sp_to_dp);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
psi = psi + mmp;
|
psi = psi + mmp;
|
||||||
|
|
||||||
|
|
||||||
@ -194,6 +202,9 @@ public:
|
|||||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tPrecChange " << PrecChangeTimer.Elapsed() <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "\tPrecChange avg time " << PrecChangeTimer.Elapsed()/(2*l+1) <<std::endl;
|
||||||
|
|
||||||
|
|
||||||
IterationsToComplete = k;
|
IterationsToComplete = k;
|
||||||
ReliableUpdatesPerformed = l;
|
ReliableUpdatesPerformed = l;
|
||||||
@ -214,14 +225,21 @@ public:
|
|||||||
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
||||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
||||||
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
||||||
precisionChange(mmp, psi_f);
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(mmp, psi_f, pc_wk_sp_to_dp);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
psi = psi + mmp;
|
psi = psi + mmp;
|
||||||
|
|
||||||
|
MatrixTimer.Start();
|
||||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
|
||||||
r = src - mmp;
|
r = src - mmp;
|
||||||
|
|
||||||
psi_f = Zero();
|
psi_f = Zero();
|
||||||
precisionChange(r_f, r);
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(r_f, r, pc_wk_dp_to_sp);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
cp = norm2(r);
|
cp = norm2(r);
|
||||||
MaxResidSinceLastRelUp = cp;
|
MaxResidSinceLastRelUp = cp;
|
||||||
|
|
||||||
|
1412
Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h
Normal file
1412
Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -248,7 +248,7 @@ public:
|
|||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// user defined constructor
|
// user defined constructor
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
Lattice(GridBase *grid,ViewMode mode=AcceleratorWrite) {
|
Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) {
|
||||||
this->_grid = grid;
|
this->_grid = grid;
|
||||||
resize(this->_grid->oSites());
|
resize(this->_grid->oSites());
|
||||||
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
assert((((uint64_t)&this->_odata[0])&0xF) ==0);
|
||||||
|
@ -440,7 +440,17 @@ public:
|
|||||||
_grid->GlobalCoorToGlobalIndex(gcoor,gidx);
|
_grid->GlobalCoorToGlobalIndex(gcoor,gidx);
|
||||||
|
|
||||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||||
|
#if 1
|
||||||
assert(rank == _grid->ThisRank() );
|
assert(rank == _grid->ThisRank() );
|
||||||
|
#else
|
||||||
|
//
|
||||||
|
if (rank != _grid->ThisRank() ){
|
||||||
|
std::cout <<"rank "<<rank<<" _grid->ThisRank() "<<_grid->ThisRank()<< std::endl;
|
||||||
|
// exit(-42);
|
||||||
|
// assert(0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int l_idx=generator_idx(o_idx,i_idx);
|
int l_idx=generator_idx(o_idx,i_idx);
|
||||||
_generators[l_idx] = master_engine;
|
_generators[l_idx] = master_engine;
|
||||||
|
@ -1080,6 +1080,7 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Very fast precision change. Requires in/out objects to reside on same Grid (e.g. by using double2 for the double-precision field)
|
||||||
template<class VobjOut, class VobjIn>
|
template<class VobjOut, class VobjIn>
|
||||||
void precisionChangeFast(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
void precisionChangeFast(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
||||||
{
|
{
|
||||||
@ -1097,9 +1098,9 @@ void precisionChangeFast(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
|||||||
precisionChange(vout,vin,N);
|
precisionChange(vout,vin,N);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
//Convert a Lattice from one precision to another
|
//Convert a Lattice from one precision to another (original, slow implementation)
|
||||||
template<class VobjOut, class VobjIn>
|
template<class VobjOut, class VobjIn>
|
||||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
void precisionChangeOrig(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
||||||
{
|
{
|
||||||
assert(out.Grid()->Nd() == in.Grid()->Nd());
|
assert(out.Grid()->Nd() == in.Grid()->Nd());
|
||||||
for(int d=0;d<out.Grid()->Nd();d++){
|
for(int d=0;d<out.Grid()->Nd();d++){
|
||||||
@ -1145,6 +1146,128 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls
|
||||||
|
class precisionChangeWorkspace{
|
||||||
|
std::pair<Integer,Integer>* fmap_device; //device pointer
|
||||||
|
//maintain grids for checking
|
||||||
|
GridBase* _out_grid;
|
||||||
|
GridBase* _in_grid;
|
||||||
|
public:
|
||||||
|
precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid): _out_grid(out_grid), _in_grid(in_grid){
|
||||||
|
//Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device
|
||||||
|
assert(out_grid->Nd() == in_grid->Nd());
|
||||||
|
for(int d=0;d<out_grid->Nd();d++){
|
||||||
|
assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]);
|
||||||
|
}
|
||||||
|
int Nsimd_out = out_grid->Nsimd();
|
||||||
|
|
||||||
|
std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these
|
||||||
|
for(int lane=0; lane < out_grid->Nsimd(); lane++)
|
||||||
|
out_grid->iCoorFromIindex(out_icorrs[lane], lane);
|
||||||
|
|
||||||
|
std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd
|
||||||
|
thread_for(out_oidx,out_grid->oSites(),{
|
||||||
|
Coordinate out_ocorr;
|
||||||
|
out_grid->oCoorFromOindex(out_ocorr, out_oidx);
|
||||||
|
|
||||||
|
Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate)
|
||||||
|
for(int out_lane=0; out_lane < Nsimd_out; out_lane++){
|
||||||
|
out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr);
|
||||||
|
|
||||||
|
//int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr);
|
||||||
|
//Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice
|
||||||
|
//Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity
|
||||||
|
int in_oidx = 0, in_lane = 0;
|
||||||
|
for(int d=0;d<in_grid->_ndimension;d++){
|
||||||
|
in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] );
|
||||||
|
in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] );
|
||||||
|
}
|
||||||
|
fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane );
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
//Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines)
|
||||||
|
size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>);
|
||||||
|
fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes);
|
||||||
|
acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Prevent moving or copying
|
||||||
|
precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete;
|
||||||
|
precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete;
|
||||||
|
precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete;
|
||||||
|
precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete;
|
||||||
|
|
||||||
|
std::pair<Integer,Integer> const* getMap() const{ return fmap_device; }
|
||||||
|
|
||||||
|
void checkGrids(GridBase* out, GridBase* in) const{
|
||||||
|
conformable(out, _out_grid);
|
||||||
|
conformable(in, _in_grid);
|
||||||
|
}
|
||||||
|
|
||||||
|
~precisionChangeWorkspace(){
|
||||||
|
acceleratorFreeDevice(fmap_device);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//We would like to use precisionChangeFast when possible. However usage of this requires the Grids to be the same (runtime check)
|
||||||
|
//*and* the precisionChange(VobjOut::vector_type, VobjIn, int) function to be defined for the types; this requires an extra compile-time check which we do using some SFINAE trickery
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
auto _precisionChangeFastWrap(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, int dummy)->decltype( precisionChange( ((typename VobjOut::vector_type*)0), ((typename VobjIn::vector_type*)0), 1), int()){
|
||||||
|
if(out.Grid() == in.Grid()){
|
||||||
|
precisionChangeFast(out,in);
|
||||||
|
return 1;
|
||||||
|
}else{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
int _precisionChangeFastWrap(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, long dummy){ //note long here is intentional; it means the above is preferred if available
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Convert a lattice of one precision to another. Much faster than original implementation but requires a pregenerated workspace
|
||||||
|
//which contains the mapping data.
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){
|
||||||
|
if(_precisionChangeFastWrap(out,in,0)) return;
|
||||||
|
|
||||||
|
static_assert( std::is_same<typename VobjOut::scalar_typeD, typename VobjIn::scalar_typeD>::value == 1, "precisionChange: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
|
||||||
|
|
||||||
|
out.Checkerboard() = in.Checkerboard();
|
||||||
|
constexpr int Nsimd_out = VobjOut::Nsimd();
|
||||||
|
|
||||||
|
workspace.checkGrids(out.Grid(),in.Grid());
|
||||||
|
std::pair<Integer,Integer> const* fmap_device = workspace.getMap();
|
||||||
|
|
||||||
|
//Do the copy/precision change
|
||||||
|
autoView( out_v , out, AcceleratorWrite);
|
||||||
|
autoView( in_v , in, AcceleratorRead);
|
||||||
|
|
||||||
|
accelerator_for(out_oidx, out.Grid()->oSites(), 1,{
|
||||||
|
std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out;
|
||||||
|
for(int out_lane=0; out_lane < Nsimd_out; out_lane++){
|
||||||
|
int in_oidx = fmap_osite[out_lane].first;
|
||||||
|
int in_lane = fmap_osite[out_lane].second;
|
||||||
|
copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
//Convert a Lattice from one precision to another. Much faster than original implementation but slower than precisionChangeFast
|
||||||
|
//or precisionChange called with pregenerated workspace, as it needs to internally generate the workspace on the host and copy to device
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||||
|
if(_precisionChangeFastWrap(out,in,0)) return;
|
||||||
|
precisionChangeWorkspace workspace(out.Grid(), in.Grid());
|
||||||
|
precisionChange(out, in, workspace);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Communicate between grids
|
// Communicate between grids
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -36,7 +36,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Wilson compressor will need FaceGather policies for:
|
// Wilson compressor will need FaceGather policies for:
|
||||||
// Periodic, Dirichlet, and partial Dirichlet for DWF
|
// Periodic, Dirichlet, and partial Dirichlet for DWF
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
const int dwf_compressor_depth=1;
|
const int dwf_compressor_depth=2;
|
||||||
#define DWF_COMPRESS
|
#define DWF_COMPRESS
|
||||||
class FaceGatherPartialDWF
|
class FaceGatherPartialDWF
|
||||||
{
|
{
|
||||||
|
@ -127,6 +127,8 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
ApproxNegPowerAction.tolerances[i] = action_tolerance[i];
|
ApproxNegPowerAction.tolerances[i] = action_tolerance[i];
|
||||||
ApproxHalfPowerAction.tolerances[i] = action_tolerance[i];
|
ApproxHalfPowerAction.tolerances[i] = action_tolerance[i];
|
||||||
ApproxNegHalfPowerAction.tolerances[i]= action_tolerance[i];
|
ApproxNegHalfPowerAction.tolerances[i]= action_tolerance[i];
|
||||||
|
}
|
||||||
|
for(int i=0;i<ApproxPowerMD.tolerances.size();i++){
|
||||||
ApproxPowerMD.tolerances[i] = md_tolerance[i];
|
ApproxPowerMD.tolerances[i] = md_tolerance[i];
|
||||||
ApproxNegPowerMD.tolerances[i] = md_tolerance[i];
|
ApproxNegPowerMD.tolerances[i] = md_tolerance[i];
|
||||||
ApproxHalfPowerMD.tolerances[i] = md_tolerance[i];
|
ApproxHalfPowerMD.tolerances[i] = md_tolerance[i];
|
||||||
|
@ -29,6 +29,8 @@
|
|||||||
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
|
#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
|
||||||
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
|
#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
|
||||||
|
|
||||||
|
#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftCleanup.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -58,7 +60,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
//Allow derived classes to override the multishift CG
|
//Allow derived classes to override the multishift CG
|
||||||
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
|
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
|
||||||
#if 0
|
#if 0
|
||||||
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOp : DenOp);
|
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
|
||||||
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
|
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
|
||||||
msCG(schurOp,in, out);
|
msCG(schurOp,in, out);
|
||||||
#else
|
#else
|
||||||
@ -67,6 +69,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
|
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
|
||||||
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
|
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
|
||||||
|
|
||||||
|
// Action better with higher precision?
|
||||||
ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
|
ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
|
||||||
precisionChange(inD2,in);
|
precisionChange(inD2,in);
|
||||||
std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
|
std::cout << "msCG single solve "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
|
||||||
@ -76,12 +79,12 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
}
|
}
|
||||||
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
|
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
|
||||||
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
|
SchurDifferentiableOperator<ImplD2> schurOpD2(numerator ? NumOpD2 : DenOpD2);
|
||||||
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
|
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
|
||||||
|
|
||||||
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
|
FermionFieldD2 inD2(NumOpD2.FermionRedBlackGrid());
|
||||||
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
|
FermionFieldD2 outD2(NumOpD2.FermionRedBlackGrid());
|
||||||
std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
|
std::vector<FermionFieldD2> out_elemsD2(out_elems.size(),NumOpD2.FermionRedBlackGrid());
|
||||||
ConjugateGradientMultiShiftMixedPrec<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
|
ConjugateGradientMultiShiftMixedPrecCleanup<FermionFieldD2, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
|
||||||
precisionChange(inD2,in);
|
precisionChange(inD2,in);
|
||||||
std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
|
std::cout << "msCG in "<<norm2(inD2)<<" " <<norm2(in)<<std::endl;
|
||||||
msCG(schurOpD2, inD2, out_elemsD2, outD2);
|
msCG(schurOpD2, inD2, out_elemsD2, outD2);
|
||||||
|
@ -284,6 +284,15 @@ public:
|
|||||||
<< as[level].actions.at(actionID)->deriv_us*1.0e-6<<" s"<< std::endl;
|
<< as[level].actions.at(actionID)->deriv_us*1.0e-6<<" s"<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Dslash counts "<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "------------------------- "<<std::endl;
|
||||||
|
uint64_t full, partial, dirichlet;
|
||||||
|
DslashGetCounts(dirichlet,partial,full);
|
||||||
|
std::cout << GridLogMessage << " Full BCs : "<<full<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Partial dirichlet BCs : "<<partial<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Dirichlet BCs : "<<dirichlet<<std::endl;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
|
std::cout << GridLogMessage << "--------------------------- "<<std::endl;
|
||||||
std::cout << GridLogMessage << " Force average size "<<std::endl;
|
std::cout << GridLogMessage << " Force average size "<<std::endl;
|
||||||
std::cout << GridLogMessage << "------------------------- "<<std::endl;
|
std::cout << GridLogMessage << "------------------------- "<<std::endl;
|
||||||
|
@ -29,6 +29,27 @@
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
// Running tallies, one per boundary-condition category. Each is bumped by exactly one of the
// DslashLog* functions below. The increments are plain (non-atomic) operations.
uint64_t DslashFullCount      = 0;
uint64_t DslashPartialCount   = 0;
uint64_t DslashDirichletCount = 0;

// Set all three tallies back to zero.
void DslashResetCounts(void)
{
  DslashFullCount      = 0;
  DslashPartialCount   = 0;
  DslashDirichletCount = 0;
}

// Copy the current tallies into the caller's variables.
// Note the argument order: dirichlet first, then partial, then full.
void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full)
{
  dirichlet = DslashDirichletCount;
  partial   = DslashPartialCount;
  full      = DslashFullCount;
}

// Each call adds one to the corresponding tally.
void DslashLogFull(void)     { ++DslashFullCount; }
void DslashLogPartial(void)  { ++DslashPartialCount; }
void DslashLogDirichlet(void){ ++DslashDirichletCount; }
|
||||||
|
|
||||||
|
|
||||||
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
|
||||||
int off,std::vector<std::pair<int,int> > & table)
|
int off,std::vector<std::pair<int,int> > & table)
|
||||||
{
|
{
|
||||||
|
@ -120,6 +120,12 @@ void Gather_plane_exchange_table(commVector<std::pair<int,int> >& table,
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
void DslashResetCounts(void);
|
||||||
|
void DslashGetCounts(uint64_t &dirichlet,uint64_t &partial,uint64_t &full);
|
||||||
|
void DslashLogFull(void);
|
||||||
|
void DslashLogPartial(void);
|
||||||
|
void DslashLogDirichlet(void);
|
||||||
|
|
||||||
struct StencilEntry {
|
struct StencilEntry {
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
uint64_t _byte_offset; // 8 bytes
|
uint64_t _byte_offset; // 8 bytes
|
||||||
@ -312,6 +318,7 @@ public:
|
|||||||
|
|
||||||
int face_table_computed;
|
int face_table_computed;
|
||||||
int partialDirichlet;
|
int partialDirichlet;
|
||||||
|
int fullDirichlet;
|
||||||
std::vector<commVector<std::pair<int,int> > > face_table ;
|
std::vector<commVector<std::pair<int,int> > > face_table ;
|
||||||
Vector<int> surface_list;
|
Vector<int> surface_list;
|
||||||
|
|
||||||
@ -442,6 +449,9 @@ public:
|
|||||||
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
|
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||||
{
|
{
|
||||||
_grid->StencilSendToRecvFromComplete(MpiReqs,0);
|
_grid->StencilSendToRecvFromComplete(MpiReqs,0);
|
||||||
|
if ( this->partialDirichlet ) DslashLogPartial();
|
||||||
|
else if ( this->fullDirichlet ) DslashLogDirichlet();
|
||||||
|
else DslashLogFull();
|
||||||
}
|
}
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Blocking send and receive. Either sequential or parallel.
|
// Blocking send and receive. Either sequential or parallel.
|
||||||
@ -770,6 +780,10 @@ public:
|
|||||||
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
||||||
partialDirichlet = p.partialDirichlet;
|
partialDirichlet = p.partialDirichlet;
|
||||||
DirichletBlock(p.dirichlet); // comms send/recv set up
|
DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||||
|
fullDirichlet=0;
|
||||||
|
for(int d=0;d<p.dirichlet.size();d++){
|
||||||
|
if (p.dirichlet[d]) fullDirichlet=1;
|
||||||
|
}
|
||||||
|
|
||||||
_unified_buffer_size=0;
|
_unified_buffer_size=0;
|
||||||
surface_list.resize(0);
|
surface_list.resize(0);
|
||||||
|
@ -226,7 +226,7 @@ template<class vobjOut, class vobjIn>
|
|||||||
accelerator_inline
|
accelerator_inline
|
||||||
void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
|
void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
|
||||||
{
|
{
|
||||||
static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
|
static_assert( std::is_same<typename vobjOut::scalar_typeD, typename vobjIn::scalar_typeD>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
|
||||||
|
|
||||||
typedef typename vobjOut::vector_type ovector_type;
|
typedef typename vobjOut::vector_type ovector_type;
|
||||||
typedef typename vobjIn::vector_type ivector_type;
|
typedef typename vobjIn::vector_type ivector_type;
|
||||||
@ -251,9 +251,9 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest
|
|||||||
ovector_type * __restrict__ op = (ovector_type *)&vecOut;
|
ovector_type * __restrict__ op = (ovector_type *)&vecOut;
|
||||||
ivector_type * __restrict__ ip = (ivector_type *)&vecIn;
|
ivector_type * __restrict__ ip = (ivector_type *)&vecIn;
|
||||||
for(int w=0;w<owords;w++){
|
for(int w=0;w<owords;w++){
|
||||||
itmp = ip[iNsimd*w].getlane(lane_in);
|
itmp = ip[w].getlane(lane_in);
|
||||||
otmp = itmp; //potential precision change
|
otmp = itmp; //potential precision change
|
||||||
op[oNsimd*w].putlane(otmp,lane_out);
|
op[w].putlane(otmp,lane_out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,14 +167,13 @@ void GridCmdOptionInt(std::string &str,int & val)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse a floating-point command-line option.
//   str : text of the option value
//   val : receives the parsed number (double precision)
// The value is read with stream extraction, so leading whitespace is skipped and parsing stops at the
// first character that cannot be part of a number.
void GridCmdOptionFloat(std::string &str,double & val)
{
  std::stringstream ss(str);
  ss>>val;
}
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
Coordinate &latt_c,
|
Coordinate &latt_c,
|
||||||
Coordinate &mpi_c)
|
Coordinate &mpi_c)
|
||||||
|
@ -57,7 +57,7 @@ void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
|||||||
template<class VectorInt>
|
template<class VectorInt>
|
||||||
void GridCmdOptionIntVector(const std::string &str,VectorInt & vec);
|
void GridCmdOptionIntVector(const std::string &str,VectorInt & vec);
|
||||||
void GridCmdOptionInt(std::string &str,int & val);
|
void GridCmdOptionInt(std::string &str,int & val);
|
||||||
void GridCmdOptionFloat(std::string &str,float & val);
|
void GridCmdOptionFloat(std::string &str,double & val);
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
|
@ -164,11 +164,6 @@ int main(int argc, char **argv) {
|
|||||||
typedef MobiusEOFAFermionF FermionEOFAActionF;
|
typedef MobiusEOFAFermionF FermionEOFAActionF;
|
||||||
typedef typename FermionActionF::FermionField FermionFieldF;
|
typedef typename FermionActionF::FermionField FermionFieldF;
|
||||||
|
|
||||||
typedef WilsonImplD2 FermionImplPolicyD2;
|
|
||||||
typedef MobiusFermionD2 FermionActionD2;
|
|
||||||
typedef MobiusEOFAFermionD2 FermionEOFAActionD2;
|
|
||||||
typedef typename FermionActionD2::FermionField FermionFieldD2;
|
|
||||||
|
|
||||||
typedef Grid::XmlReader Serialiser;
|
typedef Grid::XmlReader Serialiser;
|
||||||
|
|
||||||
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
@ -232,31 +227,34 @@ int main(int argc, char **argv) {
|
|||||||
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||||
|
|
||||||
OneFlavourRationalParams OFRp; // Up/down
|
int SP_iters=10000;
|
||||||
OFRp.lo = 4.0e-5;
|
|
||||||
|
RationalActionParams OFRp; // Up/down
|
||||||
|
OFRp.lo = 6.0e-5;
|
||||||
OFRp.hi = 90.0;
|
OFRp.hi = 90.0;
|
||||||
OFRp.MaxIter = 60000;
|
OFRp.inv_pow = 2;
|
||||||
OFRp.tolerance= 1.0e-5;
|
OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space
|
||||||
OFRp.mdtolerance= 1.0e-3;
|
OFRp.action_tolerance= 1.0e-8;
|
||||||
|
OFRp.action_degree = 18;
|
||||||
|
OFRp.md_tolerance= 1.0e-5;
|
||||||
|
OFRp.md_degree = 14;
|
||||||
// OFRp.degree = 20; converges
|
// OFRp.degree = 20; converges
|
||||||
// OFRp.degree = 16;
|
// OFRp.degree = 16;
|
||||||
OFRp.degree = 18;
|
|
||||||
OFRp.precision= 80;
|
OFRp.precision= 80;
|
||||||
OFRp.BoundsCheckFreq=0;
|
OFRp.BoundsCheckFreq=0;
|
||||||
std::vector<RealD> ActionTolByPole({
|
std::vector<RealD> ActionTolByPole({
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-7,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8
|
1.0e-8,1.0e-8
|
||||||
});
|
});
|
||||||
std::vector<RealD> MDTolByPole({
|
std::vector<RealD> MDTolByPole({
|
||||||
1.0e-5,5.0e-6,1.0e-6,1.0e-7, // soften convergence more more
|
1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more
|
||||||
// 1.0e-6,3.0e-7,1.0e-7,1.0e-7,
|
// 1.0e-6,3.0e-7,1.0e-7,1.0e-7,
|
||||||
// 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence
|
// 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
|
||||||
1.0e-8,1.0e-8
|
1.0e-8,1.0e-8
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -265,10 +263,8 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||||
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
typedef SchurDiagMooeeOperator<FermionActionD2,FermionFieldD2 > LinearOperatorD2;
|
|
||||||
typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
|
typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
|
||||||
typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
|
typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
|
||||||
typedef SchurDiagMooeeOperator<FermionEOFAActionD2,FermionFieldD2 > LinearOperatorEOFAD2;
|
|
||||||
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
|
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
|
||||||
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
|
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
|
||||||
|
|
||||||
@ -321,7 +317,6 @@ int main(int argc, char **argv) {
|
|||||||
// temporarily need a gauge field
|
// temporarily need a gauge field
|
||||||
LatticeGaugeFieldD U(GridPtr); U=Zero();
|
LatticeGaugeFieldD U(GridPtr); U=Zero();
|
||||||
LatticeGaugeFieldF UF(GridPtrF); UF=Zero();
|
LatticeGaugeFieldF UF(GridPtrF); UF=Zero();
|
||||||
LatticeGaugeFieldD2 UD2(GridPtrF); UD2=Zero();
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
@ -340,6 +335,7 @@ int main(int argc, char **argv) {
|
|||||||
ParamsDirF.dirichlet=Dirichlet;
|
ParamsDirF.dirichlet=Dirichlet;
|
||||||
ParamsDir.partialDirichlet=1;
|
ParamsDir.partialDirichlet=1;
|
||||||
ParamsDirF.partialDirichlet=1;
|
ParamsDirF.partialDirichlet=1;
|
||||||
|
std::cout << GridLogMessage<< "Partial Dirichlet depth is "<<dwf_compressor_depth<<std::endl;
|
||||||
|
|
||||||
// double StoppingCondition = 1e-14;
|
// double StoppingCondition = 1e-14;
|
||||||
// double MDStoppingCondition = 1e-9;
|
// double MDStoppingCondition = 1e-9;
|
||||||
@ -424,7 +420,7 @@ int main(int argc, char **argv) {
|
|||||||
ActionCGL, ActionCGR,
|
ActionCGL, ActionCGR,
|
||||||
DerivativeCGL, DerivativeCGR,
|
DerivativeCGL, DerivativeCGR,
|
||||||
SFRp, true);
|
SFRp, true);
|
||||||
// Level2.push_back(&EOFA);
|
Level2.push_back(&EOFA);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// up down action
|
// up down action
|
||||||
@ -449,17 +445,15 @@ int main(int argc, char **argv) {
|
|||||||
std::vector<FermionAction *> Denominators;
|
std::vector<FermionAction *> Denominators;
|
||||||
std::vector<FermionActionF *> NumeratorsF;
|
std::vector<FermionActionF *> NumeratorsF;
|
||||||
std::vector<FermionActionF *> DenominatorsF;
|
std::vector<FermionActionF *> DenominatorsF;
|
||||||
std::vector<FermionActionD2 *> NumeratorsD2;
|
|
||||||
std::vector<FermionActionD2 *> DenominatorsD2;
|
|
||||||
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||||
std::vector<MxPCG *> ActionMPCG;
|
std::vector<MxPCG *> ActionMPCG;
|
||||||
std::vector<MxPCG *> MPCG;
|
std::vector<MxPCG *> MPCG;
|
||||||
|
|
||||||
#define MIXED_PRECISION
|
#define MIXED_PRECISION
|
||||||
#ifdef MIXED_PRECISION
|
#ifdef MIXED_PRECISION
|
||||||
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2> *> Bdys;
|
std::vector<GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy> *> Bdys;
|
||||||
#else
|
#else
|
||||||
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
std::vector<GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||||
@ -532,31 +526,19 @@ int main(int argc, char **argv) {
|
|||||||
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
|
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
|
||||||
} else {
|
} else {
|
||||||
#ifdef MIXED_PRECISION
|
#ifdef MIXED_PRECISION
|
||||||
// Use the D2 data types and make them use same grid as single
|
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
|
||||||
FermionActionD2::ImplParams ParamsDenD2(boundary);
|
|
||||||
FermionActionD2::ImplParams ParamsNumD2(boundary);
|
|
||||||
|
|
||||||
ParamsDenD2.dirichlet = ParamsDen.dirichlet;
|
|
||||||
ParamsDenD2.partialDirichlet = ParamsDen.partialDirichlet;
|
|
||||||
DenominatorsD2.push_back(new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenD2));
|
|
||||||
|
|
||||||
ParamsNumD2.dirichlet = ParamsNum.dirichlet;
|
|
||||||
ParamsNumD2.partialDirichlet = ParamsNum.partialDirichlet;
|
|
||||||
NumeratorsD2.push_back (new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumD2));
|
|
||||||
|
|
||||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
|
|
||||||
*Numerators[h],*Denominators[h],
|
*Numerators[h],*Denominators[h],
|
||||||
*NumeratorsF[h],*DenominatorsF[h],
|
*NumeratorsF[h],*DenominatorsF[h],
|
||||||
*NumeratorsD2[h],*DenominatorsD2[h],
|
*Numerators[h],*Denominators[h],
|
||||||
OFRp, 400) );
|
OFRp, SP_iters) );
|
||||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
|
Bdys.push_back( new GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicy>(
|
||||||
*Numerators[h],*Denominators[h],
|
*Numerators[h],*Denominators[h],
|
||||||
*NumeratorsF[h],*DenominatorsF[h],
|
*NumeratorsF[h],*DenominatorsF[h],
|
||||||
*NumeratorsD2[h],*DenominatorsD2[h],
|
*Numerators[h],*Denominators[h],
|
||||||
OFRp, 400) );
|
OFRp, SP_iters) );
|
||||||
#else
|
#else
|
||||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
Bdys.push_back( new GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -183,7 +183,7 @@ int main(int argc, char **argv) {
|
|||||||
// 4/2 => 0.6 dH
|
// 4/2 => 0.6 dH
|
||||||
// 3/3 => 0.8 dH .. depth 3, slower
|
// 3/3 => 0.8 dH .. depth 3, slower
|
||||||
//MD.MDsteps = 4;
|
//MD.MDsteps = 4;
|
||||||
MD.MDsteps = 3;
|
MD.MDsteps = 12;
|
||||||
MD.trajL = 0.5;
|
MD.trajL = 0.5;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
@ -200,8 +200,8 @@ int main(int argc, char **argv) {
|
|||||||
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||||
|
|
||||||
CheckpointerParameters CPparams;
|
CheckpointerParameters CPparams;
|
||||||
CPparams.config_prefix = "ckpoint_DDHMC_lat";
|
CPparams.config_prefix = "ckpoint_HMC_lat";
|
||||||
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
|
CPparams.rng_prefix = "ckpoint_HMC_rng";
|
||||||
CPparams.saveInterval = 1;
|
CPparams.saveInterval = 1;
|
||||||
CPparams.format = "IEEE64BIG";
|
CPparams.format = "IEEE64BIG";
|
||||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
@ -228,7 +228,7 @@ int main(int argc, char **argv) {
|
|||||||
Real pv_mass = 1.0;
|
Real pv_mass = 1.0;
|
||||||
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.01, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
std::vector<Real> hasenbusch({ 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
||||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||||
|
|
||||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
@ -299,8 +299,8 @@ int main(int argc, char **argv) {
|
|||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Collect actions
|
// Collect actions
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
ActionLevel<HMCWrapper::Field> Level1(1);
|
// ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
ActionLevel<HMCWrapper::Field> Level2(3);
|
ActionLevel<HMCWrapper::Field> Level2(1);
|
||||||
ActionLevel<HMCWrapper::Field> Level3(15);
|
ActionLevel<HMCWrapper::Field> Level3(15);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
@ -369,7 +369,7 @@ int main(int argc, char **argv) {
|
|||||||
ActionCGL, ActionCGR,
|
ActionCGL, ActionCGR,
|
||||||
DerivativeCGL, DerivativeCGR,
|
DerivativeCGL, DerivativeCGR,
|
||||||
SFRp, true);
|
SFRp, true);
|
||||||
// Level2.push_back(&EOFA);
|
Level2.push_back(&EOFA);
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// up down action
|
// up down action
|
||||||
@ -477,7 +477,7 @@ int main(int argc, char **argv) {
|
|||||||
// Gauge action
|
// Gauge action
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
Level3.push_back(&GaugeAction);
|
Level3.push_back(&GaugeAction);
|
||||||
TheHMC.TheAction.push_back(Level1);
|
// TheHMC.TheAction.push_back(Level1);
|
||||||
TheHMC.TheAction.push_back(Level2);
|
TheHMC.TheAction.push_back(Level2);
|
||||||
TheHMC.TheAction.push_back(Level3);
|
TheHMC.TheAction.push_back(Level3);
|
||||||
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||||
|
10
TODO
10
TODO
@ -1,3 +1,12 @@
|
|||||||
|
- - Slice sum optimisation & A2A - atomic addition
|
||||||
|
- - Also faster non-atomic reduction
|
||||||
|
- - Remaining PRs
|
||||||
|
- - DDHMC
|
||||||
|
- - MixedPrec is the action eval, high precision
|
||||||
|
- - MixedPrecCleanup is the force eval, low precision
|
||||||
|
|
||||||
|
=================
|
||||||
|
=================
|
||||||
Lattice_basis.h -- > HIP and SYCL GPU code
|
Lattice_basis.h -- > HIP and SYCL GPU code
|
||||||
|
|
||||||
|
|
||||||
@ -8,6 +17,7 @@ DDHMC
|
|||||||
-- Multishift Mixed Precision - DONE
|
-- Multishift Mixed Precision - DONE
|
||||||
-- Pole dependent residual - DONE
|
-- Pole dependent residual - DONE
|
||||||
|
|
||||||
|
|
||||||
=======
|
=======
|
||||||
-- comms threads issue??
|
-- comms threads issue??
|
||||||
-- Part done: Staggered kernel performance on GPU
|
-- Part done: Staggered kernel performance on GPU
|
||||||
|
189
benchmarks/Benchmark_prec_change.cc
Normal file
189
benchmarks/Benchmark_prec_change.cc
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./benchmarks/Benchmark_prec_change.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
int Ls = 12;
|
||||||
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
|
|
||||||
|
GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
|
||||||
|
GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
|
||||||
|
GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
|
||||||
|
|
||||||
|
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
|
||||||
|
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
|
||||||
|
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
|
||||||
|
GridParallelRNG RNG4(UGridD); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||||
|
GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
LatticeFermionD field_d(FGridD), tmp_d(FGridD);
|
||||||
|
random(RNG5,field_d); tmp_d = field_d;
|
||||||
|
|
||||||
|
LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
|
||||||
|
precisionChange(field_d2, field_d); tmp_d2 = field_d2;
|
||||||
|
|
||||||
|
LatticeFermionF field_f(FGridF), tmp_f(FGridF);
|
||||||
|
precisionChange(field_f, field_d); tmp_f = field_f;
|
||||||
|
|
||||||
|
int N = 500;
|
||||||
|
|
||||||
|
double time_ds = 0, time_sd = 0;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Benchmarking single<->double original implementation (fields initially device-resident)" << std::endl;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
//We want to benchmark the typical scenario of both fields being device resident
|
||||||
|
//To do this, invoke an operation that will open a device view and touch all sites
|
||||||
|
//with a write operation that invalidates the CPU copy
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
precisionChangeOrig(field_d,field_f);
|
||||||
|
double stop=usecond();
|
||||||
|
time_sd += stop - start;
|
||||||
|
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
start=usecond();
|
||||||
|
precisionChangeOrig(field_f,field_d);
|
||||||
|
stop=usecond();
|
||||||
|
time_ds += stop - start;
|
||||||
|
}
|
||||||
|
std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
|
||||||
|
precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Benchmarking single<->double with pregenerated workspace(fields initially device-resident)" << std::endl;
|
||||||
|
time_sd = time_ds = 0;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
precisionChange(field_d,field_f, wk_sp_to_dp);
|
||||||
|
double stop=usecond();
|
||||||
|
time_sd += stop - start;
|
||||||
|
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
start=usecond();
|
||||||
|
precisionChange(field_f,field_d, wk_dp_to_sp);
|
||||||
|
stop=usecond();
|
||||||
|
time_ds += stop - start;
|
||||||
|
}
|
||||||
|
std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Benchmarking single<->double with workspace generated on-the-fly (fields initially device-resident)" << std::endl;
|
||||||
|
time_sd = time_ds = 0;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
precisionChange(field_d,field_f);
|
||||||
|
double stop=usecond();
|
||||||
|
time_sd += stop - start;
|
||||||
|
|
||||||
|
field_d = tmp_d;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
start=usecond();
|
||||||
|
precisionChange(field_f,field_d);
|
||||||
|
stop=usecond();
|
||||||
|
time_ds += stop - start;
|
||||||
|
|
||||||
|
}
|
||||||
|
std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Benchmarking single<->double2 (fields initially device-resident)" << std::endl;
|
||||||
|
time_sd = time_ds = 0;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
field_d2 = tmp_d2;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
precisionChangeFast(field_d2,field_f);
|
||||||
|
double stop=usecond();
|
||||||
|
time_sd += stop - start;
|
||||||
|
|
||||||
|
field_d2 = tmp_d2;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
start=usecond();
|
||||||
|
precisionChangeFast(field_f,field_d2);
|
||||||
|
stop=usecond();
|
||||||
|
time_ds += stop - start;
|
||||||
|
}
|
||||||
|
std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "Benchmarking single<->double2 through standard precisionChange call(fields initially device-resident) [NB: perf should be the same as the previous test!]" << std::endl;
|
||||||
|
time_sd = time_ds = 0;
|
||||||
|
for(int i=0;i<N;i++){
|
||||||
|
field_d2 = tmp_d2;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
precisionChange(field_d2,field_f);
|
||||||
|
double stop=usecond();
|
||||||
|
time_sd += stop - start;
|
||||||
|
|
||||||
|
field_d2 = tmp_d2;
|
||||||
|
field_f = tmp_f;
|
||||||
|
|
||||||
|
start=usecond();
|
||||||
|
precisionChange(field_f,field_d2);
|
||||||
|
stop=usecond();
|
||||||
|
time_ds += stop - start;
|
||||||
|
}
|
||||||
|
std::cout << "d->s " << time_ds/N << "us" << " s->d " << time_sd/N << "us" << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -1,12 +1,13 @@
|
|||||||
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
|
||||||
../../configure --enable-comms=mpi-auto \
|
../../configure --enable-comms=mpi-auto \
|
||||||
--with-lime=$CLIME \
|
--with-lime=$CLIME \
|
||||||
--enable-unified=no \
|
--enable-unified=yes \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-tracing=timer \
|
--enable-tracing=timer \
|
||||||
--enable-accelerator=hip \
|
--enable-accelerator=hip \
|
||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-simd=GPU \
|
--enable-simd=GPU \
|
||||||
|
--disable-accelerator-cshift \
|
||||||
--with-gmp=$OLCF_GMP_ROOT \
|
--with-gmp=$OLCF_GMP_ROOT \
|
||||||
--with-fftw=$FFTW_DIR/.. \
|
--with-fftw=$FFTW_DIR/.. \
|
||||||
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
|
||||||
|
@ -23,12 +23,7 @@ export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1
|
|||||||
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
||||||
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0
|
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0
|
||||||
|
|
||||||
for i in 0
|
#mpiexec -launcher ssh -n 1 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 32.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0 > 1tile.log
|
||||||
do
|
|
||||||
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
|
|
||||||
mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 --device-mem 32768
|
|
||||||
done
|
|
||||||
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 1.1.1.2 --grid 32.32.32.64 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.1x2.log
|
|
||||||
#mpiexec -launcher ssh -n 2 -host localhost ./wrap4gpu.sh ./Benchmark_halo --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --shm-mpi 1 > halo.2tile.2x1.log
|
|
||||||
|
|
||||||
|
mpiexec -launcher ssh -n 2 -host localhost ./wrap.sh ./Benchmark_dwf_fp32 --mpi 2.1.1.1 --grid 64.32.32.32 --accelerator-threads $NT --comms-sequential --shm-mpi 0
|
||||||
|
|
||||||
|
@ -14,4 +14,3 @@ INSTALL=/nfs/site/home/paboylx/prereqs/
|
|||||||
LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \
|
LDFLAGS="-fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L$INSTALL/lib" \
|
||||||
CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare"
|
CXXFLAGS="-fsycl-unnamed-lambda -fsycl -no-fma -I$INSTALL/include -Wno-tautological-compare"
|
||||||
|
|
||||||
|
|
||||||
|
124
tests/core/Test_prec_change.cc
Normal file
124
tests/core/Test_prec_change.cc
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/core/Test_prec_change.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
int Ls = 12;
|
||||||
|
Coordinate latt4 = GridDefaultLatt();
|
||||||
|
|
||||||
|
GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridD = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridD);
|
||||||
|
GridCartesian * FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
|
||||||
|
GridRedBlackCartesian * FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
|
||||||
|
|
||||||
|
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
|
||||||
|
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
|
||||||
|
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
|
||||||
|
GridParallelRNG RNG5(FGridD); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG5F(FGridF); RNG5F.SeedFixedIntegers(seeds5);
|
||||||
|
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
|
||||||
|
|
||||||
|
LatticeFermionD field_d(FGridD), tmp_d(FGridD);
|
||||||
|
random(RNG5,field_d);
|
||||||
|
RealD norm2_field_d = norm2(field_d);
|
||||||
|
|
||||||
|
LatticeFermionD2 field_d2(FGridF), tmp_d2(FGridF);
|
||||||
|
random(RNG5F,field_d2);
|
||||||
|
RealD norm2_field_d2 = norm2(field_d2);
|
||||||
|
|
||||||
|
LatticeFermionF field_f(FGridF);
|
||||||
|
|
||||||
|
//Test original implementation
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "Testing original implementation" << std::endl;
|
||||||
|
field_f = Zero();
|
||||||
|
precisionChangeOrig(field_f,field_d);
|
||||||
|
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
tmp_d = Zero();
|
||||||
|
precisionChangeOrig(tmp_d, field_f);
|
||||||
|
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
}
|
||||||
|
//Test new implementation with pregenerated workspace
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "Testing new implementation with pregenerated workspace" << std::endl;
|
||||||
|
precisionChangeWorkspace wk_sp_to_dp(field_d.Grid(),field_f.Grid());
|
||||||
|
precisionChangeWorkspace wk_dp_to_sp(field_f.Grid(),field_d.Grid());
|
||||||
|
|
||||||
|
field_f = Zero();
|
||||||
|
precisionChange(field_f,field_d,wk_dp_to_sp);
|
||||||
|
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
tmp_d = Zero();
|
||||||
|
precisionChange(tmp_d, field_f,wk_sp_to_dp);
|
||||||
|
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
}
|
||||||
|
//Test new implementation without pregenerated workspace
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "Testing new implementation without pregenerated workspace" << std::endl;
|
||||||
|
field_f = Zero();
|
||||||
|
precisionChange(field_f,field_d);
|
||||||
|
RealD Ndiff = (norm2_field_d - norm2(field_f))/norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
tmp_d = Zero();
|
||||||
|
precisionChange(tmp_d, field_f);
|
||||||
|
Ndiff = norm2( LatticeFermionD(tmp_d-field_d) ) / norm2_field_d;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
}
|
||||||
|
//Test fast implementation
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage << "Testing fast (double2) implementation" << std::endl;
|
||||||
|
field_f = Zero();
|
||||||
|
precisionChangeFast(field_f,field_d2);
|
||||||
|
RealD Ndiff = (norm2_field_d2 - norm2(field_f))/norm2_field_d2;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of single and double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
tmp_d2 = Zero();
|
||||||
|
precisionChangeFast(tmp_d2, field_f);
|
||||||
|
Ndiff = norm2( LatticeFermionD2(tmp_d2-field_d2) ) / norm2_field_d2;
|
||||||
|
std::cout << GridLogMessage << (fabs(Ndiff) > 1e-05 ? "!!FAIL" : "Pass") << ": relative norm2 of back-converted and original double prec fields differs by " << Ndiff << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Done" << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
305
tests/forces/Test_bdy.cc
Normal file
305
tests/forces/Test_bdy.cc
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
2f Full det MdagM 10^6 force ~ 1.3e7
|
||||||
|
Grid : Message : 1767.283471 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
Grid : Message : 1767.283476 s : S1 : 1.52885e+09
|
||||||
|
Grid : Message : 1767.283480 s : S2 : 1.52886e+09
|
||||||
|
Grid : Message : 1767.283482 s : dS : 8877.34
|
||||||
|
Grid : Message : 1767.283483 s : dSpred : 8877.7
|
||||||
|
Grid : Message : 1767.283484 s : diff : -0.360484
|
||||||
|
Grid : Message : 1767.283485 s : *********************************************************
|
||||||
|
|
||||||
|
2f Full det MpcdagMpc 10^6 force ~ 1.8e6
|
||||||
|
Grid : Message : 2399.576962 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
Grid : Message : 2399.576968 s : S1 : 1.52885e+09
|
||||||
|
Grid : Message : 2399.576972 s : S2 : 1.52886e+09
|
||||||
|
Grid : Message : 2399.576974 s : dS : 9728.49
|
||||||
|
Grid : Message : 2399.576975 s : dSpred : 9726.58
|
||||||
|
Grid : Message : 2399.576976 s : diff : 1.90683
|
||||||
|
Grid : Message : 2399.576977 s : *********************************************************
|
||||||
|
|
||||||
|
2f bdy MdagM 1500 force Force ~ 2800
|
||||||
|
Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
Grid : Message : 4622.385067 s : S1 : 1.52885e+09
|
||||||
|
Grid : Message : 4622.385071 s : S2 : 1.52885e+09
|
||||||
|
Grid : Message : 4622.385072 s : dS : 25.4944
|
||||||
|
Grid : Message : 4622.385073 s : dSpred : 25.4672
|
||||||
|
Grid : Message : 4622.385074 s : diff : 0.0271414
|
||||||
|
Grid : Message : 4622.385075 s : *********************************************************
|
||||||
|
|
||||||
|
2f bdy MpcdagMpc 10^6 force Force ~ 2200
|
||||||
|
Grid : Message : 4622.385061 s : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
Grid : Message : 4622.385067 s : S1 : 1.52885e+09
|
||||||
|
Grid : Message : 4622.385071 s : S2 : 1.52885e+09
|
||||||
|
Grid : Message : 4622.385072 s : dS : 25.4944
|
||||||
|
Grid : Message : 4622.385073 s : dSpred : 25.4672
|
||||||
|
Grid : Message : 4622.385074 s : diff : 0.0271414
|
||||||
|
Grid : Message : 4622.385075 s : *********************************************************
|
||||||
|
|
||||||
|
1f Bdy Det
|
||||||
|
Optimisation log: looser rational AND MD tolerances sloppy
|
||||||
|
MobiusForce.221179 -- same as HMC. dS is mispredicted Force ~2.8
|
||||||
|
Grid : Message : 6582.258991 s : dS : 0.024478
|
||||||
|
Grid : Message : 6582.258992 s : dSpred : 0.00791876
|
||||||
|
Grid : Message : 6582.258994 s : diff : 0.0165592
|
||||||
|
|
||||||
|
MobiusForce.221193 -- tight rational AND MD tolerances to 1e-8 ~ 2.8 same
|
||||||
|
Grid : Message : 1964.939209 s : S1 : 7.64404e+08
|
||||||
|
Grid : Message : 1964.939213 s : S2 : 7.64404e+08
|
||||||
|
Grid : Message : 1964.939215 s : dS : -0.00775838 <--- too loose even on action
|
||||||
|
Grid : Message : 1964.939216 s : dSpred : -0.00416793
|
||||||
|
Grid : Message : 1964.939217 s : diff : -0.00359045
|
||||||
|
|
||||||
|
MobiusForce.221394 -- looser rational, MD tol 1e-8 ~ 2.8 same
|
||||||
|
Grid : Message : 1198.346720 s : S1 : 764404649.48886
|
||||||
|
Grid : Message : 1198.346760 s : S2 : 764404649.5133
|
||||||
|
Grid : Message : 1198.346780 s : dS : 0.024440884590149
|
||||||
|
Grid : Message : 1198.346800 s : dSpred : 0.0079145154465184
|
||||||
|
Grid : Message : 1198.346810 s : diff : 0.016526369143631
|
||||||
|
|
||||||
|
MobiusForce.221394 -- tight rational, MD tol sloppy Force ~ 2.8
|
||||||
|
Grid : Message : 2376.921950 s : S1 : 764404436.44069
|
||||||
|
Grid : Message : 2376.921954 s : S2 : 764404436.43299
|
||||||
|
Grid : Message : 2376.921956 s : dS : -0.0076971054077148
|
||||||
|
Grid : Message : 2376.921958 s : dSpred : -0.0041610472282526
|
||||||
|
Grid : Message : 2376.921959 s : diff : -0.0035360581794623
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
//
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/forces/Test_bdy.cc
|
||||||
|
|
||||||
|
Copyright (C) 2022
|
||||||
|
|
||||||
|
Author: Peter Boyle <pboyle@bnl.gov>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
typedef MobiusFermionD FermionAction;
|
||||||
|
typedef WilsonImplD FimplD;
|
||||||
|
typedef WilsonImplD FermionImplPolicy;
|
||||||
|
|
||||||
|
template<class Gimpl>
|
||||||
|
void ForceTest(Action<LatticeGaugeField> &action,LatticeGaugeField & U,MomentumFilterBase<LatticeGaugeField> &Filter)
|
||||||
|
{
|
||||||
|
GridBase *UGrid = U.Grid();
|
||||||
|
|
||||||
|
std::vector<int> seeds({1,2,3,5});
|
||||||
|
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
|
LatticeColourMatrix Pmu(UGrid);
|
||||||
|
LatticeGaugeField P(UGrid);
|
||||||
|
LatticeGaugeField UdSdU(UGrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Force test for "<<action.action_name()<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
|
||||||
|
|
||||||
|
RealD eps=0.005;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Refresh "<<action.action_name()<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
|
||||||
|
Gimpl::generate_momenta(P,sRNG,RNG4);
|
||||||
|
Filter.applyFilter(P);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("./ckpoint_lat.2000");
|
||||||
|
NerscIO::readConfiguration(U,header,file);
|
||||||
|
#else
|
||||||
|
U = 1.0;
|
||||||
|
#endif
|
||||||
|
action.refresh(U,sRNG,RNG4);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
|
||||||
|
RealD S1 = action.S(U);
|
||||||
|
|
||||||
|
Gimpl::update_field(P,U,eps);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Derivative "<<action.action_name()<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
action.deriv(U,UdSdU);
|
||||||
|
UdSdU = Ta(UdSdU);
|
||||||
|
Filter.applyFilter(UdSdU);
|
||||||
|
|
||||||
|
DumpSliceNorm("Force",UdSdU,Nd-1);
|
||||||
|
|
||||||
|
Gimpl::update_field(P,U,eps);
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Action "<<action.action_name()<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
|
||||||
|
RealD S2 = action.S(U);
|
||||||
|
|
||||||
|
// Use the derivative
|
||||||
|
LatticeComplex dS(UGrid); dS = Zero();
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
auto UdSdUmu = PeekIndex<LorentzIndex>(UdSdU,mu);
|
||||||
|
Pmu= PeekIndex<LorentzIndex>(P,mu);
|
||||||
|
dS = dS - trace(Pmu*UdSdUmu)*eps*2.0*2.0;
|
||||||
|
}
|
||||||
|
ComplexD dSpred = sum(dS);
|
||||||
|
RealD diff = S2-S1-dSpred.real();
|
||||||
|
|
||||||
|
std::cout<< GridLogMessage << "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++"<<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "S1 : "<< S1 <<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "S2 : "<< S2 <<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "dS : "<< S2-S1 <<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "dSpred : "<< dSpred.real() <<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "diff : "<< diff<<std::endl;
|
||||||
|
std::cout<< GridLogMessage << "*********************************************************"<<std::endl;
|
||||||
|
// assert(diff<1.0);
|
||||||
|
std::cout<< GridLogMessage << "Done" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "*********************************************************"<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
std::cout << std::setprecision(14);
|
||||||
|
Coordinate latt_size = GridDefaultLatt();
|
||||||
|
Coordinate mpi_layout = GridDefaultMpi();
|
||||||
|
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
Coordinate shm;
|
||||||
|
GlobalSharedMemory::GetShmDims(mpi_layout,shm);
|
||||||
|
|
||||||
|
const int Ls=12;
|
||||||
|
const int Nt = latt_size[3];
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Domain decomposed operator
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate CommDim(Nd);
|
||||||
|
for(int d=0;d<Nd;d++) CommDim[d]= (mpi_layout[d]/shm[d])>1 ? 1 : 0;
|
||||||
|
|
||||||
|
Coordinate NonDirichlet(Nd+1,0);
|
||||||
|
Coordinate Dirichlet(Nd+1,0);
|
||||||
|
Dirichlet[1] = CommDim[0]*latt_size[0]/mpi_layout[0] * shm[0];
|
||||||
|
Dirichlet[2] = CommDim[1]*latt_size[1]/mpi_layout[1] * shm[1];
|
||||||
|
Dirichlet[3] = CommDim[2]*latt_size[2]/mpi_layout[2] * shm[2];
|
||||||
|
Dirichlet[4] = CommDim[3]*latt_size[3]/mpi_layout[3] * shm[3];
|
||||||
|
|
||||||
|
Coordinate Block4(Nd);
|
||||||
|
Block4[0] = Dirichlet[1];
|
||||||
|
Block4[1] = Dirichlet[2];
|
||||||
|
Block4[2] = Dirichlet[3];
|
||||||
|
Block4[3] = Dirichlet[4];
|
||||||
|
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionAction::ImplParams Params(boundary);
|
||||||
|
FermionAction::ImplParams ParamsDir(boundary);
|
||||||
|
Params.dirichlet=NonDirichlet;
|
||||||
|
ParamsDir.dirichlet=Dirichlet;
|
||||||
|
ParamsDir.partialDirichlet=1;
|
||||||
|
|
||||||
|
///////////////////// Gauge Field and Gauge Forces ////////////////////////////
|
||||||
|
LatticeGaugeField U(UGrid);
|
||||||
|
|
||||||
|
RealD beta=6.0;
|
||||||
|
WilsonGaugeActionR PlaqAction(beta);
|
||||||
|
IwasakiGaugeActionR RectAction(beta);
|
||||||
|
|
||||||
|
MomentumFilterNone<LatticeGaugeField> FilterNone;
|
||||||
|
ForceTest<GimplTypesR>(PlaqAction,U,FilterNone);
|
||||||
|
ForceTest<GimplTypesR>(RectAction,U,FilterNone);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Action
|
||||||
|
////////////////////////////////////
|
||||||
|
RealD mass=0.00078;
|
||||||
|
RealD pvmass=1.0;
|
||||||
|
RealD M5=1.8;
|
||||||
|
RealD b=1.5;
|
||||||
|
RealD c=0.5;
|
||||||
|
|
||||||
|
// Double versions
|
||||||
|
FermionAction DdwfPeriodic(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,Params);
|
||||||
|
FermionAction PVPeriodic (U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,pvmass,M5,b,c,Params);
|
||||||
|
FermionAction DdwfDirichlet(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,ParamsDir);
|
||||||
|
|
||||||
|
double StoppingCondition = 1.0e-8;
|
||||||
|
double MaxCGIterations = 50000;
|
||||||
|
ConjugateGradient<LatticeFermion> CG(StoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
|
//////////////////// Two Flavour Determinant Ratio ///////////////////////////////
|
||||||
|
TwoFlavourRatioPseudoFermionAction<FimplD> Nf2(PVPeriodic, DdwfPeriodic,CG,CG);
|
||||||
|
// ForceTest<GimplTypesR>(Nf2,U,FilterNone);
|
||||||
|
|
||||||
|
//////////////////// Two Flavour Determinant force test Even Odd ///////////////////////////////
|
||||||
|
TwoFlavourEvenOddRatioPseudoFermionAction<FimplD> Nf2eo(PVPeriodic, DdwfPeriodic,CG,CG);
|
||||||
|
// ForceTest<GimplTypesR>(Nf2eo,U,FilterNone);
|
||||||
|
|
||||||
|
//////////////////// Domain forces ////////////////////
|
||||||
|
int Width=4;
|
||||||
|
DDHMCFilter<WilsonImplD::Field> DDHMCFilter(Block4,Width);
|
||||||
|
|
||||||
|
//////////////////// Two flavour boundary det ////////////////////
|
||||||
|
TwoFlavourRatioPseudoFermionAction<FimplD> BdyNf2(DdwfDirichlet, DdwfPeriodic,CG,CG);
|
||||||
|
// ForceTest<GimplTypesR>(BdyNf2,U,DDHMCFilter);
|
||||||
|
|
||||||
|
//////////////////// Two flavour eo boundary det ////////////////////
|
||||||
|
TwoFlavourEvenOddRatioPseudoFermionAction<FimplD> BdyNf2eo(DdwfDirichlet, DdwfPeriodic,CG,CG);
|
||||||
|
// ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
|
||||||
|
|
||||||
|
//////////////////// One flavour boundary det ////////////////////
|
||||||
|
OneFlavourRationalParams OFRp; // Up/down
|
||||||
|
OFRp.lo = 4.0e-5;
|
||||||
|
OFRp.hi = 90.0;
|
||||||
|
OFRp.MaxIter = 60000;
|
||||||
|
OFRp.tolerance= 1.0e-8;
|
||||||
|
OFRp.mdtolerance= 1.0e-6;
|
||||||
|
OFRp.degree = 18;
|
||||||
|
OFRp.precision= 80;
|
||||||
|
OFRp.BoundsCheckFreq=0;
|
||||||
|
std::vector<RealD> ActionTolByPole({
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
std::vector<RealD> MDTolByPole({
|
||||||
|
1.0e-6,3.0e-7,1.0e-7,1.0e-7, // Orig sloppy
|
||||||
|
// 1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp);
|
||||||
|
ForceTest<GimplTypesR>(BdySqrt,U,DDHMCFilter);
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -476,6 +476,20 @@ int main (int argc, char ** argv)
|
|||||||
// ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
|
// ForceTest<GimplTypesR>(BdyNf2eo,U,DDHMCFilter);
|
||||||
|
|
||||||
//////////////////// One flavour boundary det ////////////////////
|
//////////////////// One flavour boundary det ////////////////////
|
||||||
|
RationalActionParams OFRp; // Up/down
|
||||||
|
OFRp.lo = 6.0e-5;
|
||||||
|
OFRp.hi = 90.0;
|
||||||
|
OFRp.inv_pow = 2;
|
||||||
|
OFRp.MaxIter = SP_iters; // get most shifts by 2000, stop sharing space
|
||||||
|
OFRp.action_tolerance= 1.0e-8;
|
||||||
|
OFRp.action_degree = 18;
|
||||||
|
OFRp.md_tolerance= 1.0e-5;
|
||||||
|
OFRp.md_degree = 14;
|
||||||
|
// OFRp.degree = 20; converges
|
||||||
|
// OFRp.degree = 16;
|
||||||
|
OFRp.precision= 80;
|
||||||
|
OFRp.BoundsCheckFreq=0;
|
||||||
|
/*
|
||||||
OneFlavourRationalParams OFRp; // Up/down
|
OneFlavourRationalParams OFRp; // Up/down
|
||||||
OFRp.lo = 4.0e-5;
|
OFRp.lo = 4.0e-5;
|
||||||
OFRp.hi = 90.0;
|
OFRp.hi = 90.0;
|
||||||
@ -485,6 +499,23 @@ int main (int argc, char ** argv)
|
|||||||
OFRp.degree = 18;
|
OFRp.degree = 18;
|
||||||
OFRp.precision= 80;
|
OFRp.precision= 80;
|
||||||
OFRp.BoundsCheckFreq=0;
|
OFRp.BoundsCheckFreq=0;
|
||||||
|
*/
|
||||||
|
std::vector<RealD> ActionTolByPole({
|
||||||
|
1.0e-7,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
std::vector<RealD> MDTolByPole({
|
||||||
|
1.6e-5,5.0e-6,1.0e-6,3.0e-7, // soften convergence more more
|
||||||
|
// 1.0e-6,3.0e-7,1.0e-7,1.0e-7,
|
||||||
|
// 3.0e-6,1.0e-6,1.0e-7,1.0e-7, // soften convergence
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
|
1.0e-8,1.0e-8
|
||||||
|
});
|
||||||
|
/*
|
||||||
std::vector<RealD> ActionTolByPole({
|
std::vector<RealD> ActionTolByPole({
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
@ -499,9 +530,9 @@ int main (int argc, char ** argv)
|
|||||||
// 1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
// 1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
||||||
1.0e-8,1.0e-8,1.0e-8,1.0e-8,
|
|
||||||
1.0e-8,1.0e-8
|
1.0e-8,1.0e-8
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp);
|
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> BdySqrt(DdwfDirichlet,DdwfPeriodic,OFRp);
|
||||||
BdySqrt.SetTolerances(ActionTolByPole,MDTolByPole);
|
BdySqrt.SetTolerances(ActionTolByPole,MDTolByPole);
|
||||||
ForceTest<GimplTypesR>(BdySqrt,U,DDHMCFilter);
|
ForceTest<GimplTypesR>(BdySqrt,U,DDHMCFilter);
|
||||||
|
73
tests/lanczos/Test_dwf_block_lanczos.README
Normal file
73
tests/lanczos/Test_dwf_block_lanczos.README
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#Example script
|
||||||
|
DIR=/gpfs/alpine/phy157/proj-shared/phy157dwf/chulwoo/Grid/BL/build/tests/lanczos
|
||||||
|
BIN=${DIR}/Test_dwf_block_lanczos
|
||||||
|
|
||||||
|
VOL='--grid 16.16.16.32 '
|
||||||
|
GRID='--mpi 1.1.1.4 '
|
||||||
|
CONF='--gconf ckpoint_lat.IEEE64BIG.2000 '
|
||||||
|
OPT='--mass 0.01 --M5 1.8 --phase in.params --omega in.params --shm 4096'
|
||||||
|
#BL='--rbl 16.1024.128.1000.10 --split 1.1.4.4 --check_int 100 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51'
|
||||||
|
BL='--rbl 4.128.16.100.10 --split 1.1.1.4 --check_int 25 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51'
|
||||||
|
|
||||||
|
ARGS=${CONF}" "${OPT}" "${BL}" "${VOL}" "${GRID}
|
||||||
|
export APP="${BIN} ${ARGS}"
|
||||||
|
echo APP=${APP}
|
||||||
|
#export JS="jsrun --nrs 32 -a4 -g4 -c42 -dpacked -b packed:7 --smpiargs="-gpu" "
|
||||||
|
export JS="jsrun --nrs 1 -a4 -g4 -c42 -dpacked -b packed:10 --smpiargs="-gpu" "
|
||||||
|
$JS $APP
|
||||||
|
|
||||||
|
#sample in.params (the file passed to --phase and --omega in the script above)
|
||||||
|
|
||||||
|
boundary_phase 0 1 0
|
||||||
|
boundary_phase 1 1 0
|
||||||
|
boundary_phase 2 1 0
|
||||||
|
boundary_phase 3 -1 0
|
||||||
|
|
||||||
|
omega 0 0.5 0
|
||||||
|
omega 1 0.5 0
|
||||||
|
omega 2 0.5 0
|
||||||
|
omega 3 0.5 0
|
||||||
|
omega 4 0.5 0
|
||||||
|
omega 5 0.5 0
|
||||||
|
omega 6 0.5 0
|
||||||
|
omega 7 0.5 0
|
||||||
|
omega 8 0.5 0
|
||||||
|
omega 9 0.5 0
|
||||||
|
omega 10 0.5 0
|
||||||
|
omega 11 0.5 0
|
||||||
|
|
||||||
|
|
||||||
|
#output
|
||||||
|
|
||||||
|
Grid : Message : 1.717474 s : Gauge Configuration ckpoint_lat.IEEE64BIG.2000
|
||||||
|
Grid : Message : 1.717478 s : boundary_phase[0] = (1,0)
|
||||||
|
Grid : Message : 1.717497 s : boundary_phase[1] = (1,0)
|
||||||
|
Grid : Message : 1.717500 s : boundary_phase[2] = (1,0)
|
||||||
|
Grid : Message : 1.717503 s : boundary_phase[3] = (-1,0)
|
||||||
|
Grid : Message : 1.717506 s : Ls 12
|
||||||
|
Grid : Message : 1.717507 s : mass 0.01
|
||||||
|
Grid : Message : 1.717510 s : M5 1.8
|
||||||
|
Grid : Message : 1.717512 s : mob_b 1.5
|
||||||
|
Grid : Message : 1.717514 s : omega[0] = (0.5,0)
|
||||||
|
Grid : Message : 1.717517 s : omega[1] = (0.5,0)
|
||||||
|
Grid : Message : 1.717520 s : omega[2] = (0.5,0)
|
||||||
|
Grid : Message : 1.717523 s : omega[3] = (0.5,0)
|
||||||
|
Grid : Message : 1.717526 s : omega[4] = (0.5,0)
|
||||||
|
Grid : Message : 1.717529 s : omega[5] = (0.5,0)
|
||||||
|
Grid : Message : 1.717532 s : omega[6] = (0.5,0)
|
||||||
|
Grid : Message : 1.717535 s : omega[7] = (0.5,0)
|
||||||
|
Grid : Message : 1.717538 s : omega[8] = (0.5,0)
|
||||||
|
Grid : Message : 1.717541 s : omega[9] = (0.5,0)
|
||||||
|
Grid : Message : 1.717544 s : omega[10] = (0.5,0)
|
||||||
|
Grid : Message : 1.717547 s : omega[11] = (0.5,0)
|
||||||
|
Grid : Message : 1.717550 s : Nu 4
|
||||||
|
Grid : Message : 1.717551 s : Nk 128
|
||||||
|
Grid : Message : 1.717552 s : Np 16
|
||||||
|
Grid : Message : 1.717553 s : Nm 288
|
||||||
|
Grid : Message : 1.717554 s : Nstop 100
|
||||||
|
Grid : Message : 1.717555 s : Ntest 25
|
||||||
|
Grid : Message : 1.717557 s : MaxIter 10
|
||||||
|
Grid : Message : 1.717558 s : resid 1e-05
|
||||||
|
Grid : Message : 1.717560 s : Cheby Poly 0.007,7,51
|
||||||
|
|
||||||
|
|
410
tests/lanczos/Test_dwf_block_lanczos.cc
Normal file
410
tests/lanczos/Test_dwf_block_lanczos.cc
Normal file
@ -0,0 +1,410 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/lanczos/Test_dwf_block_lanczos.cc
|
||||||
|
|
||||||
|
Copyright (C) 2022
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Yong-Chull Jang <ypj@quark.phy.bnl.gov>
|
||||||
|
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/util/Init.h>
|
||||||
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
//using namespace Grid::QCD;
|
||||||
|
|
||||||
|
//typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||||
|
typedef typename ZMobiusFermionF::FermionField FermionField;
|
||||||
|
|
||||||
|
// Constant-zero function of one real argument; the argument is ignored.
RealD AllZero(RealD x)
{
  (void)x;
  return RealD(0.);
}
|
||||||
|
|
||||||
|
class CmdJobParams
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
std::string gaugefile;
|
||||||
|
|
||||||
|
int Ls;
|
||||||
|
double mass;
|
||||||
|
double M5;
|
||||||
|
double mob_b;
|
||||||
|
std::vector<ComplexD> omega;
|
||||||
|
std::vector<Complex> boundary_phase;
|
||||||
|
std::vector<int> mpi_split;
|
||||||
|
|
||||||
|
LanczosType Impl;
|
||||||
|
int Nu;
|
||||||
|
int Nk;
|
||||||
|
int Np;
|
||||||
|
int Nm;
|
||||||
|
int Nstop;
|
||||||
|
int Ntest;
|
||||||
|
int MaxIter;
|
||||||
|
double resid;
|
||||||
|
|
||||||
|
double low;
|
||||||
|
double high;
|
||||||
|
int order;
|
||||||
|
|
||||||
|
CmdJobParams()
|
||||||
|
: gaugefile("Hot"),
|
||||||
|
Ls(8), mass(0.01), M5(1.8), mob_b(1.5),
|
||||||
|
Impl(LanczosType::irbl),mpi_split(4,1),
|
||||||
|
Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8),
|
||||||
|
low(0.2), high(5.5), order(11)
|
||||||
|
{Nm=Nk+Np;};
|
||||||
|
|
||||||
|
void Parse(char **argv, int argc);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void CmdJobParams::Parse(char **argv,int argc)
|
||||||
|
{
|
||||||
|
std::string arg;
|
||||||
|
std::vector<int> vi;
|
||||||
|
double re,im;
|
||||||
|
int expect, idx;
|
||||||
|
std::string vstr;
|
||||||
|
std::ifstream pfile;
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){
|
||||||
|
gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf");
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--phase") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--phase");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
expect = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("boundary_phase") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(expect==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
boundary_phase.push_back({re,im});
|
||||||
|
expect++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.});
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--omega") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--omega");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
Ls = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("omega") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(Ls==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
omega.push_back({re,im});
|
||||||
|
Ls++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--Ls");
|
||||||
|
GridCmdOptionInt(arg,Ls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mass") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mass");
|
||||||
|
GridCmdOptionFloat(arg,mass);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--M5") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--M5");
|
||||||
|
GridCmdOptionFloat(arg,M5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b");
|
||||||
|
GridCmdOptionFloat(arg,mob_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--irbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2];
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::irbl;
|
||||||
|
Nm = Nk+Np;
|
||||||
|
}
|
||||||
|
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--rbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2]; // vector space is enlarged by adding Np vectors
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::rbl;
|
||||||
|
Nm = Nk+Np*MaxIter;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--split") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--split");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
for(int i=0;i<mpi_split.size();i++)
|
||||||
|
mpi_split[i] = vi[i];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--check_int") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--check_int");
|
||||||
|
GridCmdOptionInt(arg,Ntest);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--resid") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--resid");
|
||||||
|
GridCmdOptionFloat(arg,resid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_l") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_l");
|
||||||
|
GridCmdOptionFloat(arg,low);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_u") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_u");
|
||||||
|
GridCmdOptionFloat(arg,high);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_n") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_n");
|
||||||
|
GridCmdOptionInt(arg,order);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( CartesianCommunicator::RankWorld() == 0 ) {
|
||||||
|
std::streamsize ss = std::cout.precision();
|
||||||
|
std::cout << GridLogMessage <<" Gauge Configuration "<< gaugefile << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<4; ++i ) std::cout << GridLogMessage <<" boundary_phase["<< i << "] = " << boundary_phase[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Ls "<< Ls << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mass "<< mass << '\n';
|
||||||
|
std::cout << GridLogMessage <<" M5 "<< M5 << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mob_b "<< mob_b << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<Ls; ++i ) std::cout << GridLogMessage <<" omega["<< i << "] = " << omega[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Nu "<< Nu << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nk "<< Nk << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Np "<< Np << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nm "<< Nm << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nstop "<< Nstop << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Ntest "<< Ntest << '\n';
|
||||||
|
std::cout << GridLogMessage <<" MaxIter "<< MaxIter << '\n';
|
||||||
|
std::cout << GridLogMessage <<" resid "<< resid << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Cheby Poly "<< low << "," << high << "," << order << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
CmdJobParams JP;
|
||||||
|
JP.Parse(argv,argc);
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGrid);
|
||||||
|
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
||||||
|
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
|
||||||
|
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGridF);
|
||||||
|
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGridF);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
// ypj [note] why seed RNG5 again? bug? In this case, run with a default seed().
|
||||||
|
GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||||
|
LatticeGaugeFieldF UmuF(UGridF);
|
||||||
|
std::vector<LatticeColourMatrix> UF(4,UGridF);
|
||||||
|
|
||||||
|
if ( JP.gaugefile.compare("Hot") == 0 ) {
|
||||||
|
SU3::HotConfiguration(RNG4, Umu);
|
||||||
|
} else {
|
||||||
|
FieldMetaData header;
|
||||||
|
NerscIO::readConfiguration(Umu,header,JP.gaugefile);
|
||||||
|
// ypj [fixme] additional checks for the loaded configuration?
|
||||||
|
}
|
||||||
|
precisionChange (UmuF,Umu);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass = JP.mass;
|
||||||
|
RealD M5 = JP.M5;
|
||||||
|
|
||||||
|
// ypj [fixme] flexible support for a various Fermions
|
||||||
|
// RealD mob_b = JP.mob_b; // Gparity
|
||||||
|
// std::vector<ComplexD> omega; // ZMobius
|
||||||
|
|
||||||
|
// GparityMobiusFermionD ::ImplParams params;
|
||||||
|
// std::vector<int> twists({1,1,1,0});
|
||||||
|
// params.twists = twists;
|
||||||
|
// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||||
|
// SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
// int mrhs = JP.Nu;
|
||||||
|
int Ndir=4;
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> mpi_split (Ndir,1);
|
||||||
|
#if 0
|
||||||
|
int tmp=mrhs, dir=0;
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_split[dir]<<std::endl;
|
||||||
|
while ( tmp> 1) {
|
||||||
|
if ((mpi_split[dir]*2) <= mpi_layout[dir]){
|
||||||
|
mpi_split[dir] *=2;
|
||||||
|
tmp = tmp/2;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_layout[dir]<<std::endl;
|
||||||
|
dir = (dir+1)%Ndir;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
int mrhs=1;
|
||||||
|
for(int i =0;i<Ndir;i++){
|
||||||
|
mpi_split[i] = mpi_layout[i] / JP.mpi_split[i] ;
|
||||||
|
mrhs *= JP.mpi_split[i];
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "mpi_layout= " << mpi_layout << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mpi_split= " << mpi_split << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mrhs= " << mrhs << std::endl;
|
||||||
|
// assert(JP.Nu==tmp);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
// Split into 1^4 mpi communicators, keeping it explicitly single
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplexF::Nsimd()),
|
||||||
|
mpi_split,
|
||||||
|
*UGrid);
|
||||||
|
|
||||||
|
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,SGrid);
|
||||||
|
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
|
||||||
|
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,SGrid);
|
||||||
|
|
||||||
|
LatticeGaugeFieldF s_Umu(SGrid);
|
||||||
|
Grid_split (UmuF,s_Umu);
|
||||||
|
|
||||||
|
//WilsonFermionR::ImplParams params;
|
||||||
|
ZMobiusFermionF::ImplParams params;
|
||||||
|
params.overlapCommsCompute = true;
|
||||||
|
params.boundary_phases = JP.boundary_phase;
|
||||||
|
ZMobiusFermionF DdwfF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionF,FermionField> HermOp(Ddwf);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionF,FermionField> HermOp(DdwfF);
|
||||||
|
ZMobiusFermionF Dsplit(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionF,FermionField> SHermOp(Dsplit);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionF,FermionField> SHermOp(Dsplit);
|
||||||
|
|
||||||
|
//std::vector<double> Coeffs { 0.,-1.};
|
||||||
|
// ypj [note] this may not be supported by some compilers
|
||||||
|
std::vector<double> Coeffs({ 0.,-1.});
|
||||||
|
Polynomial<FermionField> PolyX(Coeffs);
|
||||||
|
//Chebyshev<FermionField> Cheb(0.2,5.5,11);
|
||||||
|
Chebyshev<FermionField> Cheb(JP.low,JP.high,JP.order);
|
||||||
|
// Cheb.csv(std::cout);
|
||||||
|
ImplicitlyRestartedBlockLanczos<FermionField> IRBL(HermOp, SHermOp,
|
||||||
|
FrbGridF,SFrbGrid,mrhs,
|
||||||
|
Cheb,
|
||||||
|
JP.Nstop, JP.Ntest,
|
||||||
|
JP.Nu, JP.Nk, JP.Nm,
|
||||||
|
JP.resid,
|
||||||
|
JP.MaxIter,
|
||||||
|
IRBLdiagonaliseWithEigen);
|
||||||
|
// IRBLdiagonaliseWithLAPACK);
|
||||||
|
IRBL.split_test=1;
|
||||||
|
|
||||||
|
std::vector<RealD> eval(JP.Nm);
|
||||||
|
|
||||||
|
std::vector<FermionField> src(JP.Nu,FrbGridF);
|
||||||
|
if (0)
|
||||||
|
{
|
||||||
|
// in case RNG is too slow
|
||||||
|
std::cout << GridLogMessage << "Using RNG5"<<std::endl;
|
||||||
|
FermionField src_tmp(FGrid);
|
||||||
|
for ( int i=0; i<JP.Nu; ++i ){
|
||||||
|
// gaussian(RNG5,src_tmp);
|
||||||
|
ComplexD rnd;
|
||||||
|
RealD re;
|
||||||
|
fillScalar(re,RNG5._gaussian[0],RNG5._generators[0]);
|
||||||
|
std::cout << i <<" / "<< JP.Nm <<" re "<< re << std::endl;
|
||||||
|
// printf("%d / %d re %e\n",i,FGrid->_processor,re);
|
||||||
|
src_tmp=re;
|
||||||
|
pickCheckerboard(Odd,src[i],src_tmp);
|
||||||
|
}
|
||||||
|
RNG5.Report();
|
||||||
|
} else {
|
||||||
|
std::cout << GridLogMessage << "Using RNG5rb"<<std::endl;
|
||||||
|
for ( int i=0; i<JP.Nu; ++i )
|
||||||
|
gaussian(RNG5rb,src[i]);
|
||||||
|
RNG5rb.Report();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<FermionField> evec(JP.Nm,FrbGridF);
|
||||||
|
for(int i=0;i<1;++i){
|
||||||
|
std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
IRBL.calc(eval,evec,src,Nconv,JP.Impl);
|
||||||
|
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
401
tests/lanczos/Test_dwf_block_lanczos.cc.double
Normal file
401
tests/lanczos/Test_dwf_block_lanczos.cc.double
Normal file
@ -0,0 +1,401 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/lanczos/Test_dwf_block_lanczos.cc.double
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/util/Init.h>
|
||||||
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
//using namespace Grid::QCD;
|
||||||
|
|
||||||
|
//typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||||
|
typedef typename ZMobiusFermionR::FermionField FermionField;
|
||||||
|
|
||||||
|
// Constant function of one real argument: ignores its input and yields zero.
RealD AllZero(RealD x)
{
  (void)x; // argument is intentionally unused
  return RealD(0.);
}
|
||||||
|
|
||||||
|
class CmdJobParams
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
std::string gaugefile;
|
||||||
|
|
||||||
|
int Ls;
|
||||||
|
double mass;
|
||||||
|
double M5;
|
||||||
|
double mob_b;
|
||||||
|
std::vector<ComplexD> omega;
|
||||||
|
std::vector<Complex> boundary_phase;
|
||||||
|
std::vector<int> mpi_split;
|
||||||
|
|
||||||
|
LanczosType Impl;
|
||||||
|
int Nu;
|
||||||
|
int Nk;
|
||||||
|
int Np;
|
||||||
|
int Nm;
|
||||||
|
int Nstop;
|
||||||
|
int Ntest;
|
||||||
|
int MaxIter;
|
||||||
|
double resid;
|
||||||
|
|
||||||
|
double low;
|
||||||
|
double high;
|
||||||
|
int order;
|
||||||
|
|
||||||
|
CmdJobParams()
|
||||||
|
: gaugefile("Hot"),
|
||||||
|
Ls(8), mass(0.01), M5(1.8), mob_b(1.5),
|
||||||
|
Impl(LanczosType::irbl),mpi_split(4,1),
|
||||||
|
Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8),
|
||||||
|
low(0.2), high(5.5), order(11)
|
||||||
|
{Nm=Nk+Np;};
|
||||||
|
|
||||||
|
void Parse(char **argv, int argc);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void CmdJobParams::Parse(char **argv,int argc)
|
||||||
|
{
|
||||||
|
std::string arg;
|
||||||
|
std::vector<int> vi;
|
||||||
|
double re,im;
|
||||||
|
int expect, idx;
|
||||||
|
std::string vstr;
|
||||||
|
std::ifstream pfile;
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){
|
||||||
|
gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf");
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--phase") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--phase");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
expect = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("boundary_phase") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(expect==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
boundary_phase.push_back({re,im});
|
||||||
|
expect++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.});
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--omega") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--omega");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
Ls = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("omega") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(Ls==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
omega.push_back({re,im});
|
||||||
|
Ls++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--Ls");
|
||||||
|
GridCmdOptionInt(arg,Ls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mass") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mass");
|
||||||
|
GridCmdOptionFloat(arg,mass);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--M5") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--M5");
|
||||||
|
GridCmdOptionFloat(arg,M5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b");
|
||||||
|
GridCmdOptionFloat(arg,mob_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--irbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2];
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::irbl;
|
||||||
|
Nm = Nk+Np;
|
||||||
|
}
|
||||||
|
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--rbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2]; // vector space is enlarged by adding Np vectors
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::rbl;
|
||||||
|
Nm = Nk+Np*MaxIter;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--split") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--split");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
for(int i=0;i<mpi_split.size();i++)
|
||||||
|
mpi_split[i] = vi[i];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--check_int") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--check_int");
|
||||||
|
GridCmdOptionInt(arg,Ntest);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--resid") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--resid");
|
||||||
|
GridCmdOptionFloat(arg,resid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_l") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_l");
|
||||||
|
GridCmdOptionFloat(arg,low);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_u") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_u");
|
||||||
|
GridCmdOptionFloat(arg,high);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_n") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_n");
|
||||||
|
GridCmdOptionInt(arg,order);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( CartesianCommunicator::RankWorld() == 0 ) {
|
||||||
|
std::streamsize ss = std::cout.precision();
|
||||||
|
std::cout << GridLogMessage <<" Gauge Configuration "<< gaugefile << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<4; ++i ) std::cout << GridLogMessage <<" boundary_phase["<< i << "] = " << boundary_phase[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Ls "<< Ls << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mass "<< mass << '\n';
|
||||||
|
std::cout << GridLogMessage <<" M5 "<< M5 << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mob_b "<< mob_b << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<Ls; ++i ) std::cout << GridLogMessage <<" omega["<< i << "] = " << omega[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Nu "<< Nu << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nk "<< Nk << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Np "<< Np << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nm "<< Nm << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nstop "<< Nstop << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Ntest "<< Ntest << '\n';
|
||||||
|
std::cout << GridLogMessage <<" MaxIter "<< MaxIter << '\n';
|
||||||
|
std::cout << GridLogMessage <<" resid "<< resid << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Cheby Poly "<< low << "," << high << "," << order << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
CmdJobParams JP;
|
||||||
|
JP.Parse(argv,argc);
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGrid);
|
||||||
|
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
// ypj [note] why seed RNG5 again? bug? In this case, run with a default seed().
|
||||||
|
GridParallelRNG RNG5rb(FrbGrid); RNG5rb.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||||
|
|
||||||
|
if ( JP.gaugefile.compare("Hot") == 0 ) {
|
||||||
|
SU3::HotConfiguration(RNG4, Umu);
|
||||||
|
} else {
|
||||||
|
FieldMetaData header;
|
||||||
|
NerscIO::readConfiguration(Umu,header,JP.gaugefile);
|
||||||
|
// ypj [fixme] additional checks for the loaded configuration?
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass = JP.mass;
|
||||||
|
RealD M5 = JP.M5;
|
||||||
|
|
||||||
|
// ypj [fixme] flexible support for a various Fermions
|
||||||
|
// RealD mob_b = JP.mob_b; // Gparity
|
||||||
|
// std::vector<ComplexD> omega; // ZMobius
|
||||||
|
|
||||||
|
// GparityMobiusFermionD ::ImplParams params;
|
||||||
|
// std::vector<int> twists({1,1,1,0});
|
||||||
|
// params.twists = twists;
|
||||||
|
// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||||
|
// SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
// int mrhs = JP.Nu;
|
||||||
|
int Ndir=4;
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> mpi_split (Ndir,1);
|
||||||
|
#if 0
|
||||||
|
int tmp=mrhs, dir=0;
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_split[dir]<<std::endl;
|
||||||
|
while ( tmp> 1) {
|
||||||
|
if ((mpi_split[dir]*2) <= mpi_layout[dir]){
|
||||||
|
mpi_split[dir] *=2;
|
||||||
|
tmp = tmp/2;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_layout[dir]<<std::endl;
|
||||||
|
dir = (dir+1)%Ndir;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
int mrhs=1;
|
||||||
|
for(int i =0;i<Ndir;i++){
|
||||||
|
mpi_split[i] = mpi_layout[i] / JP.mpi_split[i] ;
|
||||||
|
mrhs *= JP.mpi_split[i];
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "mpi_layout= " << mpi_layout << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mpi_split= " << mpi_split << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mrhs= " << mrhs << std::endl;
|
||||||
|
// assert(JP.Nu==tmp);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
// Split into 1^4 mpi communicators
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
mpi_split,
|
||||||
|
*UGrid);
|
||||||
|
|
||||||
|
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,SGrid);
|
||||||
|
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
|
||||||
|
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,SGrid);
|
||||||
|
|
||||||
|
LatticeGaugeField s_Umu(SGrid);
|
||||||
|
Grid_split (Umu,s_Umu);
|
||||||
|
|
||||||
|
//WilsonFermionR::ImplParams params;
|
||||||
|
ZMobiusFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = true;
|
||||||
|
params.boundary_phases = JP.boundary_phase;
|
||||||
|
ZMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||||
|
ZMobiusFermionR Dsplit(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionR,FermionField> SHermOp(Dsplit);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionR,FermionField> SHermOp(Dsplit);
|
||||||
|
|
||||||
|
//std::vector<double> Coeffs { 0.,-1.};
|
||||||
|
// ypj [note] this may not be supported by some compilers
|
||||||
|
std::vector<double> Coeffs({ 0.,-1.});
|
||||||
|
Polynomial<FermionField> PolyX(Coeffs);
|
||||||
|
//Chebyshev<FermionField> Cheb(0.2,5.5,11);
|
||||||
|
Chebyshev<FermionField> Cheb(JP.low,JP.high,JP.order);
|
||||||
|
// Cheb.csv(std::cout);
|
||||||
|
ImplicitlyRestartedBlockLanczos<FermionField> IRBL(HermOp, SHermOp,
|
||||||
|
FrbGrid,SFrbGrid,mrhs,
|
||||||
|
Cheb,
|
||||||
|
JP.Nstop, JP.Ntest,
|
||||||
|
JP.Nu, JP.Nk, JP.Nm,
|
||||||
|
JP.resid,
|
||||||
|
JP.MaxIter,
|
||||||
|
IRBLdiagonaliseWithEigen);
|
||||||
|
// IRBLdiagonaliseWithLAPACK);
|
||||||
|
IRBL.split_test=0;
|
||||||
|
|
||||||
|
std::vector<RealD> eval(JP.Nm);
|
||||||
|
|
||||||
|
std::vector<FermionField> src(JP.Nu,FrbGrid);
|
||||||
|
if (0)
|
||||||
|
{
|
||||||
|
// in case RNG is too slow
|
||||||
|
std::cout << GridLogMessage << "Using RNG5"<<std::endl;
|
||||||
|
FermionField src_tmp(FGrid);
|
||||||
|
for ( int i=0; i<JP.Nu; ++i ){
|
||||||
|
// gaussian(RNG5,src_tmp);
|
||||||
|
ComplexD rnd;
|
||||||
|
RealD re;
|
||||||
|
fillScalar(re,RNG5._gaussian[0],RNG5._generators[0]);
|
||||||
|
std::cout << i <<" / "<< JP.Nm <<" re "<< re << std::endl;
|
||||||
|
// printf("%d / %d re %e\n",i,FGrid->_processor,re);
|
||||||
|
src_tmp=re;
|
||||||
|
pickCheckerboard(Odd,src[i],src_tmp);
|
||||||
|
}
|
||||||
|
RNG5.Report();
|
||||||
|
} else {
|
||||||
|
std::cout << GridLogMessage << "Using RNG5rb"<<std::endl;
|
||||||
|
for ( int i=0; i<JP.Nu; ++i )
|
||||||
|
gaussian(RNG5rb,src[i]);
|
||||||
|
RNG5rb.Report();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<FermionField> evec(JP.Nm,FrbGrid);
|
||||||
|
for(int i=0;i<1;++i){
|
||||||
|
std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
IRBL.calc(eval,evec,src,Nconv,JP.Impl);
|
||||||
|
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
408
tests/lanczos/Test_dwf_block_lanczos.cc.single
Normal file
408
tests/lanczos/Test_dwf_block_lanczos.cc.single
Normal file
@ -0,0 +1,408 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_dwf_block_lanczos.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/util/Init.h>
|
||||||
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
//using namespace Grid::QCD;
|
||||||
|
|
||||||
|
//typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||||
|
typedef typename ZMobiusFermionF::FermionField FermionField;
|
||||||
|
|
||||||
|
// Constant function of one real argument: ignores its input and yields zero.
RealD AllZero(RealD x)
{
  (void)x; // argument is intentionally unused
  return RealD(0.);
}
|
||||||
|
|
||||||
|
class CmdJobParams
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
std::string gaugefile;
|
||||||
|
|
||||||
|
int Ls;
|
||||||
|
double mass;
|
||||||
|
double M5;
|
||||||
|
double mob_b;
|
||||||
|
std::vector<ComplexD> omega;
|
||||||
|
std::vector<Complex> boundary_phase;
|
||||||
|
std::vector<int> mpi_split;
|
||||||
|
|
||||||
|
LanczosType Impl;
|
||||||
|
int Nu;
|
||||||
|
int Nk;
|
||||||
|
int Np;
|
||||||
|
int Nm;
|
||||||
|
int Nstop;
|
||||||
|
int Ntest;
|
||||||
|
int MaxIter;
|
||||||
|
double resid;
|
||||||
|
|
||||||
|
double low;
|
||||||
|
double high;
|
||||||
|
int order;
|
||||||
|
|
||||||
|
CmdJobParams()
|
||||||
|
: gaugefile("Hot"),
|
||||||
|
Ls(8), mass(0.01), M5(1.8), mob_b(1.5),
|
||||||
|
Impl(LanczosType::irbl),mpi_split(4,1),
|
||||||
|
Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8),
|
||||||
|
low(0.2), high(5.5), order(11)
|
||||||
|
{Nm=Nk+Np;};
|
||||||
|
|
||||||
|
void Parse(char **argv, int argc);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void CmdJobParams::Parse(char **argv,int argc)
|
||||||
|
{
|
||||||
|
std::string arg;
|
||||||
|
std::vector<int> vi;
|
||||||
|
double re,im;
|
||||||
|
int expect, idx;
|
||||||
|
std::string vstr;
|
||||||
|
std::ifstream pfile;
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){
|
||||||
|
gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf");
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--phase") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--phase");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
expect = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("boundary_phase") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(expect==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
boundary_phase.push_back({re,im});
|
||||||
|
expect++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.});
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--omega") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--omega");
|
||||||
|
pfile.open(arg);
|
||||||
|
assert(pfile);
|
||||||
|
Ls = 0;
|
||||||
|
while( pfile >> vstr ) {
|
||||||
|
if ( vstr.compare("omega") == 0 ) {
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionInt(vstr,idx);
|
||||||
|
assert(Ls==idx);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,re);
|
||||||
|
pfile >> vstr;
|
||||||
|
GridCmdOptionFloat(vstr,im);
|
||||||
|
omega.push_back({re,im});
|
||||||
|
Ls++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pfile.close();
|
||||||
|
} else {
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--Ls");
|
||||||
|
GridCmdOptionInt(arg,Ls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mass") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mass");
|
||||||
|
GridCmdOptionFloat(arg,mass);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--M5") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--M5");
|
||||||
|
GridCmdOptionFloat(arg,M5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b");
|
||||||
|
GridCmdOptionFloat(arg,mob_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--irbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2];
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::irbl;
|
||||||
|
Nm = Nk+Np;
|
||||||
|
}
|
||||||
|
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--rbl");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
Nu = vi[0];
|
||||||
|
Nk = vi[1];
|
||||||
|
Np = vi[2]; // vector space is enlarged by adding Np vectors
|
||||||
|
Nstop = vi[3];
|
||||||
|
MaxIter = vi[4];
|
||||||
|
// ypj[fixme] mode overriding message is needed.
|
||||||
|
Impl = LanczosType::rbl;
|
||||||
|
Nm = Nk+Np*MaxIter;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
// block Lanczos with explicit extension of its dimensions
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--split") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--split");
|
||||||
|
GridCmdOptionIntVector(arg,vi);
|
||||||
|
for(int i=0;i<mpi_split.size();i++)
|
||||||
|
mpi_split[i] = vi[i];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--check_int") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--check_int");
|
||||||
|
GridCmdOptionInt(arg,Ntest);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--resid") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--resid");
|
||||||
|
GridCmdOptionFloat(arg,resid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_l") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_l");
|
||||||
|
GridCmdOptionFloat(arg,low);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_u") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_u");
|
||||||
|
GridCmdOptionFloat(arg,high);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--cheby_n") ){
|
||||||
|
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_n");
|
||||||
|
GridCmdOptionInt(arg,order);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( CartesianCommunicator::RankWorld() == 0 ) {
|
||||||
|
std::streamsize ss = std::cout.precision();
|
||||||
|
std::cout << GridLogMessage <<" Gauge Configuration "<< gaugefile << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<4; ++i ) std::cout << GridLogMessage <<" boundary_phase["<< i << "] = " << boundary_phase[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Ls "<< Ls << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mass "<< mass << '\n';
|
||||||
|
std::cout << GridLogMessage <<" M5 "<< M5 << '\n';
|
||||||
|
std::cout << GridLogMessage <<" mob_b "<< mob_b << '\n';
|
||||||
|
std::cout.precision(15);
|
||||||
|
for ( int i=0; i<Ls; ++i ) std::cout << GridLogMessage <<" omega["<< i << "] = " << omega[i] << '\n';
|
||||||
|
std::cout.precision(ss);
|
||||||
|
std::cout << GridLogMessage <<" Nu "<< Nu << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nk "<< Nk << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Np "<< Np << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nm "<< Nm << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Nstop "<< Nstop << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Ntest "<< Ntest << '\n';
|
||||||
|
std::cout << GridLogMessage <<" MaxIter "<< MaxIter << '\n';
|
||||||
|
std::cout << GridLogMessage <<" resid "<< resid << '\n';
|
||||||
|
std::cout << GridLogMessage <<" Cheby Poly "<< low << "," << high << "," << order << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
CmdJobParams JP;
|
||||||
|
JP.Parse(argv,argc);
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGrid);
|
||||||
|
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
||||||
|
GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
|
||||||
|
GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGridF);
|
||||||
|
GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGridF);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
// ypj [note] why seed RNG5 again? bug? In this case, run with a default seed().
|
||||||
|
GridParallelRNG RNG5rb(FrbGridF); RNG5rb.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||||
|
LatticeGaugeFieldF UmuF(UGridF);
|
||||||
|
std::vector<LatticeColourMatrix> UF(4,UGridF);
|
||||||
|
|
||||||
|
if ( JP.gaugefile.compare("Hot") == 0 ) {
|
||||||
|
SU3::HotConfiguration(RNG4, Umu);
|
||||||
|
} else {
|
||||||
|
FieldMetaData header;
|
||||||
|
NerscIO::readConfiguration(Umu,header,JP.gaugefile);
|
||||||
|
// ypj [fixme] additional checks for the loaded configuration?
|
||||||
|
}
|
||||||
|
precisionChange (UmuF,Umu);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD mass = JP.mass;
|
||||||
|
RealD M5 = JP.M5;
|
||||||
|
|
||||||
|
// ypj [fixme] flexible support for a various Fermions
|
||||||
|
// RealD mob_b = JP.mob_b; // Gparity
|
||||||
|
// std::vector<ComplexD> omega; // ZMobius
|
||||||
|
|
||||||
|
// GparityMobiusFermionD ::ImplParams params;
|
||||||
|
// std::vector<int> twists({1,1,1,0});
|
||||||
|
// params.twists = twists;
|
||||||
|
// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||||
|
// SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
// int mrhs = JP.Nu;
|
||||||
|
int Ndir=4;
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> mpi_split (Ndir,1);
|
||||||
|
#if 0
|
||||||
|
int tmp=mrhs, dir=0;
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_split[dir]<<std::endl;
|
||||||
|
while ( tmp> 1) {
|
||||||
|
if ((mpi_split[dir]*2) <= mpi_layout[dir]){
|
||||||
|
mpi_split[dir] *=2;
|
||||||
|
tmp = tmp/2;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_layout[dir]<<std::endl;
|
||||||
|
dir = (dir+1)%Ndir;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
int mrhs=1;
|
||||||
|
for(int i =0;i<Ndir;i++){
|
||||||
|
mpi_split[i] = mpi_layout[i] / JP.mpi_split[i] ;
|
||||||
|
mrhs *= JP.mpi_split[i];
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "mpi_layout= " << mpi_layout << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mpi_split= " << mpi_split << std::endl;
|
||||||
|
std::cout << GridLogMessage << "mrhs= " << mrhs << std::endl;
|
||||||
|
// assert(JP.Nu==tmp);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
// Split into 1^4 mpi communicators, keeping it explicitly single
|
||||||
|
/////////////////////////////////////////////
|
||||||
|
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplexF::Nsimd()),
|
||||||
|
mpi_split,
|
||||||
|
*UGrid);
|
||||||
|
|
||||||
|
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,SGrid);
|
||||||
|
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
|
||||||
|
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,SGrid);
|
||||||
|
|
||||||
|
LatticeGaugeFieldF s_Umu(SGrid);
|
||||||
|
Grid_split (UmuF,s_Umu);
|
||||||
|
|
||||||
|
//WilsonFermionR::ImplParams params;
|
||||||
|
ZMobiusFermionF::ImplParams params;
|
||||||
|
params.overlapCommsCompute = true;
|
||||||
|
params.boundary_phases = JP.boundary_phase;
|
||||||
|
ZMobiusFermionF DdwfF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionF,FermionField> HermOp(Ddwf);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionF,FermionField> HermOp(DdwfF);
|
||||||
|
ZMobiusFermionF Dsplit(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,JP.omega,1.,0.,params);
|
||||||
|
// SchurDiagTwoOperator<ZMobiusFermionF,FermionField> SHermOp(Dsplit);
|
||||||
|
SchurDiagOneOperator<ZMobiusFermionF,FermionField> SHermOp(Dsplit);
|
||||||
|
|
||||||
|
//std::vector<double> Coeffs { 0.,-1.};
|
||||||
|
// ypj [note] this may not be supported by some compilers
|
||||||
|
std::vector<double> Coeffs({ 0.,-1.});
|
||||||
|
Polynomial<FermionField> PolyX(Coeffs);
|
||||||
|
//Chebyshev<FermionField> Cheb(0.2,5.5,11);
|
||||||
|
Chebyshev<FermionField> Cheb(JP.low,JP.high,JP.order);
|
||||||
|
// Cheb.csv(std::cout);
|
||||||
|
ImplicitlyRestartedBlockLanczos<FermionField> IRBL(HermOp, SHermOp,
|
||||||
|
FrbGridF,SFrbGrid,mrhs,
|
||||||
|
Cheb,
|
||||||
|
JP.Nstop, JP.Ntest,
|
||||||
|
JP.Nu, JP.Nk, JP.Nm,
|
||||||
|
JP.resid,
|
||||||
|
JP.MaxIter,
|
||||||
|
IRBLdiagonaliseWithEigen);
|
||||||
|
// IRBLdiagonaliseWithLAPACK);
|
||||||
|
IRBL.split_test=1;
|
||||||
|
|
||||||
|
std::vector<RealD> eval(JP.Nm);
|
||||||
|
|
||||||
|
std::vector<FermionField> src(JP.Nu,FrbGridF);
|
||||||
|
if (0)
|
||||||
|
{
|
||||||
|
// in case RNG is too slow
|
||||||
|
std::cout << GridLogMessage << "Using RNG5"<<std::endl;
|
||||||
|
FermionField src_tmp(FGrid);
|
||||||
|
for ( int i=0; i<JP.Nu; ++i ){
|
||||||
|
// gaussian(RNG5,src_tmp);
|
||||||
|
ComplexD rnd;
|
||||||
|
RealD re;
|
||||||
|
fillScalar(re,RNG5._gaussian[0],RNG5._generators[0]);
|
||||||
|
std::cout << i <<" / "<< JP.Nm <<" re "<< re << std::endl;
|
||||||
|
// printf("%d / %d re %e\n",i,FGrid->_processor,re);
|
||||||
|
src_tmp=re;
|
||||||
|
pickCheckerboard(Odd,src[i],src_tmp);
|
||||||
|
}
|
||||||
|
RNG5.Report();
|
||||||
|
} else {
|
||||||
|
std::cout << GridLogMessage << "Using RNG5rb"<<std::endl;
|
||||||
|
for ( int i=0; i<JP.Nu; ++i )
|
||||||
|
gaussian(RNG5rb,src[i]);
|
||||||
|
RNG5rb.Report();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<FermionField> evec(JP.Nm,FrbGridF);
|
||||||
|
for(int i=0;i<1;++i){
|
||||||
|
std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
IRBL.calc(eval,evec,src,Nconv,JP.Impl);
|
||||||
|
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -35,26 +35,45 @@ template<typename Action>
|
|||||||
struct Setup{};
|
struct Setup{};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct Setup<GparityMobiusFermionD>{
|
struct Setup<GparityMobiusFermionF>{
|
||||||
static GparityMobiusFermionD* getAction(LatticeGaugeField &Umu,
|
static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu,
|
||||||
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
||||||
RealD mass=0.01;
|
RealD mass=0.00054;
|
||||||
RealD M5=1.8;
|
RealD M5=1.8;
|
||||||
RealD mob_b=1.5;
|
RealD mob_b=1.5;
|
||||||
GparityMobiusFermionD ::ImplParams params;
|
GparityMobiusFermionD ::ImplParams params;
|
||||||
std::vector<int> twists({1,1,1,0});
|
std::vector<int> twists({1,1,1,0});
|
||||||
params.twists = twists;
|
params.twists = twists;
|
||||||
return new GparityMobiusFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
struct Setup<DomainWallFermionF>{
|
||||||
|
static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu,
|
||||||
struct Setup<DomainWallFermionD>{
|
struct Setup<DomainWallFermionD>{
|
||||||
static DomainWallFermionD* getAction(LatticeGaugeField &Umu,
|
static DomainWallFermionD* getAction(LatticeGaugeField &Umu,
|
||||||
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
||||||
RealD mass=0.01;
|
RealD mass=0.00054;
|
||||||
RealD M5=1.8;
|
RealD M5=1.8;
|
||||||
return new DomainWallFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<>
|
||||||
|
struct Setup<MobiusFermionF>{
|
||||||
|
static MobiusFermionF* getAction(LatticeGaugeFieldF &Umu,
|
||||||
|
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
|
||||||
|
RealD mass=0.00054;
|
||||||
|
RealD M5=1.8;
|
||||||
|
RealD mob_b=1.5;
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
MobiusFermionF::ImplParams Params(boundary);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "mass "<<mass<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "M5 "<<M5<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "mob_b "<<mob_b<<std::endl;
|
||||||
|
return new MobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,Params);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -63,38 +82,60 @@ struct Setup<DomainWallFermionD>{
|
|||||||
template<typename Action>
|
template<typename Action>
|
||||||
void run(){
|
void run(){
|
||||||
typedef typename Action::FermionField FermionField;
|
typedef typename Action::FermionField FermionField;
|
||||||
const int Ls=8;
|
const int Ls=12;
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
||||||
|
|
||||||
|
GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
|
||||||
|
GridCartesian* FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls, UGridF);
|
||||||
|
GridRedBlackCartesian* FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGridF);
|
||||||
|
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5(FGridF); RNG5.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
GridParallelRNG RNG4(UGridF); RNG4.SeedFixedIntegers(seeds4);
|
||||||
GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5rb(FrbGridF); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
SU<Nc>::HotConfiguration(RNG4, Umu);
|
// SU<Nc>::HotConfiguration(RNG4, Umu);
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("./config");
|
||||||
|
|
||||||
Action *action = Setup<Action>::getAction(Umu,FGrid,FrbGrid,UGrid,UrbGrid);
|
// int precision32 = 0;
|
||||||
|
// int tworow = 0;
|
||||||
|
// NerscIO::writeConfiguration(Umu,file,tworow,precision32);
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
LatticeGaugeFieldF UmuF(UGridF);
|
||||||
|
precisionChange(UmuF, Umu);
|
||||||
|
|
||||||
|
Action *action = Setup<Action>::getAction(UmuF,FGridF,FrbGridF,UGridF,UrbGridF);
|
||||||
|
|
||||||
//MdagMLinearOperator<Action,FermionField> HermOp(Ddwf);
|
//MdagMLinearOperator<Action,FermionField> HermOp(Ddwf);
|
||||||
SchurDiagTwoOperator<Action,FermionField> HermOp(*action);
|
// SchurDiagTwoOperator<Action,FermionField> HermOp(*action);
|
||||||
|
SchurDiagOneOperator<Action,FermionField> HermOp(*action);
|
||||||
|
|
||||||
const int Nstop = 30;
|
const int Nstop = 150;
|
||||||
const int Nk = 40;
|
const int Nk = 160;
|
||||||
const int Np = 40;
|
const int Np = 40;
|
||||||
const int Nm = Nk+Np;
|
const int Nm = Nk+Np;
|
||||||
const int MaxIt= 10000;
|
const int MaxIt= 10000;
|
||||||
RealD resid = 1.0e-8;
|
RealD resid = 1.0e-6;
|
||||||
|
std::cout << GridLogMessage << "Nstop "<<Nstop<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "Nk "<<Nk<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "Np "<<Np<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "resid "<<resid<<std::endl;
|
||||||
|
|
||||||
std::vector<double> Coeffs { 0.,-1.};
|
std::vector<double> Coeffs { 0.,-1.};
|
||||||
Polynomial<FermionField> PolyX(Coeffs);
|
Polynomial<FermionField> PolyX(Coeffs);
|
||||||
Chebyshev<FermionField> Cheby(0.2,5.,11);
|
Chebyshev<FermionField> Cheby(0.0000006,5.5,4001);
|
||||||
|
std::cout << GridLogMessage << "Cheby(0.0000006,5.5,4001) "<<std::endl;
|
||||||
|
|
||||||
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
|
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
|
||||||
PlainHermOp<FermionField> Op (HermOp);
|
PlainHermOp<FermionField> Op (HermOp);
|
||||||
@ -102,9 +143,9 @@ void run(){
|
|||||||
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
|
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Nstop,Nk,Nm,resid,MaxIt);
|
||||||
|
|
||||||
std::vector<RealD> eval(Nm);
|
std::vector<RealD> eval(Nm);
|
||||||
FermionField src(FrbGrid);
|
FermionField src(FrbGridF);
|
||||||
gaussian(RNG5rb,src);
|
gaussian(RNG5rb,src);
|
||||||
std::vector<FermionField> evec(Nm,FrbGrid);
|
std::vector<FermionField> evec(Nm,FrbGridF);
|
||||||
for(int i=0;i<1;i++){
|
for(int i=0;i<1;i++){
|
||||||
std::cout << GridLogMessage <<i<<" / "<< Nm<< " grid pointer "<<evec[i].Grid()<<std::endl;
|
std::cout << GridLogMessage <<i<<" / "<< Nm<< " grid pointer "<<evec[i].Grid()<<std::endl;
|
||||||
};
|
};
|
||||||
@ -119,7 +160,7 @@ int main (int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
std::string action = "GparityMobius";
|
std::string action = "Mobius";
|
||||||
for(int i=1;i<argc;i++){
|
for(int i=1;i<argc;i++){
|
||||||
if(std::string(argv[i]) == "-action"){
|
if(std::string(argv[i]) == "-action"){
|
||||||
action = argv[i+1];
|
action = argv[i+1];
|
||||||
@ -127,9 +168,11 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(action == "GparityMobius"){
|
if(action == "GparityMobius"){
|
||||||
run<GparityMobiusFermionD>();
|
run<GparityMobiusFermionF>();
|
||||||
}else if(action == "DWF"){
|
}else if(action == "DWF"){
|
||||||
run<DomainWallFermionD>();
|
run<DomainWallFermionF>();
|
||||||
|
}else if(action == "Mobius"){
|
||||||
|
run<MobiusFermionF>();
|
||||||
}else{
|
}else{
|
||||||
std::cout << "Unknown action" << std::endl;
|
std::cout << "Unknown action" << std::endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
|
122
tests/solver/Test_dwf_mixedcg_prec.cc
Normal file
122
tests/solver/Test_dwf_mixedcg_prec.cc
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/solver/Test_dwf_mixedcg_prec.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
//using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=12;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
|
||||||
|
GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
|
||||||
|
GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
|
LatticeFermionD src(FGrid); random(RNG5,src);
|
||||||
|
LatticeFermionD result(FGrid); result=Zero();
|
||||||
|
LatticeGaugeFieldD Umu(UGrid);
|
||||||
|
LatticeGaugeFieldF Umu_f(UGrid_f);
|
||||||
|
|
||||||
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
|
precisionChange(Umu_f,Umu);
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5);
|
||||||
|
|
||||||
|
LatticeFermionD src_o(FrbGrid);
|
||||||
|
LatticeFermionD result_o(FrbGrid);
|
||||||
|
LatticeFermionD result_o_2(FrbGrid);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
result_o.Checkerboard() = Odd;
|
||||||
|
result_o = Zero();
|
||||||
|
result_o_2.Checkerboard() = Odd;
|
||||||
|
result_o_2 = Zero();
|
||||||
|
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||||
|
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||||
|
double t1,t2,flops;
|
||||||
|
double MdagMsiteflops = 1452; // Mobius (real coeffs)
|
||||||
|
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
|
||||||
|
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
|
||||||
|
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
|
||||||
|
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
|
||||||
|
|
||||||
|
result_o = Zero();
|
||||||
|
t1=usecond();
|
||||||
|
mCG(src_o,result_o);
|
||||||
|
t2=usecond();
|
||||||
|
int iters = mCG.TotalInnerIterations; //Number of inner CG iterations
|
||||||
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||||
|
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||||
|
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||||
|
result_o_2 = Zero();
|
||||||
|
t1=usecond();
|
||||||
|
CG(HermOpEO,src_o,result_o_2);
|
||||||
|
t2=usecond();
|
||||||
|
iters = CG.IterationsToComplete;
|
||||||
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||||
|
|
||||||
|
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
|
|
||||||
|
LatticeFermionD diff_o(FrbGrid);
|
||||||
|
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
143
tests/solver/Test_dwf_relupcg_prec.cc
Normal file
143
tests/solver/Test_dwf_relupcg_prec.cc
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/solver/Test_dwf_relupcg_prec.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
double relup_delta = 0.2;
|
||||||
|
for(int i=1;i<argc-1;i++){
|
||||||
|
std::string sarg = argv[i];
|
||||||
|
if(sarg == "--relup_delta"){
|
||||||
|
std::stringstream ss; ss << argv[i+1]; ss >> relup_delta;
|
||||||
|
std::cout << GridLogMessage << "Set reliable update Delta to " << relup_delta << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const int Ls=12;
|
||||||
|
|
||||||
|
{
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
|
||||||
|
GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
|
||||||
|
GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
|
LatticeFermionD src(FGrid); random(RNG5,src);
|
||||||
|
LatticeFermionD result(FGrid); result=Zero();
|
||||||
|
LatticeGaugeFieldD Umu(UGrid);
|
||||||
|
LatticeGaugeFieldF Umu_f(UGrid_f);
|
||||||
|
|
||||||
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
|
precisionChange(Umu_f,Umu);
|
||||||
|
|
||||||
|
RealD mass=0.1;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
DomainWallFermionF Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5);
|
||||||
|
|
||||||
|
LatticeFermionD src_o(FrbGrid);
|
||||||
|
LatticeFermionD result_o(FrbGrid);
|
||||||
|
LatticeFermionD result_o_2(FrbGrid);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
result_o.Checkerboard() = Odd;
|
||||||
|
result_o = Zero();
|
||||||
|
result_o_2.Checkerboard() = Odd;
|
||||||
|
result_o_2 = Zero();
|
||||||
|
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||||
|
ConjugateGradientReliableUpdate<LatticeFermionD,LatticeFermionF> mCG(1e-8, 10000, relup_delta, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||||
|
double t1,t2,flops;
|
||||||
|
double MdagMsiteflops = 1452; // Mobius (real coeffs)
|
||||||
|
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
|
||||||
|
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
|
||||||
|
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
|
||||||
|
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
|
||||||
|
int iters, iters_cleanup, relups, tot_iters;
|
||||||
|
for(int i=0;i<10;i++){
|
||||||
|
result_o = Zero();
|
||||||
|
t1=usecond();
|
||||||
|
mCG(src_o,result_o);
|
||||||
|
t2=usecond();
|
||||||
|
iters = mCG.IterationsToComplete; //Number of single prec CG iterations
|
||||||
|
iters_cleanup = mCG.IterationsToCleanup;
|
||||||
|
relups = mCG.ReliableUpdatesPerformed;
|
||||||
|
tot_iters = iters + iters_cleanup + relups; //relup cost MdagM application in double
|
||||||
|
|
||||||
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*tot_iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*tot_iters;
|
||||||
|
std::cout << " SinglePrecision single prec iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " SinglePrecision double prec cleanup iterations/sec "<< iters_cleanup/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " SinglePrecision reliable updates/sec "<< relups/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||||
|
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||||
|
for(int i=0;i<1;i++){
|
||||||
|
result_o_2 = Zero();
|
||||||
|
t1=usecond();
|
||||||
|
CG(HermOpEO,src_o,result_o_2);
|
||||||
|
t2=usecond();
|
||||||
|
iters = CG.IterationsToComplete;
|
||||||
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||||
|
|
||||||
|
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
|
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryManager::Print();
|
||||||
|
|
||||||
|
LatticeFermionD diff_o(FrbGrid);
|
||||||
|
RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user