diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 7720e97f..14234fe0 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -328,6 +328,8 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice typedef typename vobj::vector_type vector_type; typedef typename vobj::tensor_reduced tensor_reduced; + scalar_type zscale(scale); + GridBase *grid = X._grid; int Nsimd =grid->Nsimd(); @@ -353,7 +355,7 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice grid->iCoorFromIindex(icoor,l); int ldx =r+icoor[orthogdim]*rd; scalar_type *as =(scalar_type *)&av; - as[l] = scalar_type(a[ldx])*scale; + as[l] = scalar_type(a[ldx])*zscale; } tensor_reduced at; at=av; diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 15e30f02..0048382f 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -327,18 +327,20 @@ class Grid_simd { // provides support /////////////////////////////////////// -#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) -#pragma GCC push_options -#pragma GCC optimize ("O0") -#endif + //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) + //#pragma GCC push_options + //#pragma GCC optimize ("O0") + //#endif template friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { Grid_simd ret; Grid_simd::conv_t conv; - + Grid_simd::scalar_type s; + conv.v = v.v; for (int i = 0; i < Nsimd(); i++) { - conv.s[i] = func(conv.s[i]); + s = conv.s[i]; + conv.s[i] = func(s); } ret.v = conv.v; return ret; @@ -350,18 +352,21 @@ class Grid_simd { Grid_simd ret; Grid_simd::conv_t cx; Grid_simd::conv_t cy; + Grid_simd::scalar_type sx,sy; cx.v = x.v; cy.v = y.v; for (int i = 0; i < Nsimd(); i++) { - cx.s[i] = func(cx.s[i], cy.s[i]); + sx = cx.s[i]; + sy = cy.s[i]; + cx.s[i] = func(sx,sy); } ret.v = cx.v; return ret; } -#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) 
-#pragma GCC pop_options -#endif + //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) + //#pragma GCC pop_options + //#endif /////////////////////// // Exchange // Al Ah , Bl Bh -> Al Bl Ah,Bh diff --git a/tests/Test_dwf_mixedcg_prec_halfcomms.cc b/tests/Test_dwf_mixedcg_prec_halfcomms.cc new file mode 100644 index 00000000..9cc935d9 --- /dev/null +++ b/tests/Test_dwf_mixedcg_prec_halfcomms.cc @@ -0,0 +1,110 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_mixedcg_prec_halfcomms.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=24; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + GridCartesian * UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); + GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); + GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermionD src(FGrid); random(RNG5,src); + LatticeFermionD result(FGrid); result=zero; + LatticeGaugeFieldD Umu(UGrid); + LatticeGaugeFieldF Umu_f(UGrid_f); + + SU3::HotConfiguration(RNG4,Umu); + + precisionChange(Umu_f,Umu); + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + DomainWallFermionFH Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); + + + LatticeFermionD src_o(FrbGrid); + LatticeFermionD 
result_o(FrbGrid); + LatticeFermionD result_o_2(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o.checkerboard = Odd; + result_o = zero; + result_o_2.checkerboard = Odd; + result_o_2 = zero; + + SchurDiagMooeeOperator HermOpEO(Ddwf); + SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); + + std::cout << "Starting mixed CG" << std::endl; + MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); + mCG.InnerTolerance = 3.0e-5; + mCG(src_o,result_o); + + std::cout << "Starting regular CG" << std::endl; + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,result_o_2); + + LatticeFermionD diff_o(FrbGrid); + RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); + + std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + + + Grid_finalize(); +}