mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 04:37:05 +01:00
Merge remote-tracking branch 'origin/develop' into temporary-smearing
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
|
||||
bin_PROGRAMS = Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd
|
||||
bin_PROGRAMS += Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_cshift_red_black_rotate Test_cshift_rotate Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_gpforce Test_dwf_hdcr Test_dwf_lanczos Test_dwf_rb5d Test_gamma Test_GaugeAction Test_gparity Test_gpdwf_force Test_gp_rect_force Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_rect_force Test_RectPlaq Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_synthetic_lanczos Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi Test_wilson_tm_even_odd
|
||||
|
||||
|
||||
Test_cayley_cg_SOURCES=Test_cayley_cg.cc
|
||||
@ -50,6 +50,14 @@ Test_cshift_red_black_SOURCES=Test_cshift_red_black.cc
|
||||
Test_cshift_red_black_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cshift_red_black_rotate_SOURCES=Test_cshift_red_black_rotate.cc
|
||||
Test_cshift_red_black_rotate_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_cshift_rotate_SOURCES=Test_cshift_rotate.cc
|
||||
Test_cshift_rotate_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_dwf_cg_prec_SOURCES=Test_dwf_cg_prec.cc
|
||||
Test_dwf_cg_prec_LDADD=-lGrid
|
||||
|
||||
@ -90,6 +98,10 @@ Test_dwf_lanczos_SOURCES=Test_dwf_lanczos.cc
|
||||
Test_dwf_lanczos_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_dwf_rb5d_SOURCES=Test_dwf_rb5d.cc
|
||||
Test_dwf_rb5d_LDADD=-lGrid
|
||||
|
||||
|
||||
Test_gamma_SOURCES=Test_gamma.cc
|
||||
Test_gamma_LDADD=-lGrid
|
||||
|
||||
|
@ -8,8 +8,20 @@ endif
|
||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
|
||||
AM_LDFLAGS = -L$(top_builddir)/lib
|
||||
|
||||
if USE_LAPACK
|
||||
AM_CXXFLAGS += -DUSE_LAPACK
|
||||
if USE_LAPACK_LIB
|
||||
#if test "X${ac_LAPACK}X" != XyesX
|
||||
AM_CXXFLAGS += -I$(ac_LAPACK)/include
|
||||
AM_LDFLAGS += -L$(ac_LAPACK)/lib
|
||||
#fi
|
||||
endif
|
||||
endif
|
||||
|
||||
if BUILD_ZMM
|
||||
bin_PROGRAMS=Test_zmm
|
||||
else
|
||||
bin_PROGRAMS=
|
||||
endif
|
||||
|
||||
include Make.inc
|
||||
|
@ -96,13 +96,13 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> peer(4);
|
||||
Complex tmp =cm;
|
||||
Integer index=real(tmp);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
}
|
||||
}}}}
|
||||
|
@ -132,7 +132,7 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> peer(4);
|
||||
Complex ctmp = cm;
|
||||
Integer index=real(ctmp);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
@ -140,7 +140,7 @@ int main (int argc, char ** argv)
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
@ -180,7 +180,7 @@ int main (int argc, char ** argv)
|
||||
std::vector<int> peer(4);
|
||||
Complex ctmp=cmeo;
|
||||
Integer index=real(ctmp);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
double nrm = abs(cmeo()()()-scm);
|
||||
if (nrm != 0) {
|
||||
@ -189,7 +189,7 @@ int main (int argc, char ** argv)
|
||||
<< cmeo()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exx=1;
|
||||
|
||||
@ -205,7 +205,7 @@ int main (int argc, char ** argv)
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Fine.CoorFromIndex(peer,index,latt_size);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exx=1;
|
||||
} else if (1) {
|
||||
|
223
tests/Test_cshift_red_black_rotate.cc
Normal file
223
tests/Test_cshift_red_black_rotate.cc
Normal file
@ -0,0 +1,223 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_cshift_red_black.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
int Nd = latt_size.size();
|
||||
std::vector<int> simd_layout( { vComplex::Nsimd(),1,1,1});
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::vector<int> mask(Nd,1);
|
||||
mask[0]=0;
|
||||
|
||||
GridCartesian Fine (latt_size,simd_layout,mpi_layout);
|
||||
GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1);
|
||||
|
||||
GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice();
|
||||
|
||||
LatticeComplex U(&Fine);
|
||||
LatticeComplex ShiftU(&Fine);
|
||||
LatticeComplex rbShiftU(&Fine);
|
||||
LatticeComplex Ue(&RBFine);
|
||||
LatticeComplex Uo(&RBFine);
|
||||
LatticeComplex ShiftUe(&RBFine);
|
||||
LatticeComplex ShiftUo(&RBFine);
|
||||
LatticeComplex lex(&Fine);
|
||||
lex=zero;
|
||||
Integer stride =1;
|
||||
{
|
||||
double nrm;
|
||||
LatticeComplex coor(&Fine);
|
||||
|
||||
for(int d=0;d<Nd;d++){
|
||||
// Integer i=10000;
|
||||
Integer i=0;
|
||||
LatticeCoordinate(coor,d);
|
||||
lex = lex + coor*stride+i;
|
||||
stride=stride*latt_size[d];
|
||||
}
|
||||
U=lex;
|
||||
}
|
||||
|
||||
pickCheckerboard(Even,Ue,U);
|
||||
pickCheckerboard(Odd,Uo,U);
|
||||
|
||||
// std::cout<<GridLogMessage << U<<std::endl;
|
||||
std::cout<<GridLogMessage << "Ue " <<norm2(Ue)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Uo " <<norm2(Uo)<<std::endl;
|
||||
|
||||
|
||||
TComplex cm;
|
||||
TComplex cmeo;
|
||||
for(int dir=0;dir<Nd;dir++){
|
||||
// if ( dir!=1 ) continue;
|
||||
for(int shift=0;shift<latt_size[dir];shift++){
|
||||
|
||||
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"Even grid"<<std::endl;
|
||||
ShiftUe = Cshift(Ue,dir,shift); // Shift everything cb by cb
|
||||
std::cout<<GridLogMessage << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"Odd grid"<<std::endl;
|
||||
ShiftUo = Cshift(Uo,dir,shift);
|
||||
std::cout<<GridLogMessage << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"Recombined Even/Odd grids"<<std::endl;
|
||||
setCheckerboard(rbShiftU,ShiftUe);
|
||||
setCheckerboard(rbShiftU,ShiftUo);
|
||||
std::cout<<GridLogMessage << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"Full grid shift"<<std::endl;
|
||||
ShiftU = Cshift(U,dir,shift); // Shift everything
|
||||
std::cout<<GridLogMessage << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
|
||||
|
||||
std::vector<int> coor(4);
|
||||
|
||||
std::cout<<GridLogMessage << "Checking the non-checkerboard shift"<<std::endl;
|
||||
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||
for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
|
||||
|
||||
peekSite(cm,ShiftU,coor);
|
||||
|
||||
///////// double nrm=norm2(U);
|
||||
|
||||
std::vector<int> scoor(coor);
|
||||
scoor[dir] = (scoor[dir]+shift)%latt_size[dir];
|
||||
|
||||
Integer slex = scoor[0]
|
||||
+ latt_size[0]*scoor[1]
|
||||
+ latt_size[0]*latt_size[1]*scoor[2]
|
||||
+ latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
|
||||
|
||||
Complex scm(slex);
|
||||
|
||||
double nrm = abs(scm-cm()()());
|
||||
std::vector<int> peer(4);
|
||||
Complex ctmp = cm;
|
||||
Integer index=real(ctmp);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}}}}
|
||||
|
||||
int exx=0;
|
||||
std::cout<<GridLogMessage << "Checking the checkerboard shift"<<std::endl;
|
||||
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||
for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
|
||||
|
||||
peekSite(cm,rbShiftU,coor);
|
||||
|
||||
Integer checkerboard = RBFine.CheckerBoard(coor);
|
||||
|
||||
// std::cout << " coor "<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] \n ";
|
||||
// std::cout << "shift "<< shift <<" dir "<<dir<< " checker board "<< checkerboard << " ";
|
||||
// std::cout << "Uo " << ShiftUo.checkerboard << " Ue "<<ShiftUe.checkerboard<<std::endl;
|
||||
if ( checkerboard == ShiftUo.checkerboard ) {
|
||||
peekSite(cmeo,ShiftUo,coor);
|
||||
} else {
|
||||
peekSite(cmeo,ShiftUe,coor);
|
||||
}
|
||||
|
||||
|
||||
std::vector<int> scoor(coor);
|
||||
scoor[dir] = (scoor[dir]+shift)%latt_size[dir];
|
||||
|
||||
Integer slex = scoor[0]
|
||||
+ latt_size[0]*scoor[1]
|
||||
+ latt_size[0]*latt_size[1]*scoor[2]
|
||||
+ latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
|
||||
|
||||
Complex scm(slex);
|
||||
|
||||
std::vector<int> peer(4);
|
||||
Complex ctmp=cmeo;
|
||||
Integer index=real(ctmp);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
double nrm = abs(cmeo()()()-scm);
|
||||
if (nrm != 0) {
|
||||
std::cout<<"EOFAIL shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cmeo()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exx=1;
|
||||
|
||||
}
|
||||
|
||||
ctmp=cm;
|
||||
index=real(ctmp);
|
||||
nrm = abs(scm-cm()()());
|
||||
|
||||
if (nrm > 0){
|
||||
std::cout<<"FAIL shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cout<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cout<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
exx=1;
|
||||
} else if (1) {
|
||||
std::cout<<GridLogMessage<<"PASS shift "<< shift<<" in dir "<< dir
|
||||
<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
|
||||
<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
}
|
||||
}}}}
|
||||
if (exx) exit(-1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
125
tests/Test_cshift_rotate.cc
Normal file
125
tests/Test_cshift_rotate.cc
Normal file
@ -0,0 +1,125 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_cshift.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
std::vector<int> latt_size = GridDefaultLatt();
|
||||
std::vector<int> simd_layout( { vComplex::Nsimd(),1,1,1});
|
||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG FineRNG(&Fine); FineRNG.SeedRandomDevice();
|
||||
|
||||
LatticeComplex U(&Fine);
|
||||
LatticeComplex ShiftU(&Fine);
|
||||
|
||||
LatticeComplex lex(&Fine);
|
||||
lex=zero;
|
||||
Integer stride =1;
|
||||
{
|
||||
double nrm;
|
||||
LatticeComplex coor(&Fine);
|
||||
|
||||
for(int d=0;d<4;d++){
|
||||
LatticeCoordinate(coor,d);
|
||||
lex = lex + coor*stride;
|
||||
stride=stride*latt_size[d];
|
||||
}
|
||||
U=lex;
|
||||
}
|
||||
|
||||
|
||||
TComplex cm;
|
||||
|
||||
for(int dir=0;dir<4;dir++){
|
||||
for(int shift=0;shift<latt_size[dir];shift++){
|
||||
if ( Fine.IsBoss() )
|
||||
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
|
||||
|
||||
ShiftU = Cshift(U,dir,shift); // Shift everything
|
||||
|
||||
/*
|
||||
std::cout << "U[0]" << U[0]<<std::endl;
|
||||
std::cout << "U[1]" << U[1]<<std::endl;
|
||||
std::cout << "ShiftU[0]" << ShiftU[0]<<std::endl;
|
||||
std::cout << "ShiftU[1]" << ShiftU[1]<<std::endl;
|
||||
*/
|
||||
std::vector<int> coor(4);
|
||||
|
||||
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||
for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
|
||||
|
||||
peekSite(cm,ShiftU,coor);
|
||||
|
||||
double nrm=norm2(U);
|
||||
|
||||
std::vector<int> scoor(coor);
|
||||
scoor[dir] = (scoor[dir]+shift)%latt_size[dir];
|
||||
|
||||
Integer slex = scoor[0]
|
||||
+ latt_size[0]*scoor[1]
|
||||
+ latt_size[0]*latt_size[1]*scoor[2]
|
||||
+ latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
|
||||
|
||||
Complex scm(slex);
|
||||
|
||||
nrm = abs(scm-cm()()());
|
||||
std::vector<int> peer(4);
|
||||
Complex tmp =cm;
|
||||
Integer index=real(tmp);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
index=real(scm);
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
}
|
||||
/*
|
||||
else {
|
||||
std::cerr<<"PASS shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
std::cerr<<"Got "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
}
|
||||
*/
|
||||
}}}}
|
||||
}
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -42,6 +42,8 @@ public:
|
||||
int, domaindecompose,
|
||||
int, domainsize,
|
||||
int, order,
|
||||
int, Ls,
|
||||
double, mq,
|
||||
double, lo,
|
||||
double, hi,
|
||||
int, steps);
|
||||
@ -263,11 +265,6 @@ public:
|
||||
resid = norm2(r) /norm2(src);
|
||||
std::cout << "SAP "<<i<<" resid "<<resid<<std::endl;
|
||||
|
||||
|
||||
// Npoly*outer*2 1/2 vol matmuls.
|
||||
// 71 iters => 20*71 = 1400 matmuls.
|
||||
// 2*71 = 140 comms.
|
||||
|
||||
// Even domain solve
|
||||
r= where(subset==(Integer)0,r,zz);
|
||||
_SmootherOperator.AdjOp(r,vec1);
|
||||
@ -332,7 +329,7 @@ public:
|
||||
CoarseVector Ctmp(_CoarseOperator.Grid());
|
||||
CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;
|
||||
|
||||
ConjugateGradient<CoarseVector> CG(1.0e-3,100000);
|
||||
ConjugateGradient<CoarseVector> CG(3.0e-3,100000);
|
||||
// ConjugateGradient<FineField> fCG(3.0e-2,1000);
|
||||
|
||||
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
|
||||
@ -345,14 +342,14 @@ public:
|
||||
|
||||
// Chebyshev<FineField> Cheby (0.5,70.0,30,InverseApproximation);
|
||||
// Chebyshev<FineField> ChebyAccu(0.5,70.0,30,InverseApproximation);
|
||||
Chebyshev<FineField> Cheby (2.0,70.0,15,InverseApproximation);
|
||||
Chebyshev<FineField> ChebyAccu(2.0,70.0,15,InverseApproximation);
|
||||
Chebyshev<FineField> Cheby (params.lo,params.hi,params.order,InverseApproximation);
|
||||
Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation);
|
||||
// Cheby.JacksonSmooth();
|
||||
// ChebyAccu.JacksonSmooth();
|
||||
|
||||
_Aggregates.ProjectToSubspace (Csrc,in);
|
||||
_Aggregates.PromoteFromSubspace(Csrc,out);
|
||||
std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
|
||||
// _Aggregates.ProjectToSubspace (Csrc,in);
|
||||
// _Aggregates.PromoteFromSubspace(Csrc,out);
|
||||
// std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
|
||||
|
||||
// ofstream fout("smoother");
|
||||
// Cheby.csv(fout);
|
||||
@ -479,7 +476,7 @@ int main (int argc, char ** argv)
|
||||
read(RD,"params",params);
|
||||
std::cout<<"Params: Order "<<params.order<<"["<<params.lo<<","<<params.hi<<"]"<< " steps "<<params.steps<<std::endl;
|
||||
|
||||
const int Ls=8;
|
||||
const int Ls=params.Ls;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
@ -490,10 +487,12 @@ int main (int argc, char ** argv)
|
||||
///////////////////////////////////////////////////
|
||||
// Construct a coarsened grid; utility for this?
|
||||
///////////////////////////////////////////////////
|
||||
const int block=2;
|
||||
std::vector<int> block ({2,2,2,2});
|
||||
const int nbasis= 32;
|
||||
|
||||
std::vector<int> clatt = GridDefaultLatt();
|
||||
for(int d=0;d<clatt.size();d++){
|
||||
clatt[d] = clatt[d]/block;
|
||||
clatt[d] = clatt[d]/block[d];
|
||||
}
|
||||
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||
@ -539,7 +538,7 @@ int main (int argc, char ** argv)
|
||||
// SU3::HotConfiguration(RNG4,Umu);
|
||||
// Umu=zero;
|
||||
|
||||
RealD mass=0.01;
|
||||
RealD mass=params.mq;
|
||||
RealD M5=1.8;
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
@ -548,9 +547,6 @@ int main (int argc, char ** argv)
|
||||
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
|
||||
const int nbasis = 32;
|
||||
// const int nbasis = 4;
|
||||
|
||||
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||
@ -564,7 +560,8 @@ int main (int argc, char ** argv)
|
||||
assert ( (nbasis & 0x1)==0);
|
||||
int nb=nbasis/2;
|
||||
std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
|
||||
Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
|
||||
// Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
|
||||
Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
|
||||
for(int n=0;n<nb;n++){
|
||||
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||
std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
|
||||
@ -600,7 +597,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||
ConjugateGradient<CoarseVector> CG(1.0e-6,100000);
|
||||
CG(PosdefLdop,c_src,c_res);
|
||||
// CG(PosdefLdop,c_src,c_res);
|
||||
|
||||
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
// std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
|
||||
@ -625,17 +622,17 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
Precon.SmootherTest(src);
|
||||
// Precon.SmootherTest(src);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
PreconDD.SmootherTest(src);
|
||||
// PreconDD.SmootherTest(src);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
PreconDD.SAP(src,result);
|
||||
// PreconDD.SAP(src,result);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Unprec CG "<< std::endl;
|
||||
@ -663,18 +660,18 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
|
||||
result=zero;
|
||||
std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
||||
PGCRDD(HermIndefOp,src,result);
|
||||
// PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
|
||||
// result=zero;
|
||||
// std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
||||
// PGCRDD(HermIndefOp,src,result);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
// PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,128);
|
||||
// std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
||||
// result=zero;
|
||||
// PGCR(HermIndefOp,src,result);
|
||||
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
|
||||
std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
|
||||
result=zero;
|
||||
PGCR(HermIndefOp,src,result);
|
||||
|
||||
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||
std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
|
||||
|
@ -31,6 +31,10 @@ using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||
|
||||
RealD AllZero(RealD x){ return 0.;}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
@ -41,41 +45,56 @@ int main (int argc, char ** argv)
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n",UGrid,UrbGrid,FGrid,FrbGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeFermion src(FGrid); gaussian(RNG5,src);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
SU3::HotConfiguration(RNG4, Umu);
|
||||
SU3::TepidConfiguration(RNG4, Umu);
|
||||
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD mass=0.01;
|
||||
RealD M5=1.8;
|
||||
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
RealD mob_b=1.5;
|
||||
// DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||
GparityMobiusFermionD ::ImplParams params;
|
||||
std::vector<int> twists({1,1,1,0});
|
||||
params.twists = twists;
|
||||
GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
|
||||
|
||||
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
// MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
// SchurDiagTwoOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
|
||||
// SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
|
||||
|
||||
const int Nk = 30;
|
||||
const int Np = 10;
|
||||
const int Nstop = 30;
|
||||
const int Nk = 40;
|
||||
const int Np = 40;
|
||||
const int Nm = Nk+Np;
|
||||
const int MaxIt= 10000;
|
||||
RealD resid = 1.0e-8;
|
||||
|
||||
std::vector<double> Coeffs(1,1.0);
|
||||
Polynomial<LatticeFermion> PolyX(Coeffs);
|
||||
ImplicitlyRestartedLanczos<LatticeFermion> IRL(HermOp,PolyX,Nk,Nm,resid,MaxIt);
|
||||
std::vector<double> Coeffs { 0.,-1.};
|
||||
Polynomial<FermionField> PolyX(Coeffs);
|
||||
Chebyshev<FermionField> Cheb(0.2,5.,11);
|
||||
// ChebyshevLanczos<LatticeFermion> Cheb(9.,1.,0.,20);
|
||||
// Cheb.csv(std::cout);
|
||||
// exit(-24);
|
||||
ImplicitlyRestartedLanczos<FermionField> IRL(HermOp,Cheb,Nstop,Nk,Nm,resid,MaxIt);
|
||||
|
||||
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<LatticeFermion> evec(Nm,FGrid);
|
||||
for(int i=0;i<Nm;i++){
|
||||
FermionField src(FrbGrid); gaussian(RNG5rb,src);
|
||||
std::vector<FermionField> evec(Nm,FrbGrid);
|
||||
for(int i=0;i<1;i++){
|
||||
std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl;
|
||||
};
|
||||
|
||||
|
138
tests/Test_dwf_rb5d.cc
Normal file
138
tests/Test_dwf_rb5d.cc
Normal file
@ -0,0 +1,138 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_dwf_even_odd.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
template<class d>
|
||||
struct scal {
|
||||
d internal;
|
||||
};
|
||||
|
||||
Gamma::GammaMatrix Gmu [] = {
|
||||
Gamma::GammaX,
|
||||
Gamma::GammaY,
|
||||
Gamma::GammaZ,
|
||||
Gamma::GammaT
|
||||
};
|
||||
|
||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
|
||||
|
||||
typedef WilsonFermion5D<WilsonImplR> WilsonFermion5D_OKR;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||
|
||||
|
||||
const int Ls=32;
|
||||
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
|
||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||
|
||||
std::vector<int> seeds4({1,2,3,4});
|
||||
std::vector<int> seeds5({5,6,7,8});
|
||||
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
|
||||
LatticeFermion src (FGrid); random(RNG5,src);
|
||||
LatticeFermion result(FGrid); result=zero;
|
||||
LatticeFermion ref(FGrid); ref=zero;
|
||||
LatticeFermion tmp(FGrid);
|
||||
LatticeFermion err(FGrid);
|
||||
|
||||
LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
|
||||
// Only one non-zero (y)
|
||||
/*
|
||||
Umu=zero;
|
||||
for(int nn=0;nn<Nd;nn++){
|
||||
random(RNG4,U[nn]);
|
||||
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
|
||||
}
|
||||
*/
|
||||
|
||||
RealD mass=0.1;
|
||||
RealD M5 =1.8;
|
||||
typename WilsonFermion5DR::ImplParams params;
|
||||
|
||||
WilsonFermion5DR Dw(1,Umu,*FGrid,*FrbGrid,*sUGrid,M5,params);
|
||||
|
||||
Dw.Dhop(src,result,0);
|
||||
|
||||
std::cout << "Norm src = "<<norm2(src)<<" Norm res = "<<norm2(result) << std::endl;
|
||||
|
||||
|
||||
|
||||
GridCartesian * FokGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FokrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
WilsonFermion5D_OKR Dok(Umu,*FokGrid,*FokrbGrid,*UGrid,*UrbGrid,M5,params);
|
||||
|
||||
LatticeFermion src_ok (FokGrid);
|
||||
LatticeFermion ref_ok(FokGrid);
|
||||
LatticeFermion result_ok(FokGrid);
|
||||
|
||||
|
||||
for(int lidx=0;lidx<FGrid->lSites();lidx++){
|
||||
std::vector<int> lcoor;
|
||||
FGrid->LocalIndexToLocalCoor(lidx,lcoor);
|
||||
|
||||
SpinColourVector siteSrc;
|
||||
|
||||
peekLocalSite(siteSrc,src,lcoor);
|
||||
pokeLocalSite(siteSrc,src_ok,lcoor);
|
||||
|
||||
peekLocalSite(siteSrc,result,lcoor);
|
||||
pokeLocalSite(siteSrc,result_ok,lcoor);
|
||||
}
|
||||
|
||||
Dok.Dhop(src_ok,ref_ok,0);
|
||||
|
||||
std::cout << "Reference = "<<norm2(src_ok)<<" res = "<<norm2(ref_ok) << std::endl;
|
||||
ref_ok = ref_ok - result_ok;
|
||||
std::cout << "Reference diff = "<<norm2(result_ok)<< std::endl;
|
||||
std::cout << "Reference diff = "<<norm2(ref_ok)<< std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
@ -140,4 +140,18 @@ int main(int argc,char **argv)
|
||||
std::cout << "Loaded (txt) -----------------" << std::endl;
|
||||
std::cout << copy3 << std::endl << veccopy3 << std::endl;
|
||||
}
|
||||
|
||||
std::vector<int> iv = strToVec<int>("1 2 2 4");
|
||||
std::vector<std::string> sv = strToVec<std::string>("bli bla blu");
|
||||
|
||||
for (auto &e: iv)
|
||||
{
|
||||
std::cout << e << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
for (auto &e: sv)
|
||||
{
|
||||
std::cout << e << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
@ -69,7 +69,6 @@ public:
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
|
||||
std::string name(void) const { return std::string("timesI"); }
|
||||
};
|
||||
|
||||
class funcTimesMinusI {
|
||||
public:
|
||||
funcTimesMinusI() {};
|
||||
@ -97,6 +96,7 @@ public:
|
||||
// zeroit
|
||||
// permute
|
||||
|
||||
|
||||
class funcReduce {
|
||||
public:
|
||||
funcReduce() {};
|
||||
@ -145,7 +145,7 @@ void Tester(const functor &func)
|
||||
|
||||
int ok=0;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( abs(reference[i]-result[i])>0){
|
||||
if ( abs(reference[i]-result[i])>1.0e-7){
|
||||
std::cout<<GridLogMessage<< "*****" << std::endl;
|
||||
std::cout<<GridLogMessage<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
|
||||
ok++;
|
||||
@ -208,6 +208,100 @@ void ReductionTester(const functor &func)
|
||||
|
||||
|
||||
|
||||
class funcPermute {
|
||||
public:
|
||||
int n;
|
||||
funcPermute(int _n) { n=_n;};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { permute(rr,i1,n);}
|
||||
template<class scal> void apply(std::vector<scal> &rr,std::vector<scal> &in) const {
|
||||
int sz=in.size();
|
||||
int msk = sz>>(n+1);
|
||||
for(int i=0;i<sz;i++){
|
||||
rr[i] = in[ i^msk ];
|
||||
}
|
||||
}
|
||||
std::string name(void) const { return std::string("Permute"); }
|
||||
};
|
||||
class funcRotate {
|
||||
public:
|
||||
int n;
|
||||
funcRotate(int _n) { n=_n;};
|
||||
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr=rotate(i1,n);}
|
||||
template<class scal> void apply(std::vector<scal> &rr,std::vector<scal> &in) const {
|
||||
int sz = in.size();
|
||||
for(int i=0;i<sz;i++){
|
||||
rr[i] = in[(i+n)%sz];
|
||||
}
|
||||
}
|
||||
std::string name(void) const { return std::string("Rotate"); }
|
||||
};
|
||||
|
||||
|
||||
template<class scal, class vec,class functor >
|
||||
void PermTester(const functor &func)
|
||||
{
|
||||
GridSerialRNG sRNG;
|
||||
sRNG.SeedRandomDevice();
|
||||
|
||||
int Nsimd = vec::Nsimd();
|
||||
|
||||
std::vector<scal> input1(Nsimd);
|
||||
std::vector<scal> input2(Nsimd);
|
||||
std::vector<scal> result(Nsimd);
|
||||
std::vector<scal> reference(Nsimd);
|
||||
|
||||
std::vector<vec,alignedAllocator<vec> > buf(3);
|
||||
vec & v_input1 = buf[0];
|
||||
vec & v_input2 = buf[1];
|
||||
vec & v_result = buf[2];
|
||||
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
random(sRNG,input1[i]);
|
||||
random(sRNG,input2[i]);
|
||||
random(sRNG,result[i]);
|
||||
}
|
||||
|
||||
merge<vec,scal>(v_input1,input1);
|
||||
merge<vec,scal>(v_input2,input2);
|
||||
merge<vec,scal>(v_result,result);
|
||||
|
||||
func(v_result,v_input1,v_input2);
|
||||
|
||||
func.apply(reference,input1);
|
||||
|
||||
extract<vec,scal>(v_result,result);
|
||||
std::cout<<GridLogMessage << " " << func.name() << " " <<func.n <<std::endl;
|
||||
|
||||
int ok=0;
|
||||
if (0) {
|
||||
std::cout<<GridLogMessage<< "*****" << std::endl;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
std::cout<< input1[i]<<" ";
|
||||
}
|
||||
std::cout <<std::endl;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
std::cout<< result[i]<<" ";
|
||||
}
|
||||
std::cout <<std::endl;
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
std::cout<< reference[i]<<" ";
|
||||
}
|
||||
std::cout <<std::endl;
|
||||
std::cout<<GridLogMessage<< "*****" << std::endl;
|
||||
}
|
||||
for(int i=0;i<Nsimd;i++){
|
||||
if ( abs(reference[i]-result[i])>1.0e-7){
|
||||
std::cout<<GridLogMessage<< "*****" << std::endl;
|
||||
std::cout<<GridLogMessage<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
|
||||
ok++;
|
||||
}
|
||||
}
|
||||
if ( ok==0 ) {
|
||||
std::cout<<GridLogMessage << " OK!" <<std::endl;
|
||||
}
|
||||
assert(ok==0);
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
@ -235,6 +329,24 @@ int main (int argc, char ** argv)
|
||||
Tester<RealF,vRealF>(funcInnerProduct());
|
||||
ReductionTester<RealF,RealF,vRealF>(funcReduce());
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vRealF permutes "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
|
||||
// Log2 iteration
|
||||
for(int i=0;(1<<i)< vRealF::Nsimd();i++){
|
||||
PermTester<RealF,vRealF>(funcPermute(i));
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vRealF rotate "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
for(int r=0;r<vRealF::Nsimd();r++){
|
||||
PermTester<RealF,vRealF>(funcRotate(r));
|
||||
}
|
||||
|
||||
|
||||
std::cout << GridLogMessage <<"==================================="<< std::endl;
|
||||
std::cout << GridLogMessage <<"Testing vRealD "<<std::endl;
|
||||
std::cout << GridLogMessage <<"==================================="<< std::endl;
|
||||
@ -247,6 +359,25 @@ int main (int argc, char ** argv)
|
||||
Tester<RealD,vRealD>(funcInnerProduct());
|
||||
ReductionTester<RealD,RealD,vRealD>(funcReduce());
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vRealD permutes "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
|
||||
// Log2 iteration
|
||||
for(int i=0;(1<<i)< vRealD::Nsimd();i++){
|
||||
PermTester<RealD,vRealD>(funcPermute(i));
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vRealD rotate "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
for(int r=0;r<vRealD::Nsimd();r++){
|
||||
PermTester<RealD,vRealD>(funcRotate(r));
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::cout << GridLogMessage <<"==================================="<< std::endl;
|
||||
std::cout << GridLogMessage <<"Testing vComplexF "<<std::endl;
|
||||
std::cout << GridLogMessage <<"==================================="<< std::endl;
|
||||
@ -261,6 +392,23 @@ int main (int argc, char ** argv)
|
||||
Tester<ComplexF,vComplexF>(funcInnerProduct());
|
||||
ReductionTester<ComplexF,ComplexF,vComplexF>(funcReduce());
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vComplexF permutes "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
|
||||
// Log2 iteration
|
||||
for(int i=0;(1<<i)< vComplexF::Nsimd();i++){
|
||||
PermTester<ComplexF,vComplexF>(funcPermute(i));
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vComplexF rotate "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
for(int r=0;r<vComplexF::Nsimd();r++){
|
||||
PermTester<ComplexF,vComplexF>(funcRotate(r));
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vComplexD "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
@ -276,5 +424,23 @@ int main (int argc, char ** argv)
|
||||
Tester<ComplexD,vComplexD>(funcInnerProduct());
|
||||
ReductionTester<ComplexD,ComplexD,vComplexD>(funcReduce());
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vComplexD permutes "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
|
||||
// Log2 iteration
|
||||
for(int i=0;(1<<i)< vComplexD::Nsimd();i++){
|
||||
PermTester<ComplexD,vComplexD>(funcPermute(i));
|
||||
}
|
||||
|
||||
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
std::cout<<GridLogMessage << "Testing vComplexD rotate "<<std::endl;
|
||||
std::cout<<GridLogMessage << "==================================="<< std::endl;
|
||||
for(int r=0;r<vComplexD::Nsimd();r++){
|
||||
PermTester<ComplexD,vComplexD>(funcRotate(r));
|
||||
}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
*/
|
||||
|
||||
typedef CartesianStencil<vobj,vobj,SimpleCompressor<vobj> > Stencil;
|
||||
typedef CartesianStencil<vobj,vobj> Stencil;
|
||||
for(int dir=0;dir<4;dir++){
|
||||
for(int disp=0;disp<Fine._fdimensions[dir];disp++){
|
||||
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
|
||||
random(pRNG,scale);
|
||||
|
||||
scale = exp(-real(scale)*3.0);
|
||||
scale = exp(-Grid::real(scale)*3.0);
|
||||
std::cout << " True matrix \n"<< scale <<std::endl;
|
||||
}
|
||||
|
||||
|
@ -27,15 +27,24 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
#include <PerfCount.h>
|
||||
#include <simd/Avx512Asm.h>
|
||||
|
||||
|
||||
int main(int argc,char **argv)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#if 0
|
||||
#include <simd/Intel512wilson.h>
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
void ZmulF(void *ptr1,void *ptr2,void *ptr3);
|
||||
void Zmul(void *ptr1,void *ptr2,void *ptr3);
|
||||
void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3);
|
||||
void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3);
|
||||
void TimesIAvx512F(void *ptr1,void *ptr3);
|
||||
void TimesIAvx512(void *ptr1,void *ptr3);
|
||||
void TimesMinusIAvx512F(void *ptr1,void *ptr3);
|
||||
void TimesMinusIAvx512(void *ptr1,void *ptr3);
|
||||
|
||||
|
||||
|
||||
@ -63,50 +72,106 @@ int main(int argc,char **argv)
|
||||
|
||||
vColourMatrixD mat;
|
||||
vHalfSpinColourVectorD vec;
|
||||
vHalfSpinColourVectorD vec1;
|
||||
vHalfSpinColourVectorD vec2;
|
||||
vHalfSpinColourVectorD vec3;
|
||||
|
||||
vHalfSpinColourVectorD matvec;
|
||||
vHalfSpinColourVectorD ref;
|
||||
vComplexD err;
|
||||
|
||||
random(sRNG,vec1);
|
||||
vec1 = std::complex<double>(0.1,3.0);
|
||||
random(sRNG,vec2);
|
||||
vec2=2.0;
|
||||
random(sRNG,vec3);
|
||||
|
||||
//std::cout << "Zmul vec1"<<vec1<<" &vec1 "<<& vec1<<std::endl;
|
||||
//std::cout << "Zmul vec2"<<vec2<<" &vec2 "<<& vec2<<std::endl;
|
||||
//std::cout << "Zmul vec3"<<vec3<<" &vec3 "<<& vec3<<std::endl;
|
||||
for(int sp=0;sp<2;sp++){
|
||||
for(int co=0;co<3;co++){
|
||||
ref()(sp)(co) = vec1()(sp)(co)*vec2()(sp)(co);
|
||||
}}
|
||||
|
||||
Zmul((void *)&vec1,(void *)&vec2,(void *)&vec3);
|
||||
//std::cout << "Zmul vec3"<<vec3<<" &vec3 "<<& vec3<<std::endl;
|
||||
//std::cout << "Zmul \n\t ref "<<ref<<"\n\t vec3"<<vec3 <<std::endl;
|
||||
ref = ref - vec3;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<"Zmul diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
random(sRNG,mat);
|
||||
mat = zero;
|
||||
mat()()(0,0) = 1.0;
|
||||
random(sRNG,vec);
|
||||
|
||||
ref = mat*vec;
|
||||
|
||||
WilsonDslashAvx512((void *)&vec, (void *)&mat,(void *)&matvec);
|
||||
|
||||
//std::cout << ref <<std::endl;
|
||||
//std::cout << matvec<<std::endl;
|
||||
ref = ref - matvec;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<"Double SU3 x 2spin diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
vColourMatrixF matF;
|
||||
vHalfSpinColourVectorF vec1F;
|
||||
vHalfSpinColourVectorF vec2F;
|
||||
vHalfSpinColourVectorF vec3F;
|
||||
vHalfSpinColourVectorF vecF;
|
||||
vHalfSpinColourVectorF matvecF;
|
||||
vHalfSpinColourVectorF refF;
|
||||
vComplexF errF;
|
||||
|
||||
random(sRNG,matF);
|
||||
matF = zero;
|
||||
matF()()(0,0)=1.0;
|
||||
random(sRNG,vecF);
|
||||
|
||||
refF = matF*vecF;
|
||||
|
||||
WilsonDslashAvx512F((void *)&vecF, (void *)&matF,(void *)&matvecF);
|
||||
|
||||
//std::cout << refF <<std::endl;
|
||||
//std::cout << matvecF<<std::endl;
|
||||
|
||||
refF = refF-matvecF;
|
||||
errF = TensorRemove(innerProduct(refF,refF));
|
||||
std::cout <<"Single SU3 x 2spin diff "<< Reduce(errF)<<std::endl;
|
||||
|
||||
TimesIAvx512F((void *)&vecF,(void *)&matvecF);
|
||||
//std::cout << timesI(vecF)<<std::endl;
|
||||
//std::cout << matvecF<<std::endl;
|
||||
refF = timesI(vecF)-matvecF;
|
||||
errF = TensorRemove(innerProduct(refF,refF));
|
||||
std::cout <<" timesI single diff "<< Reduce(errF)<<std::endl;
|
||||
|
||||
TimesIAvx512((void *)&vec,(void *)&matvec);
|
||||
|
||||
//std::cout << timesI(vec)<<std::endl;
|
||||
//std::cout << matvec<<std::endl;
|
||||
|
||||
ref = timesI(vec)-matvec;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<" timesI double diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
TimesMinusIAvx512F((void *)&vecF,(void *)&matvecF);
|
||||
//std::cout << timesMinusI(vecF)<<std::endl;
|
||||
//std::cout << matvecF<<std::endl;
|
||||
refF = timesMinusI(vecF)-matvecF;
|
||||
errF = TensorRemove(innerProduct(refF,refF));
|
||||
std::cout <<" timesMinusI single diff "<< Reduce(errF)<<std::endl;
|
||||
|
||||
TimesMinusIAvx512((void *)&vec,(void *)&matvec);
|
||||
//std::cout << timesMinusI(vec)<<std::endl;
|
||||
//std::cout << matvec<<std::endl;
|
||||
|
||||
ref = timesMinusI(vec)-matvec;
|
||||
err = TensorRemove(innerProduct(ref,ref));
|
||||
std::cout <<" timesMinusI double diff "<< Reduce(err)<<std::endl;
|
||||
|
||||
|
||||
LatticeFermion src (FGrid);
|
||||
LatticeFermion tmp (FGrid);
|
||||
LatticeFermion srce(FrbGrid);
|
||||
|
||||
LatticeFermion resulto(FrbGrid); resulto=zero;
|
||||
@ -114,13 +179,14 @@ int main(int argc,char **argv)
|
||||
LatticeFermion diff(FrbGrid);
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
|
||||
#if 1
|
||||
|
||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||
random(RNG5,src);
|
||||
#if 1
|
||||
random(RNG4,Umu);
|
||||
#else
|
||||
int mmu=3;
|
||||
int mmu=2;
|
||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
@ -157,7 +223,7 @@ int main(int argc,char **argv)
|
||||
}
|
||||
t1=usecond();
|
||||
|
||||
|
||||
#if 1
|
||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||
Dw.DhopOE(srce,resulta,0);
|
||||
PerformanceCounter Counter(i);
|
||||
@ -166,50 +232,119 @@ int main(int argc,char **argv)
|
||||
Counter.Stop();
|
||||
Counter.Report();
|
||||
}
|
||||
resulta = (-0.5) * resulta;
|
||||
#endif
|
||||
//resulta = (-0.5) * resulta;
|
||||
|
||||
std::cout<<GridLogMessage << "Called Asm Dw"<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(resulta)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops*ncall/(t1-t0)<<std::endl;
|
||||
diff = resulto-resulta;
|
||||
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
|
||||
|
||||
std::cout<<std::endl;
|
||||
#if 0
|
||||
std::cout<<"=========== result Grid ============="<<std::endl;
|
||||
std::cout<<std::endl;
|
||||
tmp = zero;
|
||||
setCheckerboard(tmp,resulto);
|
||||
std::cout<<tmp<<std::endl;
|
||||
std::cout<<std::endl;
|
||||
std::cout<<"=========== result ASM ============="<<std::endl;
|
||||
std::cout<<std::endl;
|
||||
tmp = zero;
|
||||
setCheckerboard(tmp,resulta);
|
||||
std::cout<<tmp<<std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef VLOAD
|
||||
#undef VSTORE
|
||||
#undef VMUL
|
||||
#undef VMADD
|
||||
#undef ZEND1
|
||||
#undef ZEND2
|
||||
#undef ZLOAD
|
||||
#undef ZMUL
|
||||
#undef ZMADD
|
||||
|
||||
#define VZERO(A) VZEROd(A)
|
||||
#define VTIMESI(A,B,C) VTIMESId(A,B,C)
|
||||
#define VTIMESMINUSI(A,B,C) VTIMESMINUSId(A,B,C)
|
||||
|
||||
#define VLOAD(OFF,PTR,DEST) VLOADd(OFF,PTR,DEST)
|
||||
#define VSTORE(OFF,PTR,SRC) VSTOREd(OFF,PTR,SRC)
|
||||
#define VMUL(Uri,Uir,Chi,UChi,Z) VMULd(Uri,Uir,Chi,UChi,Z)
|
||||
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDd(Uri,Uir,Chi,UChi,Z)
|
||||
#define ZEND1(A,B,C) ZEND1d(A,B,C)
|
||||
#define ZEND2(A,B,C) ZEND2d(A,B,C)
|
||||
#define ZLOAD(A,B,C,D) ZLOADd(A,B,C,D)
|
||||
#define ZMUL(A,B,C,D,E) ZMULd(A,B,C,D,E)
|
||||
#define ZMADD(A,B,C,D,E) ZMADDd(A,B,C,D,E)
|
||||
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
#include <simd/Intel512double.h>
|
||||
|
||||
#define zz Z0
|
||||
|
||||
|
||||
void Zmul(void *ptr1,void *ptr2,void *ptr3)
|
||||
{
|
||||
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k6 " : : :);
|
||||
__asm__ ("mov $0x5555, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k7 " : : :);
|
||||
|
||||
#define CC result_00
|
||||
LOAD64(%r9,ptr1);
|
||||
LOAD64(%r8,ptr2);
|
||||
LOAD64(%r10,ptr3)
|
||||
__asm__ (
|
||||
VLOAD(0,%r8,CC)
|
||||
ZLOAD(0,%r9,Chi_00,Z0)
|
||||
ZMUL(Chi_00,Z0,CC,UChi_00,Z1)
|
||||
//VSTORE(0,%r10,UChi_00)
|
||||
//VSTORE(1,%r10,Z1)
|
||||
ZEND1(UChi_00,Z1,Z0)
|
||||
//VSTORE(2,%r10,UChi_00)
|
||||
ZEND2(UChi_00,Z1,Z0)
|
||||
//VSTORE(3,%r10,UChi_00)
|
||||
VSTORE(0,%r10,UChi_00)
|
||||
VLOAD(1,%r8,CC)
|
||||
ZLOAD(1,%r9,Chi_01,Z0)
|
||||
ZMUL(Chi_01,Z0,CC,UChi_01,Z1)
|
||||
ZEND1(UChi_01,Z1,Z0)
|
||||
ZEND2(UChi_01,Z1,Z0)
|
||||
VSTORE(1,%r10,UChi_01)
|
||||
VLOAD(2,%r8,CC)
|
||||
ZLOAD(2,%r9,Chi_02,Z0)
|
||||
ZMUL(Chi_02,Z0,CC,UChi_02,Z1)
|
||||
ZEND1(UChi_02,Z1,Z0)
|
||||
ZEND2(UChi_02,Z1,Z0)
|
||||
VSTORE(2,%r10,UChi_02)
|
||||
VLOAD(3,%r8,CC)
|
||||
ZLOAD(3,%r9,Chi_10,Z0)
|
||||
ZMUL(Chi_10,Z0,CC,UChi_10,Z1)
|
||||
ZEND1(UChi_10,Z1,Z0)
|
||||
ZEND2(UChi_10,Z1,Z0)
|
||||
VSTORE(3,%r10,UChi_10)
|
||||
VLOAD(4,%r8,CC)
|
||||
ZLOAD(4,%r9,Chi_11,Z0)
|
||||
ZMUL(Chi_11,Z0,CC,UChi_11,Z1)
|
||||
ZEND1(UChi_11,Z1,Z0)
|
||||
ZEND2(UChi_11,Z1,Z0)
|
||||
VSTORE(4,%r10,UChi_11)
|
||||
VLOAD(5,%r8,CC)
|
||||
ZLOAD(5,%r9,Chi_12,Z0)
|
||||
ZMUL(Chi_12,Z0,CC,UChi_12,Z1)
|
||||
ZEND1(UChi_12,Z1,Z0)
|
||||
ZEND2(UChi_12,Z1,Z0)
|
||||
VSTORE(5,%r10,UChi_12)
|
||||
);
|
||||
}
|
||||
void TimesMinusIAvx512(void *ptr1,void *ptr3)
|
||||
{
|
||||
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k6 " : : :);
|
||||
__asm__ ("mov $0x5555, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k7 " : : :);
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
|
||||
__asm__ (
|
||||
VZERO(zz)
|
||||
VTIMESMINUSI(Chi_00,UChi_00,zz)
|
||||
VTIMESMINUSI(Chi_01,UChi_01,zz)
|
||||
VTIMESMINUSI(Chi_02,UChi_02,zz)
|
||||
VTIMESMINUSI(Chi_10,UChi_10,zz)
|
||||
VTIMESMINUSI(Chi_11,UChi_11,zz)
|
||||
VTIMESMINUSI(Chi_12,UChi_12,zz)
|
||||
);
|
||||
|
||||
SAVE_UCHI(ptr3);
|
||||
}
|
||||
|
||||
void TimesIAvx512(void *ptr1,void *ptr3)
|
||||
{
|
||||
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
|
||||
__asm__ ("kmov %%eax, %%k6 " : : :);
|
||||
__asm__ ("knot %%k6, %%k7 " : : :);
|
||||
|
||||
__asm__ ("kmovw %%eax, %%k6 " : : :);
|
||||
__asm__ ("mov $0x5555, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k7 " : : :);
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
@ -252,41 +387,69 @@ void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3)
|
||||
|
||||
}
|
||||
|
||||
#undef VLOAD
|
||||
#undef VSTORE
|
||||
#undef VMUL
|
||||
#undef VMADD
|
||||
#undef ZEND1
|
||||
#undef ZEND2
|
||||
#undef ZLOAD
|
||||
#undef ZMUL
|
||||
#undef ZMADD
|
||||
#undef VZERO
|
||||
#undef VTIMESI
|
||||
#undef VTIMESI0
|
||||
#undef VTIMESI1
|
||||
#undef VTIMESI2
|
||||
#undef VTIMESMINUSI
|
||||
#undef ZMULMEM2SP
|
||||
#undef ZMADDMEM2SP
|
||||
#define VZERO(A) VZEROf(A)
|
||||
#define VMOV(A,B) VMOVf(A,B)
|
||||
#define VADD(A,B,C) VADDf(A,B,C)
|
||||
#define VSUB(A,B,C) VSUBf(A,B,C)
|
||||
#define VTIMESI(A,B,C) VTIMESIf(A,B,C)
|
||||
#define VTIMESMINUSI(A,B,C) VTIMESMINUSIf(A,B,C)
|
||||
#include <simd/Intel512single.h>
|
||||
|
||||
#define VLOAD(OFF,PTR,DEST) VLOADf(OFF,PTR,DEST)
|
||||
#define VSTORE(OFF,PTR,SRC) VSTOREf(OFF,PTR,SRC)
|
||||
#define VMUL(Uri,Uir,Chi,UChi,Z) VMULf(Uri,Uir,Chi,UChi,Z)
|
||||
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDf(Uri,Uir,Chi,UChi,Z)
|
||||
#define ZEND1(A,B,C) ZEND1f(A,B,C)
|
||||
#define ZEND2(A,B,C) ZEND2f(A,B,C)
|
||||
#define ZLOAD(A,B,C,D) ZLOADf(A,B,C,D)
|
||||
#define ZMUL(A,B,C,D,E) ZMULf(A,B,C,D,E)
|
||||
#define ZMADD(A,B,C,D,E) ZMADDf(A,B,C,D,E)
|
||||
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
void ZmulF(void *ptr1,void *ptr2,void *ptr3)
|
||||
{
|
||||
__asm__ ("mov $0xAAAA, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k6 " : : :);
|
||||
__asm__ ("mov $0x5555, %%eax " : : :"%eax");
|
||||
__asm__ ("kmovw %%eax, %%k7 " : : :);
|
||||
MASK_REGS;
|
||||
ZLOAD(0,ptr1,Chi_00,Z0);
|
||||
ZLOAD(1,ptr1,Chi_01,Z1);
|
||||
ZLOAD(2,ptr1,Chi_02,Z2);
|
||||
ZLOAD(3,ptr1,Chi_10,Z3);
|
||||
ZLOAD(4,ptr1,Chi_11,Z4);
|
||||
ZLOAD(5,ptr1,Chi_12,Z5);
|
||||
|
||||
VLOAD(0,ptr2,Chi_20);
|
||||
VLOAD(1,ptr2,Chi_21);
|
||||
VLOAD(2,ptr2,Chi_22);
|
||||
VLOAD(3,ptr2,Chi_30);
|
||||
VLOAD(4,ptr2,Chi_31);
|
||||
VLOAD(5,ptr2,Chi_32);
|
||||
|
||||
ZMUL(Chi_00,Z0,Chi_20,UChi_00,UChi_20);
|
||||
ZMUL(Chi_01,Z1,Chi_21,UChi_01,UChi_21);
|
||||
ZMUL(Chi_02,Z2,Chi_22,UChi_02,UChi_22);
|
||||
ZMUL(Chi_10,Z3,Chi_23,UChi_10,UChi_30);
|
||||
ZMUL(Chi_11,Z4,Chi_24,UChi_11,UChi_31);
|
||||
ZMUL(Chi_12,Z5,Chi_25,UChi_12,UChi_32);
|
||||
|
||||
ZEND1(UChi_00,UChi_20,Z0);
|
||||
ZEND1(UChi_01,UChi_21,Z1);
|
||||
ZEND1(UChi_02,UChi_22,Z2);
|
||||
ZEND1(UChi_10,UChi_30,Z3);
|
||||
ZEND1(UChi_11,UChi_31,Z4);
|
||||
ZEND1(UChi_12,UChi_32,Z5);
|
||||
|
||||
ZEND2(UChi_00,UChi_20,Z0);
|
||||
ZEND2(UChi_01,UChi_21,Z1);
|
||||
ZEND2(UChi_02,UChi_22,Z2);
|
||||
ZEND2(UChi_10,UChi_30,Z3);
|
||||
ZEND2(UChi_11,UChi_31,Z4);
|
||||
ZEND2(UChi_12,UChi_32,Z5);
|
||||
|
||||
SAVE_UCHI(ptr3);
|
||||
}
|
||||
|
||||
void TimesMinusIAvx512F(void *ptr1,void *ptr3)
|
||||
{
|
||||
MASK_REGS;
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
__asm__ (
|
||||
VZERO(zz)
|
||||
VTIMESMINUSI(Chi_00,UChi_00,zz)
|
||||
VTIMESMINUSI(Chi_01,UChi_01,zz)
|
||||
VTIMESMINUSI(Chi_02,UChi_02,zz)
|
||||
VTIMESMINUSI(Chi_10,UChi_10,zz)
|
||||
VTIMESMINUSI(Chi_11,UChi_11,zz)
|
||||
VTIMESMINUSI(Chi_12,UChi_12,zz)
|
||||
);
|
||||
SAVE_UCHI(ptr3);
|
||||
}
|
||||
|
||||
void TimesIAvx512F(void *ptr1,void *ptr3)
|
||||
{
|
||||
@ -311,10 +474,12 @@ void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3)
|
||||
|
||||
LOAD_CHI(ptr1);
|
||||
|
||||
MULT_2SPIN(ptr2);
|
||||
MULT_ADDSUB_2SPIN(ptr2);
|
||||
//MULT_2SPIN(ptr2);
|
||||
|
||||
SAVE_UCHI(ptr3);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user