mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Merge branch 'develop' of github.com:paboyle/Grid into develop
This commit is contained in:
commit
f9c8e5c8ef
@ -775,7 +775,26 @@ public:
|
|||||||
for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
for(int p=0;p<npoint;p++) AcceleratorViewContainer[p].ViewClose();
|
||||||
}
|
}
|
||||||
|
|
||||||
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) :
|
CoarsenedMatrix(GridCartesian &CoarseGrid, int hermitian_=0) :
|
||||||
|
_grid(&CoarseGrid),
|
||||||
|
_cbgrid(new GridRedBlackCartesian(&CoarseGrid)),
|
||||||
|
geom(CoarseGrid._ndimension),
|
||||||
|
hermitian(hermitian_),
|
||||||
|
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||||
|
StencilEven(_cbgrid,geom.npoint,Even,geom.directions,geom.displacements,0),
|
||||||
|
StencilOdd(_cbgrid,geom.npoint,Odd,geom.directions,geom.displacements,0),
|
||||||
|
A(geom.npoint,&CoarseGrid),
|
||||||
|
Aeven(geom.npoint,_cbgrid),
|
||||||
|
Aodd(geom.npoint,_cbgrid),
|
||||||
|
AselfInv(&CoarseGrid),
|
||||||
|
AselfInvEven(_cbgrid),
|
||||||
|
AselfInvOdd(_cbgrid),
|
||||||
|
dag_factor(nbasis*nbasis)
|
||||||
|
{
|
||||||
|
fillFactor();
|
||||||
|
};
|
||||||
|
|
||||||
|
CoarsenedMatrix(GridCartesian &CoarseGrid, GridRedBlackCartesian &CoarseRBGrid, int hermitian_=0) :
|
||||||
|
|
||||||
_grid(&CoarseGrid),
|
_grid(&CoarseGrid),
|
||||||
_cbgrid(&CoarseRBGrid),
|
_cbgrid(&CoarseRBGrid),
|
||||||
@ -817,6 +836,8 @@ public:
|
|||||||
typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
|
typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
|
||||||
typedef typename Fobj::scalar_type scalar_type;
|
typedef typename Fobj::scalar_type scalar_type;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "CoarsenMatrix "<< std::endl;
|
||||||
|
|
||||||
FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
|
FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
|
||||||
FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
|
FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
|
||||||
|
|
||||||
@ -847,11 +868,13 @@ public:
|
|||||||
|
|
||||||
CoarseScalar InnerProd(Grid());
|
CoarseScalar InnerProd(Grid());
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "CoarsenMatrix Orthog "<< std::endl;
|
||||||
// Orthogonalise the subblocks over the basis
|
// Orthogonalise the subblocks over the basis
|
||||||
blockOrthogonalise(InnerProd,Subspace.subspace);
|
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||||
|
|
||||||
// Compute the matrix elements of linop between this orthonormal
|
// Compute the matrix elements of linop between this orthonormal
|
||||||
// set of vectors.
|
// set of vectors.
|
||||||
|
std::cout << GridLogMessage<< "CoarsenMatrix masks "<< std::endl;
|
||||||
int self_stencil=-1;
|
int self_stencil=-1;
|
||||||
for(int p=0;p<geom.npoint;p++)
|
for(int p=0;p<geom.npoint;p++)
|
||||||
{
|
{
|
||||||
@ -890,7 +913,7 @@ public:
|
|||||||
|
|
||||||
phi=Subspace.subspace[i];
|
phi=Subspace.subspace[i];
|
||||||
|
|
||||||
// std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
|
std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
|
||||||
linop.OpDirAll(phi,Mphi_p);
|
linop.OpDirAll(phi,Mphi_p);
|
||||||
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
|
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
|
||||||
|
|
||||||
@ -919,6 +942,18 @@ public:
|
|||||||
autoView( A_self , A[self_stencil], AcceleratorWrite);
|
autoView( A_self , A[self_stencil], AcceleratorWrite);
|
||||||
|
|
||||||
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
|
||||||
|
if ( hermitian && (disp==-1) ) {
|
||||||
|
for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
|
||||||
|
int dirp = geom.directions[pp];
|
||||||
|
int dispp = geom.displacements[pp];
|
||||||
|
if ( (dirp==dir) && (dispp==1) ){
|
||||||
|
auto sft = conjugate(Cshift(oZProj,dir,1));
|
||||||
|
autoView( sft_v , sft , AcceleratorWrite);
|
||||||
|
autoView( A_pp , A[pp], AcceleratorWrite);
|
||||||
|
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -957,33 +992,12 @@ public:
|
|||||||
}
|
}
|
||||||
if(hermitian) {
|
if(hermitian) {
|
||||||
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
|
std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
|
||||||
ForceHermitian();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl;
|
InvertSelfStencilLink(); std::cout << GridLogMessage << "Coarse self link inverted" << std::endl;
|
||||||
FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl;
|
FillHalfCbs(); std::cout << GridLogMessage << "Coarse half checkerboards filled" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ForceHermitian(void) {
|
|
||||||
CoarseMatrix Diff (Grid());
|
|
||||||
for(int p=0;p<geom.npoint;p++){
|
|
||||||
int dir = geom.directions[p];
|
|
||||||
int disp = geom.displacements[p];
|
|
||||||
if(disp==-1) {
|
|
||||||
// Find the opposite link
|
|
||||||
for(int pp=0;pp<geom.npoint;pp++){
|
|
||||||
int dirp = geom.directions[pp];
|
|
||||||
int dispp = geom.displacements[pp];
|
|
||||||
if ( (dirp==dir) && (dispp==1) ){
|
|
||||||
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
|
|
||||||
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
|
|
||||||
A[pp] = adj(Cshift(A[p],dir,1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void InvertSelfStencilLink() {
|
void InvertSelfStencilLink() {
|
||||||
std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
|
std::cout << GridLogDebug << "CoarsenedMatrix::InvertSelfStencilLink" << std::endl;
|
||||||
int localVolume = Grid()->lSites();
|
int localVolume = Grid()->lSites();
|
||||||
|
@ -123,7 +123,7 @@ assert(GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) < 1.0e-5);
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Helper to fill out metadata
|
// Helper to fill out metadata
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
|
||||||
FieldMetaData &header,
|
FieldMetaData &header,
|
||||||
scidacRecord & _scidacRecord,
|
scidacRecord & _scidacRecord,
|
||||||
scidacFile & _scidacFile)
|
scidacFile & _scidacFile)
|
||||||
@ -619,12 +619,12 @@ class IldgWriter : public ScidacWriter {
|
|||||||
// Don't require scidac records EXCEPT checksum
|
// Don't require scidac records EXCEPT checksum
|
||||||
// Use Grid MetaData object if present.
|
// Use Grid MetaData object if present.
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
template <class vsimd>
|
template <class stats = PeriodicGaugeStatistics>
|
||||||
void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
|
void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,int sequence,std::string LFN,std::string description)
|
||||||
{
|
{
|
||||||
GridBase * grid = Umu.Grid();
|
GridBase * grid = Umu.Grid();
|
||||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
typedef Lattice<vLorentzColourMatrixD> GaugeField;
|
||||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
typedef vLorentzColourMatrixD vobj;
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
@ -636,6 +636,9 @@ class IldgWriter : public ScidacWriter {
|
|||||||
|
|
||||||
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
|
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
|
||||||
|
|
||||||
|
stats Stats;
|
||||||
|
Stats(Umu,header);
|
||||||
|
|
||||||
std::string format = header.floating_point;
|
std::string format = header.floating_point;
|
||||||
header.ensemble_id = description;
|
header.ensemble_id = description;
|
||||||
header.ensemble_label = description;
|
header.ensemble_label = description;
|
||||||
@ -705,10 +708,10 @@ class IldgReader : public GridLimeReader {
|
|||||||
// Else use ILDG MetaData object if present.
|
// Else use ILDG MetaData object if present.
|
||||||
// Else use SciDAC MetaData object if present.
|
// Else use SciDAC MetaData object if present.
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
template <class vsimd>
|
template <class stats = PeriodicGaugeStatistics>
|
||||||
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
|
void readConfiguration(Lattice<vLorentzColourMatrixD> &Umu, FieldMetaData &FieldMetaData_) {
|
||||||
|
|
||||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
typedef Lattice<vLorentzColourMatrixD > GaugeField;
|
||||||
typedef typename GaugeField::vector_object vobj;
|
typedef typename GaugeField::vector_object vobj;
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
@ -921,7 +924,8 @@ class IldgReader : public GridLimeReader {
|
|||||||
|
|
||||||
if ( found_FieldMetaData || found_usqcdInfo ) {
|
if ( found_FieldMetaData || found_usqcdInfo ) {
|
||||||
FieldMetaData checker;
|
FieldMetaData checker;
|
||||||
GaugeStatistics(Umu,checker);
|
stats Stats;
|
||||||
|
Stats(Umu,checker);
|
||||||
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
|
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
|
||||||
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
|
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
|
||||||
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
|
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
|
||||||
|
@ -176,29 +176,18 @@ template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMet
|
|||||||
GridMetaData(grid,header);
|
GridMetaData(grid,header);
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
}
|
}
|
||||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
|
template<class Impl>
|
||||||
|
class GaugeStatistics
|
||||||
{
|
{
|
||||||
// How to convert data precision etc...
|
public:
|
||||||
header.link_trace=WilsonLoops<PeriodicGimplF>::linkTrace(data);
|
void operator()(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
|
||||||
header.plaquette =WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
|
{
|
||||||
}
|
header.link_trace=WilsonLoops<Impl>::linkTrace(data);
|
||||||
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
|
header.plaquette =WilsonLoops<Impl>::avgPlaquette(data);
|
||||||
{
|
}
|
||||||
// How to convert data precision etc...
|
};
|
||||||
header.link_trace=WilsonLoops<PeriodicGimplD>::linkTrace(data);
|
typedef GaugeStatistics<PeriodicGimplD> PeriodicGaugeStatistics;
|
||||||
header.plaquette =WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
|
typedef GaugeStatistics<ConjugateGimplD> ConjugateGaugeStatistics;
|
||||||
}
|
|
||||||
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
|
|
||||||
{
|
|
||||||
|
|
||||||
GridBase *grid = field.Grid();
|
|
||||||
std::string format = getFormatString<vLorentzColourMatrixF>();
|
|
||||||
header.floating_point = format;
|
|
||||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
|
||||||
GridMetaData(grid,header);
|
|
||||||
GaugeStatistics(field,header);
|
|
||||||
MachineCharacteristics(header);
|
|
||||||
}
|
|
||||||
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
|
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
|
||||||
{
|
{
|
||||||
GridBase *grid = field.Grid();
|
GridBase *grid = field.Grid();
|
||||||
@ -206,7 +195,6 @@ template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzCo
|
|||||||
header.floating_point = format;
|
header.floating_point = format;
|
||||||
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
|
||||||
GridMetaData(grid,header);
|
GridMetaData(grid,header);
|
||||||
GaugeStatistics(field,header);
|
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,6 +40,8 @@ using namespace Grid;
|
|||||||
class NerscIO : public BinaryIO {
|
class NerscIO : public BinaryIO {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
typedef Lattice<vLorentzColourMatrixD> GaugeField;
|
||||||
|
|
||||||
static inline void truncate(std::string file){
|
static inline void truncate(std::string file){
|
||||||
std::ofstream fout(file,std::ios::out);
|
std::ofstream fout(file,std::ios::out);
|
||||||
}
|
}
|
||||||
@ -129,12 +131,12 @@ public:
|
|||||||
// Now the meat: the object readers
|
// Now the meat: the object readers
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class vsimd>
|
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||||
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
static inline void readConfiguration(GaugeField &Umu,
|
||||||
FieldMetaData& header,
|
FieldMetaData& header,
|
||||||
std::string file)
|
std::string file,
|
||||||
|
GaugeStats GaugeStatisticsCalculator=GaugeStats())
|
||||||
{
|
{
|
||||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
|
||||||
|
|
||||||
GridBase *grid = Umu.Grid();
|
GridBase *grid = Umu.Grid();
|
||||||
uint64_t offset = readHeader(file,Umu.Grid(),header);
|
uint64_t offset = readHeader(file,Umu.Grid(),header);
|
||||||
@ -153,23 +155,23 @@ public:
|
|||||||
// munger is a function of <floating point, Real, data_type>
|
// munger is a function of <floating point, Real, data_type>
|
||||||
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
|
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3F>
|
||||||
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
|
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
|
BinaryIO::readLatticeObject<vLorentzColourMatrixD, LorentzColour2x3D>
|
||||||
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
|
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
}
|
}
|
||||||
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
|
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
|
||||||
if ( ieee32 || ieee32big ) {
|
if ( ieee32 || ieee32big ) {
|
||||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
|
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixF>
|
||||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
|
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
}
|
}
|
||||||
if ( ieee64 || ieee64big ) {
|
if ( ieee64 || ieee64big ) {
|
||||||
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
|
BinaryIO::readLatticeObject<vLorentzColourMatrixD,LorentzColourMatrixD>
|
||||||
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
|
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
}
|
}
|
||||||
@ -177,7 +179,7 @@ public:
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
GaugeStatistics(Umu,clone);
|
GaugeStats Stats; Stats(Umu,clone);
|
||||||
|
|
||||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
|
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
|
||||||
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
|
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
|
||||||
@ -203,15 +205,13 @@ public:
|
|||||||
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vsimd>
|
template<class GaugeStats=PeriodicGaugeStatistics>
|
||||||
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
|
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
|
||||||
std::string file,
|
std::string file,
|
||||||
int two_row,
|
int two_row,
|
||||||
int bits32)
|
int bits32)
|
||||||
{
|
{
|
||||||
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
|
typedef vLorentzColourMatrixD vobj;
|
||||||
|
|
||||||
typedef iLorentzColourMatrix<vsimd> vobj;
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
|
||||||
FieldMetaData header;
|
FieldMetaData header;
|
||||||
@ -229,7 +229,7 @@ public:
|
|||||||
|
|
||||||
GridMetaData(grid,header);
|
GridMetaData(grid,header);
|
||||||
assert(header.nd==4);
|
assert(header.nd==4);
|
||||||
GaugeStatistics(Umu,header);
|
GaugeStats Stats; Stats(Umu,header);
|
||||||
MachineCharacteristics(header);
|
MachineCharacteristics(header);
|
||||||
|
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
@ -238,19 +238,19 @@ public:
|
|||||||
header.floating_point = std::string("IEEE64BIG");
|
header.floating_point = std::string("IEEE64BIG");
|
||||||
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
header.data_type = std::string("4D_SU3_GAUGE_3x3");
|
||||||
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
GaugeSimpleUnmunger<fobj3D,sobj> munge;
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
truncate(file);
|
truncate(file);
|
||||||
offset = writeHeader(header,file);
|
offset = writeHeader(header,file);
|
||||||
}
|
}
|
||||||
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
grid->Broadcast(0,(void *)&offset,sizeof(offset));
|
||||||
|
|
||||||
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
|
||||||
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
|
||||||
nersc_csum,scidac_csuma,scidac_csumb);
|
nersc_csum,scidac_csuma,scidac_csumb);
|
||||||
header.checksum = nersc_csum;
|
header.checksum = nersc_csum;
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
writeHeader(header,file);
|
writeHeader(header,file);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "
|
||||||
<<std::hex<<header.checksum
|
<<std::hex<<header.checksum
|
||||||
|
@ -154,7 +154,7 @@ public:
|
|||||||
grid->Barrier(); timer.Stop();
|
grid->Barrier(); timer.Stop();
|
||||||
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
|
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
|
||||||
|
|
||||||
GaugeStatistics(Umu, clone);
|
PeriodicGaugeStatistics Stats; Stats(Umu, clone);
|
||||||
|
|
||||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||||
|
|
||||||
|
@ -208,7 +208,7 @@ public:
|
|||||||
|
|
||||||
FieldMetaData clone(header);
|
FieldMetaData clone(header);
|
||||||
|
|
||||||
GaugeStatistics(Umu, clone);
|
PeriodicGaugeStatistics Stats; Stats(Umu, clone);
|
||||||
|
|
||||||
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
|
||||||
|
|
||||||
|
@ -80,6 +80,13 @@ template<typename T> struct isSpinor {
|
|||||||
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
|
||||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
|
||||||
|
|
||||||
|
const int CoarseIndex = 4;
|
||||||
|
template<typename T> struct isCoarsened {
|
||||||
|
static constexpr bool value = (CoarseIndex<=T::TensorLevel);
|
||||||
|
};
|
||||||
|
template <typename T> using IfCoarsened = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
|
||||||
|
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
|
||||||
|
|
||||||
// ChrisK very keen to add extra space for Gparity doubling.
|
// ChrisK very keen to add extra space for Gparity doubling.
|
||||||
//
|
//
|
||||||
// Also add domain wall index, in a way where Wilson operator
|
// Also add domain wall index, in a way where Wilson operator
|
||||||
|
@ -642,7 +642,7 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
|||||||
Current curr_type,
|
Current curr_type,
|
||||||
unsigned int mu)
|
unsigned int mu)
|
||||||
{
|
{
|
||||||
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
|
#if (!defined(GRID_HIP))
|
||||||
Gamma::Algebra Gmu [] = {
|
Gamma::Algebra Gmu [] = {
|
||||||
Gamma::Algebra::GammaX,
|
Gamma::Algebra::GammaX,
|
||||||
Gamma::Algebra::GammaY,
|
Gamma::Algebra::GammaY,
|
||||||
@ -826,7 +826,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
|
#if (!defined(GRID_HIP))
|
||||||
int tshift = (mu == Nd-1) ? 1 : 0;
|
int tshift = (mu == Nd-1) ? 1 : 0;
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
// GENERAL CAYLEY CASE
|
// GENERAL CAYLEY CASE
|
||||||
|
@ -38,9 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|||||||
// undefine everything related to kernels
|
// undefine everything related to kernels
|
||||||
#include <simd/Fujitsu_A64FX_undef.h>
|
#include <simd/Fujitsu_A64FX_undef.h>
|
||||||
|
|
||||||
// enable A64FX body
|
|
||||||
#define WILSONKERNELSASMBODYA64FX
|
|
||||||
//#pragma message("A64FX Dslash: WilsonKernelsAsmBodyA64FX.h")
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// If we are A64FX specialise the single precision routine
|
// If we are A64FX specialise the single precision routine
|
||||||
@ -63,119 +60,89 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
|||||||
#define INTERIOR_AND_EXTERIOR
|
#define INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
#define EXTERIOR
|
#define EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////
|
||||||
@ -185,119 +152,89 @@ WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldV
|
|||||||
#define INTERIOR_AND_EXTERIOR
|
#define INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
#define EXTERIOR
|
#define EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// undefine
|
// undefine
|
||||||
@ -330,119 +267,89 @@ WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFie
|
|||||||
#define INTERIOR_AND_EXTERIOR
|
#define INTERIOR_AND_EXTERIOR
|
||||||
#undef INTERIOR
|
#undef INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef INTERIOR_AND_EXTERIOR
|
#undef INTERIOR_AND_EXTERIOR
|
||||||
#define INTERIOR
|
#define INTERIOR
|
||||||
#undef EXTERIOR
|
#undef EXTERIOR
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
|
||||||
template<> void
|
template<> void
|
||||||
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
#if defined (WILSONKERNELSASMBODYA64FX)
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
|
||||||
#else
|
|
||||||
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                             int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                             int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

/////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, double
@@ -451,124 +358,93 @@ WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldV
#define INTERIOR_AND_EXTERIOR
#undef INTERIOR
#undef EXTERIOR

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                           int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                            int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                             int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

#undef INTERIOR_AND_EXTERIOR
#define INTERIOR
#undef EXTERIOR

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                                int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

#undef INTERIOR_AND_EXTERIOR
#undef INTERIOR
#define EXTERIOR

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                              int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                               int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

+#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
                                                int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
-#if defined (WILSONKERNELSASMBODYA64FX)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
-#else
-#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
-#endif

// undefs
-#undef WILSONKERNELSASMBODYA64FX
#include <simd/Fujitsu_A64FX_undef.h>

#endif //A64FXASM
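The #pragma GCC optimize lines added above pin the workaround flags to the kernel instantiations themselves rather than to the whole build. As a minimal, generic sketch (not Grid code; the function and names below are invented for illustration), the same effect can be scoped explicitly with GCC's push/pop options:

// Generic GCC feature sketch: keep -O3 but disable the two instruction
// scheduling passes for one region only, then restore the outer options.
#pragma GCC push_options
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
void hot_stencil_stub(double *out, const double *in, int n) {
  for (int i = 0; i < n; i++) out[i] = 2.0 * in[i];   // stand-in for the assembly body
}
#pragma GCC pop_options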
@@ -25,6 +25,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University

   See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */

+// GCC 10 messes up SVE instruction scheduling using -O3, but
+// -O3 -fno-schedule-insns -fno-schedule-insns2 does wonders
+// performance now is better than armclang 20.2
+
#ifdef KERNEL_DAG
#define DIR0_PROJ    XP_PROJ
#define DIR1_PROJ    YP_PROJ

@@ -97,7 +102,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
    PROJ; \
    MAYBEPERM(PERMUTE_DIR,perm); \
  } else { \
    LOAD_CHI(base); \
  } \
  base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
  MULT_2SPIN_1(Dir); \

@@ -110,6 +115,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  } \
  RECON; \

+/*
+NB: picking PREFETCH_GAUGE_L2(Dir+4); here results in performance penalty
+    though I expected that it would improve on performance
+*/
+
#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  PREFETCH1_CHIMU(base); \

@@ -126,73 +136,63 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University

#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  basep = st.GetPFInfo(nent,plocal); nent++; \
  if ( local ) { \
    LOAD_CHIMU(base); \
    LOAD_TABLE(PERMUTE_DIR); \
    PROJ; \
    MAYBEPERM(PERMUTE_DIR,perm); \
  }else if ( st.same_node[Dir] ) {LOAD_CHI(base);} \
-  base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
  if ( local || st.same_node[Dir] ) { \
    MULT_2SPIN_1(Dir); \
-    PREFETCH_CHIMU(base); \
-    /* PREFETCH_GAUGE_L1(NxtDir); */ \
    MULT_2SPIN_2; \
-    if (s == 0) { \
-      if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
-    } \
    RECON; \
-    PREFETCH_CHIMU_L2(basep); \
-  } else { PREFETCH_CHIMU(base); } \
+  } \
+  base = st.GetInfo(ptype,local,perm,NxtDir,ent,plocal); ent++; \
+  PREFETCH_CHIMU(base); \
+  PREFETCH_CHIMU_L2(basep); \

#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  PREFETCH1_CHIMU(base); \
+  { ZERO_PSI; } \
  ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON)

#define RESULT(base,basep) SAVE_RESULT(base,basep);

#endif

////////////////////////////////////////////////////////////////////////////////
// Post comms kernel
////////////////////////////////////////////////////////////////////////////////
#ifdef EXTERIOR

#define ASM_LEG(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_1(Dir); \
-    PREFETCH_CHIMU(base); \
-    /* PREFETCH_GAUGE_L1(NxtDir); */ \
    MULT_2SPIN_2; \
-    if (s == 0) { \
-      if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
-    } \
    RECON; \
    nmu++; \
  }

#define ASM_LEG_XP(Dir,NxtDir,PERMUTE_DIR,PROJ,RECON) \
  nmu=0; \
+  { ZERO_PSI;} \
  base = st.GetInfo(ptype,local,perm,Dir,ent,plocal); ent++; \
  if((!local)&&(!st.same_node[Dir]) ) { \
    LOAD_CHI(base); \
    MULT_2SPIN_1(Dir); \
-    PREFETCH_CHIMU(base); \
-    /* PREFETCH_GAUGE_L1(NxtDir); */ \
    MULT_2SPIN_2; \
-    if (s == 0) { \
-      if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
-    } \
    RECON; \
    nmu++; \
  }

#define RESULT(base,basep) if (nmu){ ADD_RESULT(base,base);}

#endif

{
  int nmu;
  int local,perm, ptype;

@@ -209,7 +209,6 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  int ssn=ssU+1; if(ssn>=nmax) ssn=0;
  // int sUn=lo.Reorder(ssn);
  int sUn=ssn;
-  LOCK_GAUGE(0);
#else
  int sU =ssU;
  int ssn=ssU+1; if(ssn>=nmax) ssn=0;

@@ -295,6 +294,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  std::cout << "----------------------------------------------------" << std::endl;
#endif

+  // DC ZVA test
+  // { uint64_t basestore = (uint64_t)&out[ss];
+  //   PREFETCH_RESULT_L2_STORE(basestore); }

  ASM_LEG(Ym,Zm,PERMUTE_DIR2,DIR5_PROJ,DIR5_RECON);

#ifdef SHOW
@@ -308,6 +312,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  std::cout << "----------------------------------------------------" << std::endl;
#endif

+  // DC ZVA test
+  //{ uint64_t basestore = (uint64_t)&out[ss];
+  //  PREFETCH_RESULT_L2_STORE(basestore); }

  ASM_LEG(Zm,Tm,PERMUTE_DIR1,DIR6_PROJ,DIR6_RECON);

#ifdef SHOW
@@ -321,6 +330,11 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  std::cout << "----------------------------------------------------" << std::endl;
#endif

+  // DC ZVA test
+  //{ uint64_t basestore = (uint64_t)&out[ss];
+  //  PREFETCH_RESULT_L2_STORE(basestore); }

  ASM_LEG(Tm,Xp,PERMUTE_DIR0,DIR7_PROJ,DIR7_RECON);

#ifdef SHOW
@@ -341,6 +355,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
  base = (uint64_t) &out[ss];
  basep= st.GetPFInfo(nent,plocal); ent++;
  basep = (uint64_t) &out[ssn];
+  //PREFETCH_RESULT_L1_STORE(base);
  RESULT(base,basep);

#ifdef SHOW
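The ASM_LEG / ASM_LEG_XP rewrite above splits each stencil leg between the interior (pre-comms) and exterior (post-comms) passes, and clears the accumulator with ZERO_PSI at the start of the first leg. A hedged, schematic rendering of that control flow in plain C++ follows; the Leg struct and the accumulate callback are invented for illustration and are not Grid's types.

struct Leg { bool local; bool same_node; };          // invented illustration types

template <class F> void interior_pass(const Leg *legs, int nlegs, F &&accumulate) {
  // INTERIOR: a leg is applied now only if its neighbour needs no communication
  for (int d = 0; d < nlegs; d++)
    if (legs[d].local || legs[d].same_node) accumulate(d);
}

template <class F> int exterior_pass(const Leg *legs, int nlegs, F &&accumulate) {
  int nmu = 0;                                       // counts legs completed after comms
  for (int d = 0; d < nlegs; d++)
    if (!legs[d].local && !legs[d].same_node) { accumulate(d); nmu++; }
  return nmu;                                        // RESULT only accumulates when nmu != 0
}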
Grid/qcd/action/gauge/Gauge.cc (new file, 38 lines)
@@ -0,0 +1,38 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/qcd/action/gauge/Gauge.cc
+
+Copyright (C) 2020
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/* END LEGAL */
+#include <Grid/qcd/action/fermion/FermionCore.h>
+
+NAMESPACE_BEGIN(Grid);
+
+std::vector<int> ConjugateGaugeImplBase::_conjDirs;
+
+NAMESPACE_END(Grid);
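The only purpose of the new Gauge.cc is to give the static ConjugateGaugeImplBase::_conjDirs member (declared in the header change below) a single out-of-line definition. A minimal stand-alone illustration of the C++ rule involved, using hypothetical names:

#include <vector>
struct Base { static std::vector<int> dirs; };   // declaration lives in a header
std::vector<int> Base::dirs;                     // exactly one .cc file must define it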
@@ -59,14 +59,14 @@ public:
  }
  static inline GaugeLinkField
  CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
-    return Cshift(adj(Link), mu, -1);
+    return PeriodicBC::CovShiftIdentityBackward(Link, mu);
  }
  static inline GaugeLinkField
  CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
-    return Link;
+    return PeriodicBC::CovShiftIdentityForward(Link,mu);
  }
  static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
-    return Cshift(Link, mu, 1);
+    return PeriodicBC::ShiftStaple(Link,mu);
  }

  static inline bool isPeriodicGaugeField(void) { return true; }

@@ -74,7 +74,13 @@ public:

// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
-template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes {
+class ConjugateGaugeImplBase {
+protected:
+  static std::vector<int> _conjDirs;
+};
+
+template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes, ConjugateGaugeImplBase {
+private:
public:
  INHERIT_GIMPL_TYPES(GimplTypes);

@@ -84,47 +90,56 @@ public:
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
  template <class covariant>
  static Lattice<covariant> CovShiftForward(const GaugeLinkField &Link, int mu,
-                                            const Lattice<covariant> &field) {
-    return ConjugateBC::CovShiftForward(Link, mu, field);
+                                            const Lattice<covariant> &field)
+  {
+    assert(_conjDirs.size() == Nd);
+    if(_conjDirs[mu])
+      return ConjugateBC::CovShiftForward(Link, mu, field);
+    else
+      return PeriodicBC::CovShiftForward(Link, mu, field);
  }

  template <class covariant>
  static Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,
-                                             const Lattice<covariant> &field) {
-    return ConjugateBC::CovShiftBackward(Link, mu, field);
+                                             const Lattice<covariant> &field)
+  {
+    assert(_conjDirs.size() == Nd);
+    if(_conjDirs[mu])
+      return ConjugateBC::CovShiftBackward(Link, mu, field);
+    else
+      return PeriodicBC::CovShiftBackward(Link, mu, field);
  }

  static inline GaugeLinkField
-  CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
-    GridBase *grid = Link.Grid();
-    int Lmu = grid->GlobalDimensions()[mu] - 1;
-
-    Lattice<iScalar<vInteger>> coor(grid);
-    LatticeCoordinate(coor, mu);
-
-    GaugeLinkField tmp(grid);
-    tmp = adj(Link);
-    tmp = where(coor == Lmu, conjugate(tmp), tmp);
-    return Cshift(tmp, mu, -1); // moves towards positive mu
+  CovShiftIdentityBackward(const GaugeLinkField &Link, int mu)
+  {
+    assert(_conjDirs.size() == Nd);
+    if(_conjDirs[mu])
+      return ConjugateBC::CovShiftIdentityBackward(Link, mu);
+    else
+      return PeriodicBC::CovShiftIdentityBackward(Link, mu);
  }
  static inline GaugeLinkField
-  CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
-    return Link;
+  CovShiftIdentityForward(const GaugeLinkField &Link, int mu)
+  {
+    assert(_conjDirs.size() == Nd);
+    if(_conjDirs[mu])
+      return ConjugateBC::CovShiftIdentityForward(Link,mu);
+    else
+      return PeriodicBC::CovShiftIdentityForward(Link,mu);
  }

-  static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
-    GridBase *grid = Link.Grid();
-    int Lmu = grid->GlobalDimensions()[mu] - 1;
-
-    Lattice<iScalar<vInteger>> coor(grid);
-    LatticeCoordinate(coor, mu);
-
-    GaugeLinkField tmp(grid);
-    tmp = Cshift(Link, mu, 1);
-    tmp = where(coor == Lmu, conjugate(tmp), tmp);
-    return tmp;
+  static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu)
+  {
+    assert(_conjDirs.size() == Nd);
+    if(_conjDirs[mu])
+      return ConjugateBC::ShiftStaple(Link,mu);
+    else
+      return PeriodicBC::ShiftStaple(Link,mu);
  }

+  static inline void setDirections(std::vector<int> &conjDirs) { _conjDirs=conjDirs; }
+  static inline std::vector<int> getDirections(void) { return _conjDirs; }
  static inline bool isPeriodicGaugeField(void) { return false; }
};

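With _conjDirs consulted in every covariant shift, the conjugate (charge-conjugation, C*) boundary can now be applied per direction. A hedged usage sketch follows, assuming a ConjugateGaugeImpl-based Gimpl typedef; the name ConjugateGimplR is used here only for illustration.

#include <cassert>
std::vector<int> conjDirs = {1, 1, 0, 0};         // C* boundary in x and y, periodic in z and t
ConjugateGimplR::setDirections(conjDirs);         // fills _conjDirs; the asserts above require size Nd
assert(ConjugateGimplR::getDirections() == conjDirs);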
@@ -74,7 +74,7 @@ public:
      conf_file = os.str();
    }
  }
+  virtual ~BaseHmcCheckpointer(){};
  void check_filename(const std::string &filename){
    std::ifstream f(filename.c_str());
    if(!f.good()){
@@ -82,7 +82,6 @@ public:
      abort();
    };
  }

  virtual void initialize(const CheckpointerParameters &Params) = 0;

  virtual void CheckpointRestore(int traj, typename Impl::Field &U,

@@ -45,6 +45,7 @@ private:

public:
  INHERIT_GIMPL_TYPES(Implementation);
+  typedef GaugeStatistics<Implementation> GaugeStats;

  ILDGHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }

@@ -78,7 +79,7 @@ public:
    BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
    IldgWriter _IldgWriter(grid->IsBoss());
    _IldgWriter.open(config);
-    _IldgWriter.writeConfiguration(U, traj, config, config);
+    _IldgWriter.writeConfiguration<GaugeStats>(U, traj, config, config);
    _IldgWriter.close();

    std::cout << GridLogMessage << "Written ILDG Configuration on " << config
@@ -105,7 +106,7 @@ public:
    FieldMetaData header;
    IldgReader _IldgReader;
    _IldgReader.open(config);
-    _IldgReader.readConfiguration(U,header);  // format from the header
+    _IldgReader.readConfiguration<GaugeStats>(U,header);  // format from the header
    _IldgReader.close();

    std::cout << GridLogMessage << "Read ILDG Configuration from " << config

@@ -43,7 +43,8 @@ private:

public:
  INHERIT_GIMPL_TYPES(Gimpl);  // only for gauge configurations
+  typedef GaugeStatistics<Gimpl> GaugeStats;

  NerscHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); }

  void initialize(const CheckpointerParameters &Params_) {
@@ -60,7 +61,7 @@ public:
    int precision32 = 1;
    int tworow = 0;
    NerscIO::writeRNGState(sRNG, pRNG, rng);
-    NerscIO::writeConfiguration(U, config, tworow, precision32);
+    NerscIO::writeConfiguration<GaugeStats>(U, config, tworow, precision32);
  }
};

@@ -74,7 +75,7 @@ public:

    FieldMetaData header;
    NerscIO::readRNGState(sRNG, pRNG, header, rng);
-    NerscIO::readConfiguration(U, header, config);
+    NerscIO::readConfiguration<GaugeStats>(U, header, config);
  };
};

@@ -99,7 +99,7 @@ public:
  virtual Prod* getPtr() = 0;

  // add a getReference?
+  virtual ~HMCModuleBase(){};
  virtual void print_parameters(){};  // default to nothing
};

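Two of the additions above are virtual destructors on the checkpointer and module base classes; without them, destroying a concrete checkpointer through a base-class pointer is undefined behaviour. A generic illustration of the rule (not Grid code, hypothetical names):

#include <memory>
struct ModuleBase    { virtual ~ModuleBase() {} };            // mirrors the added virtual ~HMCModuleBase()
struct Checkpointer  : ModuleBase { /* owns files, RNG state, ... */ };

std::unique_ptr<ModuleBase> m = std::make_unique<Checkpointer>();  // now destroys the derived part correctly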
@@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
}
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(0)-fspin(2);
  hspin(1)=fspin(1)-fspin(3);
}
@@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 *  0 0 -1  0
 *  0 0  0 -1
 */

template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(0);
  hspin(1)=fspin(1);
}

template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  hspin(0)=fspin(2);
  hspin(1)=fspin(3);
}

-// template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  rfspin(0)=fspin(0);
  rfspin(1)=fspin(1);
  rfspin(2)=Zero();
  rfspin(3)=Zero();
}
-// template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
  rfspin(0)=Zero();
  rfspin(1)=Zero();
  rfspin(2)=fspin(2);
  rfspin(3)=fspin(3);
}

+template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
+{
+  const int hN = N>>1;
+  for(int s=0;s<hN;s++){
+    rfspin(s)=fspin(s);
+    rfspin(s+hN)=Zero();
+  }
+}
+template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
+{
+  const int hN = N>>1;
+  for(int s=0;s<hN;s++){
+    rfspin(s)=Zero();
+    rfspin(s+hN)=fspin(s+hN);
+  }
+}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reconstruction routines to move back again to four spin
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
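The IfCoarsened overloads of spProj5p / spProj5m added above generalise the 5d projector to a coarse vector of N components: one half of the vector is kept and the other half is zeroed. A small generic sketch of the same split, using illustrative types rather than Grid's tensors:

#include <array>
template<class T, int N> void proj5p(std::array<T,N> &r, const std::array<T,N> &f) {
  const int hN = N >> 1;
  for (int s = 0; s < hN; s++) { r[s] = f[s]; r[s + hN] = T(); }   // keep first half, zero second
}
// proj5m would do the mirror image: zero the first half, keep the second half.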
@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
*/
|
*/
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=timesMinusI(hspin(1));
|
fspin(2)=timesMinusI(hspin(1));
|
||||||
@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=timesI(hspin(1));
|
fspin(2)=timesI(hspin(1));
|
||||||
@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=timesI(hspin(1));
|
fspin(2)-=timesI(hspin(1));
|
||||||
@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=timesI(hspin(1));
|
fspin(2)+=timesI(hspin(1));
|
||||||
@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
|
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)= hspin(1);
|
fspin(2)= hspin(1);
|
||||||
@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=-hspin(1);
|
fspin(2)=-hspin(1);
|
||||||
@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=hspin(1);
|
fspin(2)+=hspin(1);
|
||||||
@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=hspin(1);
|
fspin(2)-=hspin(1);
|
||||||
@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
*/
|
*/
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=timesMinusI(hspin(0));
|
fspin(2)=timesMinusI(hspin(0));
|
||||||
@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)= timesI(hspin(0));
|
fspin(2)= timesI(hspin(0));
|
||||||
@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=timesI(hspin(0));
|
fspin(2)-=timesI(hspin(0));
|
||||||
@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=timesI(hspin(0));
|
fspin(2)+=timesI(hspin(0));
|
||||||
@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
*/
|
*/
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=hspin(0);
|
fspin(2)=hspin(0);
|
||||||
@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0);
|
fspin(0)=hspin(0);
|
||||||
fspin(1)=hspin(1);
|
fspin(1)=hspin(1);
|
||||||
fspin(2)=-hspin(0);
|
fspin(2)=-hspin(0);
|
||||||
@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)+=hspin(0);
|
fspin(2)+=hspin(0);
|
||||||
@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0);
|
fspin(0)+=hspin(0);
|
||||||
fspin(1)+=hspin(1);
|
fspin(1)+=hspin(1);
|
||||||
fspin(2)-=hspin(0);
|
fspin(2)-=hspin(0);
|
||||||
@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
*/
|
*/
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
|
fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
|
||||||
fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
|
fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
|
||||||
fspin(2)=Zero();
|
fspin(2)=Zero();
|
||||||
@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)=Zero();
|
fspin(0)=Zero();
|
||||||
fspin(1)=Zero();
|
fspin(1)=Zero();
|
||||||
fspin(2)=hspin(0)+hspin(0);
|
fspin(2)=hspin(0)+hspin(0);
|
||||||
@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
|
|||||||
}
|
}
|
||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
|
||||||
fspin(0)+=hspin(0)+hspin(0);
|
fspin(0)+=hspin(0)+hspin(0);
|
||||||
fspin(1)+=hspin(1)+hspin(1);
|
fspin(1)+=hspin(1)+hspin(1);
|
||||||
}
|
}
|
||||||
@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
|
|||||||
//////////
|
//////////
|
||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
|
||||||
{
|
{
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
|
|
||||||
for(int i=0;i<N;i++) {
|
for(int i=0;i<N;i++) {
|
||||||
spProjXp(hspin._internal[i],fspin._internal[i]);
|
spProjXp(hspin._internal[i],fspin._internal[i]);
|
||||||
}
|
}
|
||||||
@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
|
|||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////
|
////////
|
||||||
// Xm
|
// Xm
|
||||||
////////
|
////////
|
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri

template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconXm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconXm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}
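// Each direction supplies the same three-member family used by the hopping-term
// kernels: spProjXm packs the spin projection for the -x hop into a two-component
// half spinor, spReconXm expands a half spinor back to a full four-spinor, and
// accumReconXm adds that reconstruction into an existing four-spinor; the
// iScalar/iVector/iMatrix overloads merely recurse through non-spinor tensor
// indices until the spinor index is reached. A minimal usage sketch, assuming
// Grid's SpinColourVector and HalfSpinColourVector types are in scope:
//
//   SpinColourVector     psi, acc; psi = Zero(); acc = Zero();
//   HalfSpinColourVector chi;
//   spProjXm    (chi, psi);   // half spinor for the -x hop
//   accumReconXm(acc, chi);   // acc += reconstructed four-spinor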


////////
// Yp
////////
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri

template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconYp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconYp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


////////
// Ym
////////
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconYm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconYm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}

template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconZp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconZp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjZm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconZm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}

template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconZm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconZm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
////////
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconTp(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconTp(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


////////
// Tm
////////
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProjTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProjTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri

template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spReconTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr

template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumReconTm(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumReconTm(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}


////////
// 5p
////////
-template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5p(hspin._internal,fspin._internal);
}
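// The IfNotCoarsened default argument introduced above is a SFINAE guard: it
// removes this generic fine-grid overload from resolution when the argument is a
// coarsened (blocked, nbasis-component) tensor, so that coarse-specific
// spProj5p/spProj5m overloads can be provided without ambiguity. A minimal sketch
// of how such a guard can be built; the names below are illustrative stand-ins,
// not Grid's actual trait definitions:
//
//   #include <type_traits>
//   template<typename T> struct isCoarsenedSketch : std::false_type {};
//   template<typename T>
//   using IfNotCoarsenedSketch = typename std::enable_if<!isCoarsenedSketch<T>::value,int>::type;
//
//   template<class vtype, IfNotCoarsenedSketch<vtype> = 0>
//   void fine_grid_only_sketch(const vtype &in); // drops out of overload resolution for coarse tensors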
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5p(hspin._internal[i],fspin._internal[i]);
  }
}
-template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri

template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spRecon5p(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr

template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumRecon5p(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumRecon5p(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
}

// four spinor projectors for chiral proj
-// template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
-template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
+template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5p(hspin._internal,fspin._internal);
}
-// template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5p(hspin._internal[i],fspin._internal[i]);
  }
}
-// template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
// 5m
////////

-template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
  spProj5m(hspin._internal,fspin._internal);
}
-template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
  for(int i=0;i<N;i++) {
    spProj5m(hspin._internal[i],fspin._internal[i]);
  }
}
-template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri

template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spRecon5m(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
    }}
}

template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  accumRecon5m(hspin._internal,fspin._internal);
}
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    accumRecon5m(hspin._internal[i],fspin._internal[i]);
  }
}
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM


// four spinor projectors for chiral proj
-// template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
-template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
+template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
  spProj5m(hspin._internal,fspin._internal);
}
-// template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
+template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
  for(int i=0;i<N;i++) {
    spProj5m(hspin._internal[i],fspin._internal[i]);
  }
}
-// template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
+template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
{
-  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
@ -53,6 +53,24 @@ namespace PeriodicBC {
    return Cshift(tmp,mu,-1);// moves towards positive mu
  }

+  template<class gauge> Lattice<gauge>
+  CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu)
+  {
+    return Cshift(adj(Link), mu, -1);
+  }
+
+  template<class gauge> Lattice<gauge>
+  CovShiftIdentityForward(const Lattice<gauge> &Link, int mu)
+  {
+    return Link;
+  }
+
+  template<class gauge> Lattice<gauge>
+  ShiftStaple(const Lattice<gauge> &Link, int mu)
+  {
+    return Cshift(Link, mu, 1);
+  }
+
  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
  auto CovShiftForward(const Lattice<gauge> &Link,
                       int mu,
@ -70,6 +88,7 @@ namespace PeriodicBC {
    return CovShiftBackward(Link,mu,arg);
  }

+
}


@ -139,6 +158,38 @@ namespace ConjugateBC {
    //    std::cout<<"Gparity::CovCshiftBackward mu="<<mu<<std::endl;
    return Cshift(tmp,mu,-1);// moves towards positive mu
  }
+  template<class gauge> Lattice<gauge>
+  CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
+    GridBase *grid = Link.Grid();
+    int Lmu = grid->GlobalDimensions()[mu] - 1;
+
+    Lattice<iScalar<vInteger>> coor(grid);
+    LatticeCoordinate(coor, mu);
+
+    Lattice<gauge> tmp(grid);
+    tmp = adj(Link);
+    tmp = where(coor == Lmu, conjugate(tmp), tmp);
+    return Cshift(tmp, mu, -1); // moves towards positive mu
+  }
+  template<class gauge> Lattice<gauge>
+  CovShiftIdentityForward(const Lattice<gauge> &Link, int mu) {
+    return Link;
+  }
+
+  template<class gauge> Lattice<gauge>
+  ShiftStaple(const Lattice<gauge> &Link, int mu)
+  {
+    GridBase *grid = Link.Grid();
+    int Lmu = grid->GlobalDimensions()[mu] - 1;
+
+    Lattice<iScalar<vInteger>> coor(grid);
+    LatticeCoordinate(coor, mu);
+
+    Lattice<gauge> tmp(grid);
+    tmp = Cshift(Link, mu, 1);
+    tmp = where(coor == Lmu, conjugate(tmp), tmp);
+    return tmp;
+  }

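// The helpers added above mirror the PeriodicBC versions, with the extra step
// that the field is complex-conjugated on the last global slice in direction mu
// (coor == Lmu), i.e. where the conjugate/G-parity boundary wraps around.
// A small usage sketch; the enclosing function and field names are illustrative
// assumptions, not part of the patch:
//
//   template<class GaugeMat>
//   void conjugate_bc_shift_sketch(GaugeMat &U, GaugeMat &phi, int mu)
//   {
//     auto fwd  = ConjugateBC::CovShiftForward(U, mu, phi);     // U(x) phi(x+mu), conjugated on the boundary slice
//     auto back = ConjugateBC::CovShiftIdentityBackward(U, mu); // adj(U)(x-mu) brought to site x
//     auto st   = ConjugateBC::ShiftStaple(back, mu);           // shift an assembled staple by one site in +mu
//     phi = fwd + st;                                           // consume the results
//   }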
  template<class gauge,class Expr,typename std::enable_if<is_lattice_expr<Expr>::value,void>::type * = nullptr>
  auto CovShiftForward(const Lattice<gauge> &Link,
|
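ConjugateBC now exposes the same three signatures as PeriodicBC; the difference is that the copy of the field living on the slice coor == Lmu, which wraps through the global boundary when shifted, is replaced by its complex conjugate first. That mirrored interface is what lets higher-level code select the boundary condition by policy rather than by special-casing; a hedged sketch of the idea follows, with illustrative policy names (Grid's gauge Impl classes play this role in practice).

// Sketch only: two policies with identical static interfaces, selected at compile time.
struct PeriodicShiftsSketch {
  template<class gauge>
  static Lattice<gauge> CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
    return PeriodicBC::CovShiftIdentityBackward(Link, mu);
  }
};
struct ConjugateShiftsSketch {
  template<class gauge>
  static Lattice<gauge> CovShiftIdentityBackward(const Lattice<gauge> &Link, int mu) {
    return ConjugateBC::CovShiftIdentityBackward(Link, mu);
  }
};

// Code written against the policy works unchanged for either boundary condition.
template<class Shifts, class gauge>
Lattice<gauge> PullLinkBackwardSketch(const Lattice<gauge> &Link, int mu) {
  return Shifts::CovShiftIdentityBackward(Link, mu);
}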
@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
   accelerator_for(sss,nloop,vobj::Nsimd(),{
     uint64_t ss = sss*Ls;
     decltype(coalescedRead(y_v[ss+sp])) tmp;
     spProj5m(tmp,y_v(ss+sp));
     tmp = a*x_v(ss+s)+b*tmp;
     coalescedWrite(z_v[ss+s],tmp);
   });
 }
@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
   z.Checkerboard() = x.Checkerboard();
   conformable(x,z);
   int Ls = grid->_rdimensions[0];
-  Gamma G5(Gamma::Algebra::Gamma5);
   autoView( x_v, x, AcceleratorRead);
   autoView( z_v, z, AcceleratorWrite);
   uint64_t nloop = grid->oSites()/Ls;
@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
     uint64_t ss = sss*Ls;
     for(int s=0;s<Ls;s++){
       int sp = Ls-1-s;
-      coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
+      auto tmp = x_v(ss+s);
+      decltype(tmp) tmp_p;
+      decltype(tmp) tmp_m;
+      spProj5p(tmp_p,tmp);
+      spProj5m(tmp_m,tmp);
+      // Use of spProj5m, 5p captures the coarse space too
+      coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
     }
   });
 }
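The rewrite rests on the identity P± = (1 ± γ5)/2, so P+ψ − P−ψ = γ5ψ: forming the two projections and subtracting reproduces the old G5 * x on fine spinors, while for coarse (nbasis-component) vectors companion spProj5p/spProj5m overloads act on the two halves of the basis, which is what the comment about capturing the coarse space refers to. Below is a toy check of the identity assuming a γ5 = diag(+1,+1,-1,-1) convention; the component bookkeeping is illustrative only.

// Toy check: with P+ keeping the upper two components and P- the lower two,
// P+ s - P- s equals gamma5 s when gamma5 = diag(+1,+1,-1,-1).
#include <array>
#include <cassert>

int main() {
  std::array<double,4> s  = {1.0, 2.0, 3.0, 4.0};
  std::array<double,4> sp = {s[0], s[1], 0.0, 0.0};   // spProj5p analogue
  std::array<double,4> sm = {0.0, 0.0, s[2], s[3]};   // spProj5m analogue
  for (int c = 0; c < 4; ++c) {
    double g5s = (c < 2 ? s[c] : -s[c]);              // action of gamma5 on component c
    assert(sp[c] - sm[c] == g5s);
  }
  return 0;
}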
@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
   z.Checkerboard() = x.Checkerboard();
   conformable(x, z);
 
-  Gamma G5(Gamma::Algebra::Gamma5);
-  z = G5 * x;
+  autoView( x_v, x, AcceleratorRead);
+  autoView( z_v, z, AcceleratorWrite);
+  uint64_t nloop = grid->oSites();
+  accelerator_for(ss,nloop,vobj::Nsimd(),{
+    auto tmp = x_v(ss);
+    decltype(tmp) tmp_p;
+    decltype(tmp) tmp_m;
+    spProj5p(tmp_p,tmp);
+    spProj5m(tmp_m,tmp);
+    coalescedWrite(z_v[ss],tmp_p - tmp_m);
+  });
 }
 
+/*
 template<class CComplex, int nbasis>
 void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
 {
@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
   }
   });
 }
+*/
 
 NAMESPACE_END(Grid);
 
@ -1,779 +0,0 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: Fujitsu_A64FX_asm_double.h

    Copyright (C) 2020

    Author: Nils Meyer <nils.meyer@ur.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#define LOAD_CHIMU(base) LOAD_CHIMU_INTERLEAVED_A64FXd(base)
#define PREFETCH_CHIMU_L1(A) PREFETCH_CHIMU_L1_INTERNAL_A64FXd(A)
#define PREFETCH_GAUGE_L1(A) PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A)
#define PREFETCH_CHIMU_L2(A) PREFETCH_CHIMU_L2_INTERNAL_A64FXd(A)
#define PREFETCH_GAUGE_L2(A) PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A)
#define PF_GAUGE(A)
#define PREFETCH_RESULT_L2_STORE(A) PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXd(A)
#define PREFETCH_RESULT_L1_STORE(A) PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXd(A)
#define PREFETCH1_CHIMU(A) PREFETCH_CHIMU_L1(A)
#define PREFETCH_CHIMU(A) PREFETCH_CHIMU_L1(A)
#define LOCK_GAUGE(A)
#define UNLOCK_GAUGE(A)
#define MASK_REGS DECLARATIONS_A64FXd
#define SAVE_RESULT(A,B) RESULT_A64FXd(A); PREFETCH_RESULT_L2_STORE(B)
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXd(Dir)
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXd
#define LOAD_CHI(base) LOAD_CHI_A64FXd(base)
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXd; RESULT_A64FXd(base)
#define XP_PROJ XP_PROJ_A64FXd
#define YP_PROJ YP_PROJ_A64FXd
#define ZP_PROJ ZP_PROJ_A64FXd
#define TP_PROJ TP_PROJ_A64FXd
#define XM_PROJ XM_PROJ_A64FXd
#define YM_PROJ YM_PROJ_A64FXd
#define ZM_PROJ ZM_PROJ_A64FXd
#define TM_PROJ TM_PROJ_A64FXd
#define XP_RECON XP_RECON_A64FXd
#define XM_RECON XM_RECON_A64FXd
#define XM_RECON_ACCUM XM_RECON_ACCUM_A64FXd
#define YM_RECON_ACCUM YM_RECON_ACCUM_A64FXd
#define ZM_RECON_ACCUM ZM_RECON_ACCUM_A64FXd
#define TM_RECON_ACCUM TM_RECON_ACCUM_A64FXd
#define XP_RECON_ACCUM XP_RECON_ACCUM_A64FXd
#define YP_RECON_ACCUM YP_RECON_ACCUM_A64FXd
#define ZP_RECON_ACCUM ZP_RECON_ACCUM_A64FXd
#define TP_RECON_ACCUM TP_RECON_ACCUM_A64FXd
#define PERMUTE_DIR0 0
#define PERMUTE_DIR1 1
#define PERMUTE_DIR2 2
#define PERMUTE_DIR3 3
#define PERMUTE PERMUTE_A64FXd;
#define LOAD_TABLE(Dir) if (Dir == 0) { LOAD_TABLE0; } else if (Dir == 1) { LOAD_TABLE1; } else if (Dir == 2) { LOAD_TABLE2; }
#define MAYBEPERM(Dir,perm) if (Dir != 3) { if (perm) { PERMUTE; } }
// DECLARATIONS
|
|
||||||
#define DECLARATIONS_A64FXd \
|
|
||||||
const uint64_t lut[4][8] = { \
|
|
||||||
{4, 5, 6, 7, 0, 1, 2, 3}, \
|
|
||||||
{2, 3, 0, 1, 6, 7, 4, 5}, \
|
|
||||||
{1, 0, 3, 2, 5, 4, 7, 6}, \
|
|
||||||
{0, 1, 2, 4, 5, 6, 7, 8} };\
|
|
||||||
asm ( \
|
|
||||||
"fmov z31.d , 0 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// RESULT
|
|
||||||
#define RESULT_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"str z0, [%[storeptr], -6, mul vl] \n\t" \
|
|
||||||
"str z1, [%[storeptr], -5, mul vl] \n\t" \
|
|
||||||
"str z2, [%[storeptr], -4, mul vl] \n\t" \
|
|
||||||
"str z3, [%[storeptr], -3, mul vl] \n\t" \
|
|
||||||
"str z4, [%[storeptr], -2, mul vl] \n\t" \
|
|
||||||
"str z5, [%[storeptr], -1, mul vl] \n\t" \
|
|
||||||
"str z6, [%[storeptr], 0, mul vl] \n\t" \
|
|
||||||
"str z7, [%[storeptr], 1, mul vl] \n\t" \
|
|
||||||
"str z8, [%[storeptr], 2, mul vl] \n\t" \
|
|
||||||
"str z9, [%[storeptr], 3, mul vl] \n\t" \
|
|
||||||
"str z10, [%[storeptr], 4, mul vl] \n\t" \
|
|
||||||
"str z11, [%[storeptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [storeptr] "r" (base + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A) \
|
|
||||||
{ \
|
|
||||||
const auto & ref(U[sUn](A)); uint64_t baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 12, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 16, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 20, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 24, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 28, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A) \
|
|
||||||
{ \
|
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHI
|
|
||||||
#define LOAD_CHI_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ldr z12, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU
|
|
||||||
#define LOAD_CHIMU_INTERLEAVED_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU_0213
|
|
||||||
#define LOAD_CHIMU_0213_A64FXd \
|
|
||||||
{ \
|
|
||||||
const SiteSpinor & ref(in[offset]); \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (&ref[2][0]) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU_0312
|
|
||||||
#define LOAD_CHIMU_0312_A64FXd \
|
|
||||||
{ \
|
|
||||||
const SiteSpinor & ref(in[offset]); \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (&ref[2][0]) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_TABLE0
|
|
||||||
#define LOAD_TABLE0 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (0) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE1
|
|
||||||
#define LOAD_TABLE1 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (1) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE2
|
|
||||||
#define LOAD_TABLE2 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (2) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE3
|
|
||||||
#define LOAD_TABLE3 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (3) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// PERMUTE
|
|
||||||
#define PERMUTE_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"tbl z12.d, { z12.d }, z30.d \n\t" \
|
|
||||||
"tbl z13.d, { z13.d }, z30.d \n\t" \
|
|
||||||
"tbl z14.d, { z14.d }, z30.d \n\t" \
|
|
||||||
"tbl z15.d, { z15.d }, z30.d \n\t" \
|
|
||||||
"tbl z16.d, { z16.d }, z30.d \n\t" \
|
|
||||||
"tbl z17.d, { z17.d }, z30.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_GAUGE
|
|
||||||
#define LOAD_GAUGE \
|
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"ldr z24, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z25, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z26, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z27, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z28, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z29, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// MULT_2SPIN
|
|
||||||
#define MULT_2SPIN_1_A64FXd(A) \
|
|
||||||
{ \
|
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
|
||||||
asm ( \
|
|
||||||
"ldr z24, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z25, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z26, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z27, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z28, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z29, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"movprfx z18.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z18.d, p5/m, z24.d, z12.d, 0 \n\t" \
|
|
||||||
"movprfx z21.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z24.d, z15.d, 0 \n\t" \
|
|
||||||
"movprfx z19.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z25.d, z12.d, 0 \n\t" \
|
|
||||||
"movprfx z22.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z25.d, z15.d, 0 \n\t" \
|
|
||||||
"movprfx z20.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z26.d, z12.d, 0 \n\t" \
|
|
||||||
"movprfx z23.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z26.d, z15.d, 0 \n\t" \
|
|
||||||
"fcmla z18.d, p5/m, z24.d, z12.d, 90 \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z24.d, z15.d, 90 \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z25.d, z12.d, 90 \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z25.d, z15.d, 90 \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z26.d, z12.d, 90 \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z26.d, z15.d, 90 \n\t" \
|
|
||||||
"ldr z24, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z25, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z26, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// MULT_2SPIN_BACKEND
|
|
||||||
#define MULT_2SPIN_2_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcmla z18.d, p5/m, z27.d, z13.d, 0 \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z27.d, z16.d, 0 \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z28.d, z13.d, 0 \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z28.d, z16.d, 0 \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z29.d, z13.d, 0 \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z29.d, z16.d, 0 \n\t" \
|
|
||||||
"fcmla z18.d, p5/m, z27.d, z13.d, 90 \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z27.d, z16.d, 90 \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z28.d, z13.d, 90 \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z28.d, z16.d, 90 \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z29.d, z13.d, 90 \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z29.d, z16.d, 90 \n\t" \
|
|
||||||
"fcmla z18.d, p5/m, z24.d, z14.d, 0 \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z24.d, z17.d, 0 \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z25.d, z14.d, 0 \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z25.d, z17.d, 0 \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z26.d, z14.d, 0 \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z26.d, z17.d, 0 \n\t" \
|
|
||||||
"fcmla z18.d, p5/m, z24.d, z14.d, 90 \n\t" \
|
|
||||||
"fcmla z21.d, p5/m, z24.d, z17.d, 90 \n\t" \
|
|
||||||
"fcmla z19.d, p5/m, z25.d, z14.d, 90 \n\t" \
|
|
||||||
"fcmla z22.d, p5/m, z25.d, z17.d, 90 \n\t" \
|
|
||||||
"fcmla z20.d, p5/m, z26.d, z14.d, 90 \n\t" \
|
|
||||||
"fcmla z23.d, p5/m, z26.d, z17.d, 90 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XP_PROJ
|
|
||||||
#define XP_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.d, p5/m, z12.d, z21.d, 90 \n\t" \
|
|
||||||
"fcadd z13.d, p5/m, z13.d, z22.d, 90 \n\t" \
|
|
||||||
"fcadd z14.d, p5/m, z14.d, z23.d, 90 \n\t" \
|
|
||||||
"fcadd z15.d, p5/m, z15.d, z18.d, 90 \n\t" \
|
|
||||||
"fcadd z16.d, p5/m, z16.d, z19.d, 90 \n\t" \
|
|
||||||
"fcadd z17.d, p5/m, z17.d, z20.d, 90 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XP_RECON
|
|
||||||
#define XP_RECON_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"movprfx z6.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z21.d, 270 \n\t" \
|
|
||||||
"movprfx z7.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z22.d, 270 \n\t" \
|
|
||||||
"movprfx z8.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z23.d, 270 \n\t" \
|
|
||||||
"movprfx z9.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z18.d, 270 \n\t" \
|
|
||||||
"movprfx z10.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z19.d, 270 \n\t" \
|
|
||||||
"movprfx z11.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z20.d, 270 \n\t" \
|
|
||||||
"mov z0.d, p5/m, z18.d \n\t" \
|
|
||||||
"mov z1.d, p5/m, z19.d \n\t" \
|
|
||||||
"mov z2.d, p5/m, z20.d \n\t" \
|
|
||||||
"mov z3.d, p5/m, z21.d \n\t" \
|
|
||||||
"mov z4.d, p5/m, z22.d \n\t" \
|
|
||||||
"mov z5.d, p5/m, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// XP_RECON_ACCUM
|
|
||||||
#define XP_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z18.d, 270 \n\t" \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z19.d, 270 \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z20.d, 270 \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z21.d, 270 \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z22.d, 270 \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z23.d, 270 \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YP_PROJ
|
|
||||||
#define YP_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fsub z12.d, p5/m, z12.d, z21.d \n\t" \
|
|
||||||
"fsub z13.d, p5/m, z13.d, z22.d \n\t" \
|
|
||||||
"fsub z14.d, p5/m, z14.d, z23.d \n\t" \
|
|
||||||
"fadd z15.d, p5/m, z15.d, z18.d \n\t" \
|
|
||||||
"fadd z16.d, p5/m, z16.d, z19.d \n\t" \
|
|
||||||
"fadd z17.d, p5/m, z17.d, z20.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ZP_PROJ
|
|
||||||
#define ZP_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.d, p5/m, z12.d, z18.d, 90 \n\t" \
|
|
||||||
"fcadd z13.d, p5/m, z13.d, z19.d, 90 \n\t" \
|
|
||||||
"fcadd z14.d, p5/m, z14.d, z20.d, 90 \n\t" \
|
|
||||||
"fcadd z15.d, p5/m, z15.d, z21.d, 270 \n\t" \
|
|
||||||
"fcadd z16.d, p5/m, z16.d, z22.d, 270 \n\t" \
|
|
||||||
"fcadd z17.d, p5/m, z17.d, z23.d, 270 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// TP_PROJ
|
|
||||||
#define TP_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fadd z12.d, p5/m, z12.d, z18.d \n\t" \
|
|
||||||
"fadd z13.d, p5/m, z13.d, z19.d \n\t" \
|
|
||||||
"fadd z14.d, p5/m, z14.d, z20.d \n\t" \
|
|
||||||
"fadd z15.d, p5/m, z15.d, z21.d \n\t" \
|
|
||||||
"fadd z16.d, p5/m, z16.d, z22.d \n\t" \
|
|
||||||
"fadd z17.d, p5/m, z17.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_PROJ
|
|
||||||
#define XM_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.d, p5/m, z12.d, z21.d, 270 \n\t" \
|
|
||||||
"fcadd z13.d, p5/m, z13.d, z22.d, 270 \n\t" \
|
|
||||||
"fcadd z14.d, p5/m, z14.d, z23.d, 270 \n\t" \
|
|
||||||
"fcadd z15.d, p5/m, z15.d, z18.d, 270 \n\t" \
|
|
||||||
"fcadd z16.d, p5/m, z16.d, z19.d, 270 \n\t" \
|
|
||||||
"fcadd z17.d, p5/m, z17.d, z20.d, 270 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_RECON
|
|
||||||
#define XM_RECON_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"movprfx z6.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z21.d, 90 \n\t" \
|
|
||||||
"movprfx z7.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z22.d, 90 \n\t" \
|
|
||||||
"movprfx z8.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z23.d, 90 \n\t" \
|
|
||||||
"movprfx z9.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z18.d, 90 \n\t" \
|
|
||||||
"movprfx z10.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z19.d, 90 \n\t" \
|
|
||||||
"movprfx z11.d, p5/m, z31.d \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z20.d, 90 \n\t" \
|
|
||||||
"mov z0.d, p5/m, z18.d \n\t" \
|
|
||||||
"mov z1.d, p5/m, z19.d \n\t" \
|
|
||||||
"mov z2.d, p5/m, z20.d \n\t" \
|
|
||||||
"mov z3.d, p5/m, z21.d \n\t" \
|
|
||||||
"mov z4.d, p5/m, z22.d \n\t" \
|
|
||||||
"mov z5.d, p5/m, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YM_PROJ
|
|
||||||
#define YM_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fadd z12.d, p5/m, z12.d, z21.d \n\t" \
|
|
||||||
"fadd z13.d, p5/m, z13.d, z22.d \n\t" \
|
|
||||||
"fadd z14.d, p5/m, z14.d, z23.d \n\t" \
|
|
||||||
"fsub z15.d, p5/m, z15.d, z18.d \n\t" \
|
|
||||||
"fsub z16.d, p5/m, z16.d, z19.d \n\t" \
|
|
||||||
"fsub z17.d, p5/m, z17.d, z20.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ZM_PROJ
|
|
||||||
#define ZM_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.d, p5/m, z12.d, z18.d, 270 \n\t" \
|
|
||||||
"fcadd z13.d, p5/m, z13.d, z19.d, 270 \n\t" \
|
|
||||||
"fcadd z14.d, p5/m, z14.d, z20.d, 270 \n\t" \
|
|
||||||
"fcadd z15.d, p5/m, z15.d, z21.d, 90 \n\t" \
|
|
||||||
"fcadd z16.d, p5/m, z16.d, z22.d, 90 \n\t" \
|
|
||||||
"fcadd z17.d, p5/m, z17.d, z23.d, 90 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// TM_PROJ
|
|
||||||
#define TM_PROJ_A64FXd \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"fsub z12.d, p5/m, z12.d, z18.d \n\t" \
|
|
||||||
"fsub z13.d, p5/m, z13.d, z19.d \n\t" \
|
|
||||||
"fsub z14.d, p5/m, z14.d, z20.d \n\t" \
|
|
||||||
"fsub z15.d, p5/m, z15.d, z21.d \n\t" \
|
|
||||||
"fsub z16.d, p5/m, z16.d, z22.d \n\t" \
|
|
||||||
"fsub z17.d, p5/m, z17.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_RECON_ACCUM
|
|
||||||
#define XM_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z18.d, 90 \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z19.d, 90 \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z20.d, 90 \n\t" \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z21.d, 90 \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z22.d, 90 \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z23.d, 90 \n\t" \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YP_RECON_ACCUM
|
|
||||||
#define YP_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fsub z9.d, p5/m, z9.d, z18.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fsub z10.d, p5/m, z10.d, z19.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fsub z11.d, p5/m, z11.d, z20.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fadd z6.d, p5/m, z6.d, z21.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fadd z7.d, p5/m, z7.d, z22.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
"fadd z8.d, p5/m, z8.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YM_RECON_ACCUM
|
|
||||||
#define YM_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fadd z9.d, p5/m, z9.d, z18.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fadd z10.d, p5/m, z10.d, z19.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fadd z11.d, p5/m, z11.d, z20.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fsub z6.d, p5/m, z6.d, z21.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fsub z7.d, p5/m, z7.d, z22.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
"fsub z8.d, p5/m, z8.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZP_RECON_ACCUM
|
|
||||||
#define ZP_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z18.d, 270 \n\t" \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z19.d, 270 \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z20.d, 270 \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z21.d, 90 \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z22.d, 90 \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z23.d, 90 \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZM_RECON_ACCUM
|
|
||||||
#define ZM_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z6.d, p5/m, z6.d, z18.d, 90 \n\t" \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fcadd z7.d, p5/m, z7.d, z19.d, 90 \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fcadd z8.d, p5/m, z8.d, z20.d, 90 \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fcadd z9.d, p5/m, z9.d, z21.d, 270 \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fcadd z10.d, p5/m, z10.d, z22.d, 270 \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fcadd z11.d, p5/m, z11.d, z23.d, 270 \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// TP_RECON_ACCUM
|
|
||||||
#define TP_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fadd z6.d, p5/m, z6.d, z18.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fadd z7.d, p5/m, z7.d, z19.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fadd z8.d, p5/m, z8.d, z20.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fadd z9.d, p5/m, z9.d, z21.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fadd z10.d, p5/m, z10.d, z22.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
"fadd z11.d, p5/m, z11.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// TM_RECON_ACCUM
|
|
||||||
#define TM_RECON_ACCUM_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z18.d \n\t" \
|
|
||||||
"fsub z6.d, p5/m, z6.d, z18.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z19.d \n\t" \
|
|
||||||
"fsub z7.d, p5/m, z7.d, z19.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z20.d \n\t" \
|
|
||||||
"fsub z8.d, p5/m, z8.d, z20.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z21.d \n\t" \
|
|
||||||
"fsub z9.d, p5/m, z9.d, z21.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z22.d \n\t" \
|
|
||||||
"fsub z10.d, p5/m, z10.d, z22.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z23.d \n\t" \
|
|
||||||
"fsub z11.d, p5/m, z11.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZERO_PSI
|
|
||||||
#define ZERO_PSI_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.d \n\t" \
|
|
||||||
"fmov z0.d , 0 \n\t" \
|
|
||||||
"fmov z1.d , 0 \n\t" \
|
|
||||||
"fmov z2.d , 0 \n\t" \
|
|
||||||
"fmov z3.d , 0 \n\t" \
|
|
||||||
"fmov z4.d , 0 \n\t" \
|
|
||||||
"fmov z5.d , 0 \n\t" \
|
|
||||||
"fmov z6.d , 0 \n\t" \
|
|
||||||
"fmov z7.d , 0 \n\t" \
|
|
||||||
"fmov z8.d , 0 \n\t" \
|
|
||||||
"fmov z9.d , 0 \n\t" \
|
|
||||||
"fmov z10.d , 0 \n\t" \
|
|
||||||
"fmov z11.d , 0 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
|
|
||||||
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_RESULT_L1_STORE (prefetch store to L1)
|
|
||||||
#define PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXd(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ADD_RESULT_INTERNAL
|
|
||||||
#define ADD_RESULT_INTERNAL_A64FXd \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.d, p5/m, z0.d, z12.d \n\t" \
|
|
||||||
"fadd z1.d, p5/m, z1.d, z13.d \n\t" \
|
|
||||||
"fadd z2.d, p5/m, z2.d, z14.d \n\t" \
|
|
||||||
"fadd z3.d, p5/m, z3.d, z15.d \n\t" \
|
|
||||||
"fadd z4.d, p5/m, z4.d, z16.d \n\t" \
|
|
||||||
"fadd z5.d, p5/m, z5.d, z17.d \n\t" \
|
|
||||||
"fadd z6.d, p5/m, z6.d, z18.d \n\t" \
|
|
||||||
"fadd z7.d, p5/m, z7.d, z19.d \n\t" \
|
|
||||||
"fadd z8.d, p5/m, z8.d, z20.d \n\t" \
|
|
||||||
"fadd z9.d, p5/m, z9.d, z21.d \n\t" \
|
|
||||||
"fadd z10.d, p5/m, z10.d, z22.d \n\t" \
|
|
||||||
"fadd z11.d, p5/m, z11.d, z23.d \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
@ -1,779 +0,0 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: Fujitsu_A64FX_asm_single.h

    Copyright (C) 2020

    Author: Nils Meyer <nils.meyer@ur.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#define LOAD_CHIMU(base) LOAD_CHIMU_INTERLEAVED_A64FXf(base)
#define PREFETCH_CHIMU_L1(A) PREFETCH_CHIMU_L1_INTERNAL_A64FXf(A)
#define PREFETCH_GAUGE_L1(A) PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A)
#define PREFETCH_CHIMU_L2(A) PREFETCH_CHIMU_L2_INTERNAL_A64FXf(A)
#define PREFETCH_GAUGE_L2(A) PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A)
#define PF_GAUGE(A)
#define PREFETCH_RESULT_L2_STORE(A) PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(A)
#define PREFETCH_RESULT_L1_STORE(A) PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXf(A)
#define PREFETCH1_CHIMU(A) PREFETCH_CHIMU_L1(A)
#define PREFETCH_CHIMU(A) PREFETCH_CHIMU_L1(A)
#define LOCK_GAUGE(A)
#define UNLOCK_GAUGE(A)
#define MASK_REGS DECLARATIONS_A64FXf
#define SAVE_RESULT(A,B) RESULT_A64FXf(A); PREFETCH_RESULT_L2_STORE(B)
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXf(Dir)
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXf
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXf; RESULT_A64FXf(base)
#define XP_PROJ XP_PROJ_A64FXf
#define YP_PROJ YP_PROJ_A64FXf
#define ZP_PROJ ZP_PROJ_A64FXf
#define TP_PROJ TP_PROJ_A64FXf
#define XM_PROJ XM_PROJ_A64FXf
#define YM_PROJ YM_PROJ_A64FXf
#define ZM_PROJ ZM_PROJ_A64FXf
#define TM_PROJ TM_PROJ_A64FXf
#define XP_RECON XP_RECON_A64FXf
#define XM_RECON XM_RECON_A64FXf
#define XM_RECON_ACCUM XM_RECON_ACCUM_A64FXf
#define YM_RECON_ACCUM YM_RECON_ACCUM_A64FXf
#define ZM_RECON_ACCUM ZM_RECON_ACCUM_A64FXf
#define TM_RECON_ACCUM TM_RECON_ACCUM_A64FXf
#define XP_RECON_ACCUM XP_RECON_ACCUM_A64FXf
#define YP_RECON_ACCUM YP_RECON_ACCUM_A64FXf
#define ZP_RECON_ACCUM ZP_RECON_ACCUM_A64FXf
#define TP_RECON_ACCUM TP_RECON_ACCUM_A64FXf
#define PERMUTE_DIR0 0
#define PERMUTE_DIR1 1
#define PERMUTE_DIR2 2
#define PERMUTE_DIR3 3
#define PERMUTE PERMUTE_A64FXf;
#define LOAD_TABLE(Dir) if (Dir == 0) { LOAD_TABLE0; } else if (Dir == 1) { LOAD_TABLE1 } else if (Dir == 2) { LOAD_TABLE2; } else if (Dir == 3) { LOAD_TABLE3; }
#define MAYBEPERM(A,perm) if (perm) { PERMUTE; }
// DECLARATIONS
|
|
||||||
#define DECLARATIONS_A64FXf \
|
|
||||||
const uint32_t lut[4][16] = { \
|
|
||||||
{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, \
|
|
||||||
{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, \
|
|
||||||
{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, \
|
|
||||||
{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14} }; \
|
|
||||||
asm ( \
|
|
||||||
"fmov z31.s , 0 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// RESULT
|
|
||||||
#define RESULT_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"str z0, [%[storeptr], -6, mul vl] \n\t" \
|
|
||||||
"str z1, [%[storeptr], -5, mul vl] \n\t" \
|
|
||||||
"str z2, [%[storeptr], -4, mul vl] \n\t" \
|
|
||||||
"str z3, [%[storeptr], -3, mul vl] \n\t" \
|
|
||||||
"str z4, [%[storeptr], -2, mul vl] \n\t" \
|
|
||||||
"str z5, [%[storeptr], -1, mul vl] \n\t" \
|
|
||||||
"str z6, [%[storeptr], 0, mul vl] \n\t" \
|
|
||||||
"str z7, [%[storeptr], 1, mul vl] \n\t" \
|
|
||||||
"str z8, [%[storeptr], 2, mul vl] \n\t" \
|
|
||||||
"str z9, [%[storeptr], 3, mul vl] \n\t" \
|
|
||||||
"str z10, [%[storeptr], 4, mul vl] \n\t" \
|
|
||||||
"str z11, [%[storeptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [storeptr] "r" (base + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
|
||||||
{ \
|
|
||||||
const auto & ref(U[sUn](A)); uint64_t baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 12, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 16, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 20, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 24, mul vl] \n\t" \
|
|
||||||
"prfd PLDL2STRM, p5, [%[fetchptr], 28, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
|
||||||
{ \
|
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
|
||||||
asm ( \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PLDL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (baseU) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHI
|
|
||||||
#define LOAD_CHI_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ldr z12, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU
|
|
||||||
#define LOAD_CHIMU_INTERLEAVED_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.s \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base + 2 * 3 * 64) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU_0213
|
|
||||||
#define LOAD_CHIMU_0213_A64FXf \
|
|
||||||
{ \
|
|
||||||
const SiteSpinor & ref(in[offset]); \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.s \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (&ref[2][0]) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_CHIMU_0312
|
|
||||||
#define LOAD_CHIMU_0312_A64FXf \
|
|
||||||
{ \
|
|
||||||
const SiteSpinor & ref(in[offset]); \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.s \n\t" \
|
|
||||||
"ldr z12, [%[fetchptr], -6, mul vl] \n\t" \
|
|
||||||
"ldr z21, [%[fetchptr], 3, mul vl] \n\t" \
|
|
||||||
"ldr z13, [%[fetchptr], -5, mul vl] \n\t" \
|
|
||||||
"ldr z22, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"ldr z14, [%[fetchptr], -4, mul vl] \n\t" \
|
|
||||||
"ldr z23, [%[fetchptr], 5, mul vl] \n\t" \
|
|
||||||
"ldr z15, [%[fetchptr], -3, mul vl] \n\t" \
|
|
||||||
"ldr z18, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"ldr z16, [%[fetchptr], -2, mul vl] \n\t" \
|
|
||||||
"ldr z19, [%[fetchptr], 1, mul vl] \n\t" \
|
|
||||||
"ldr z17, [%[fetchptr], -1, mul vl] \n\t" \
|
|
||||||
"ldr z20, [%[fetchptr], 2, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (&ref[2][0]) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// LOAD_TABLE0
|
|
||||||
#define LOAD_TABLE0 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (0) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE1
|
|
||||||
#define LOAD_TABLE1 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (1) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE2
|
|
||||||
#define LOAD_TABLE2 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (2) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_TABLE3
|
|
||||||
#define LOAD_TABLE3 \
|
|
||||||
asm ( \
|
|
||||||
"ldr z30, [%[tableptr], %[index], mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [tableptr] "r" (&lut[0]),[index] "i" (3) \
|
|
||||||
: "memory","cc","p5","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// PERMUTE
|
|
||||||
#define PERMUTE_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"tbl z12.s, { z12.s }, z30.s \n\t" \
|
|
||||||
"tbl z13.s, { z13.s }, z30.s \n\t" \
|
|
||||||
"tbl z14.s, { z14.s }, z30.s \n\t" \
|
|
||||||
"tbl z15.s, { z15.s }, z30.s \n\t" \
|
|
||||||
"tbl z16.s, { z16.s }, z30.s \n\t" \
|
|
||||||
"tbl z17.s, { z17.s }, z30.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// LOAD_GAUGE
#define LOAD_GAUGE \
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
{ \
asm ( \
    "ptrue p5.s \n\t" \
    "ldr z24, [%[fetchptr], -6, mul vl] \n\t" \
    "ldr z25, [%[fetchptr], -3, mul vl] \n\t" \
    "ldr z26, [%[fetchptr], 0, mul vl] \n\t" \
    "ldr z27, [%[fetchptr], -5, mul vl] \n\t" \
    "ldr z28, [%[fetchptr], -2, mul vl] \n\t" \
    "ldr z29, [%[fetchptr], 1, mul vl] \n\t" \
    : \
    : [fetchptr] "r" (baseU + 2 * 3 * 64) \
    : "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
); \
}
// MULT_2SPIN
#define MULT_2SPIN_1_A64FXf(A) \
{ \
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
asm ( \
    "ldr z24, [%[fetchptr], -6, mul vl] \n\t" \
    "ldr z25, [%[fetchptr], -3, mul vl] \n\t" \
    "ldr z26, [%[fetchptr], 0, mul vl] \n\t" \
    "ldr z27, [%[fetchptr], -5, mul vl] \n\t" \
    "ldr z28, [%[fetchptr], -2, mul vl] \n\t" \
    "ldr z29, [%[fetchptr], 1, mul vl] \n\t" \
    "movprfx z18.s, p5/m, z31.s \n\t" \
    "fcmla z18.s, p5/m, z24.s, z12.s, 0 \n\t" \
    "movprfx z21.s, p5/m, z31.s \n\t" \
    "fcmla z21.s, p5/m, z24.s, z15.s, 0 \n\t" \
    "movprfx z19.s, p5/m, z31.s \n\t" \
    "fcmla z19.s, p5/m, z25.s, z12.s, 0 \n\t" \
    "movprfx z22.s, p5/m, z31.s \n\t" \
    "fcmla z22.s, p5/m, z25.s, z15.s, 0 \n\t" \
    "movprfx z20.s, p5/m, z31.s \n\t" \
    "fcmla z20.s, p5/m, z26.s, z12.s, 0 \n\t" \
    "movprfx z23.s, p5/m, z31.s \n\t" \
    "fcmla z23.s, p5/m, z26.s, z15.s, 0 \n\t" \
    "fcmla z18.s, p5/m, z24.s, z12.s, 90 \n\t" \
    "fcmla z21.s, p5/m, z24.s, z15.s, 90 \n\t" \
    "fcmla z19.s, p5/m, z25.s, z12.s, 90 \n\t" \
    "fcmla z22.s, p5/m, z25.s, z15.s, 90 \n\t" \
    "fcmla z20.s, p5/m, z26.s, z12.s, 90 \n\t" \
    "fcmla z23.s, p5/m, z26.s, z15.s, 90 \n\t" \
    "ldr z24, [%[fetchptr], -4, mul vl] \n\t" \
    "ldr z25, [%[fetchptr], -1, mul vl] \n\t" \
    "ldr z26, [%[fetchptr], 2, mul vl] \n\t" \
    : \
    : [fetchptr] "r" (baseU + 2 * 3 * 64) \
    : "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
); \
}
// MULT_2SPIN_BACKEND
#define MULT_2SPIN_2_A64FXf \
{ \
asm ( \
    "fcmla z18.s, p5/m, z27.s, z13.s, 0 \n\t" \
    "fcmla z21.s, p5/m, z27.s, z16.s, 0 \n\t" \
    "fcmla z19.s, p5/m, z28.s, z13.s, 0 \n\t" \
    "fcmla z22.s, p5/m, z28.s, z16.s, 0 \n\t" \
    "fcmla z20.s, p5/m, z29.s, z13.s, 0 \n\t" \
    "fcmla z23.s, p5/m, z29.s, z16.s, 0 \n\t" \
    "fcmla z18.s, p5/m, z27.s, z13.s, 90 \n\t" \
    "fcmla z21.s, p5/m, z27.s, z16.s, 90 \n\t" \
    "fcmla z19.s, p5/m, z28.s, z13.s, 90 \n\t" \
    "fcmla z22.s, p5/m, z28.s, z16.s, 90 \n\t" \
    "fcmla z20.s, p5/m, z29.s, z13.s, 90 \n\t" \
    "fcmla z23.s, p5/m, z29.s, z16.s, 90 \n\t" \
    "fcmla z18.s, p5/m, z24.s, z14.s, 0 \n\t" \
    "fcmla z21.s, p5/m, z24.s, z17.s, 0 \n\t" \
    "fcmla z19.s, p5/m, z25.s, z14.s, 0 \n\t" \
    "fcmla z22.s, p5/m, z25.s, z17.s, 0 \n\t" \
    "fcmla z20.s, p5/m, z26.s, z14.s, 0 \n\t" \
    "fcmla z23.s, p5/m, z26.s, z17.s, 0 \n\t" \
    "fcmla z18.s, p5/m, z24.s, z14.s, 90 \n\t" \
    "fcmla z21.s, p5/m, z24.s, z17.s, 90 \n\t" \
    "fcmla z19.s, p5/m, z25.s, z14.s, 90 \n\t" \
    "fcmla z22.s, p5/m, z25.s, z17.s, 90 \n\t" \
    "fcmla z20.s, p5/m, z26.s, z14.s, 90 \n\t" \
    "fcmla z23.s, p5/m, z26.s, z17.s, 90 \n\t" \
    : \
    : \
    : "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
); \
}
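// NOTE (editor's sketch, not generated code): every (rotation 0, rotation 90)
// fcmla pair in MULT_2SPIN_1/2 accumulates one complex multiply-add
// acc += u * chi, so in outline the two macros apply the 3x3 colour matrix to
// both spin components of Chi.  A scalar model, with all names illustrative:
#if 0
#include <complex>

using cf = std::complex<float>;

// rotation 0 :  acc.re += u.re*chi.re;  acc.im += u.re*chi.im
// rotation 90:  acc.re -= u.im*chi.im;  acc.im += u.im*chi.re
inline cf fcmla_pair(cf acc, cf u, cf chi) { return acc + u * chi; }

void mult_2spin(const cf U[3][3], const cf Chi[2][3], cf UChi[2][3]) {
  for (int s = 0; s < 2; ++s)             // two spin components
    for (int i = 0; i < 3; ++i) {         // colour row
      cf acc(0.0f, 0.0f);
      for (int j = 0; j < 3; ++j)         // colour column
        acc = fcmla_pair(acc, U[i][j], Chi[s][j]);
      UChi[s][i] = acc;
    }
}
#endif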
// XP_PROJ
#define XP_PROJ_A64FXf \
{ \
asm ( \
    "fcadd z12.s, p5/m, z12.s, z21.s, 90 \n\t" \
    "fcadd z13.s, p5/m, z13.s, z22.s, 90 \n\t" \
    "fcadd z14.s, p5/m, z14.s, z23.s, 90 \n\t" \
    "fcadd z15.s, p5/m, z15.s, z18.s, 90 \n\t" \
    "fcadd z16.s, p5/m, z16.s, z19.s, 90 \n\t" \
    "fcadd z17.s, p5/m, z17.s, z20.s, 90 \n\t" \
    : \
    : \
    : "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
); \
}
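// NOTE (editor's sketch, not generated code): fcadd with rotation 90 adds
// i*b to a in every complex lane, and rotation 270 subtracts it, so XP_PROJ
// forms the half spinor chi_0 = psi_0 + i*psi_3, chi_1 = psi_1 + i*psi_2 used
// for the +x hop under the gamma convention of this kernel.  Scalar model,
// with all names illustrative:
#if 0
#include <complex>

using cf = std::complex<float>;

inline cf fcadd90 (cf a, cf b) { return a + cf(0.0f, 1.0f) * b; }  // a + i*b
inline cf fcadd270(cf a, cf b) { return a - cf(0.0f, 1.0f) * b; }  // a - i*b

void xp_proj(const cf psi[4][3], cf chi[2][3]) {
  for (int c = 0; c < 3; ++c) {
    chi[0][c] = fcadd90(psi[0][c], psi[3][c]);
    chi[1][c] = fcadd90(psi[1][c], psi[2][c]);
  }
}
#endif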
// XP_RECON
|
|
||||||
#define XP_RECON_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"movprfx z6.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z21.s, 270 \n\t" \
|
|
||||||
"movprfx z7.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z22.s, 270 \n\t" \
|
|
||||||
"movprfx z8.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z23.s, 270 \n\t" \
|
|
||||||
"movprfx z9.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z18.s, 270 \n\t" \
|
|
||||||
"movprfx z10.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z19.s, 270 \n\t" \
|
|
||||||
"movprfx z11.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z20.s, 270 \n\t" \
|
|
||||||
"mov z0.s, p5/m, z18.s \n\t" \
|
|
||||||
"mov z1.s, p5/m, z19.s \n\t" \
|
|
||||||
"mov z2.s, p5/m, z20.s \n\t" \
|
|
||||||
"mov z3.s, p5/m, z21.s \n\t" \
|
|
||||||
"mov z4.s, p5/m, z22.s \n\t" \
|
|
||||||
"mov z5.s, p5/m, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// XP_RECON_ACCUM
|
|
||||||
#define XP_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z18.s, 270 \n\t" \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z19.s, 270 \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z20.s, 270 \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z21.s, 270 \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z22.s, 270 \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z23.s, 270 \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YP_PROJ
|
|
||||||
#define YP_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fsub z12.s, p5/m, z12.s, z21.s \n\t" \
|
|
||||||
"fsub z13.s, p5/m, z13.s, z22.s \n\t" \
|
|
||||||
"fsub z14.s, p5/m, z14.s, z23.s \n\t" \
|
|
||||||
"fadd z15.s, p5/m, z15.s, z18.s \n\t" \
|
|
||||||
"fadd z16.s, p5/m, z16.s, z19.s \n\t" \
|
|
||||||
"fadd z17.s, p5/m, z17.s, z20.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ZP_PROJ
|
|
||||||
#define ZP_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.s, p5/m, z12.s, z18.s, 90 \n\t" \
|
|
||||||
"fcadd z13.s, p5/m, z13.s, z19.s, 90 \n\t" \
|
|
||||||
"fcadd z14.s, p5/m, z14.s, z20.s, 90 \n\t" \
|
|
||||||
"fcadd z15.s, p5/m, z15.s, z21.s, 270 \n\t" \
|
|
||||||
"fcadd z16.s, p5/m, z16.s, z22.s, 270 \n\t" \
|
|
||||||
"fcadd z17.s, p5/m, z17.s, z23.s, 270 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// TP_PROJ
|
|
||||||
#define TP_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fadd z12.s, p5/m, z12.s, z18.s \n\t" \
|
|
||||||
"fadd z13.s, p5/m, z13.s, z19.s \n\t" \
|
|
||||||
"fadd z14.s, p5/m, z14.s, z20.s \n\t" \
|
|
||||||
"fadd z15.s, p5/m, z15.s, z21.s \n\t" \
|
|
||||||
"fadd z16.s, p5/m, z16.s, z22.s \n\t" \
|
|
||||||
"fadd z17.s, p5/m, z17.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_PROJ
|
|
||||||
#define XM_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.s, p5/m, z12.s, z21.s, 270 \n\t" \
|
|
||||||
"fcadd z13.s, p5/m, z13.s, z22.s, 270 \n\t" \
|
|
||||||
"fcadd z14.s, p5/m, z14.s, z23.s, 270 \n\t" \
|
|
||||||
"fcadd z15.s, p5/m, z15.s, z18.s, 270 \n\t" \
|
|
||||||
"fcadd z16.s, p5/m, z16.s, z19.s, 270 \n\t" \
|
|
||||||
"fcadd z17.s, p5/m, z17.s, z20.s, 270 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_RECON
|
|
||||||
#define XM_RECON_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"movprfx z6.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z21.s, 90 \n\t" \
|
|
||||||
"movprfx z7.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z22.s, 90 \n\t" \
|
|
||||||
"movprfx z8.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z23.s, 90 \n\t" \
|
|
||||||
"movprfx z9.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z18.s, 90 \n\t" \
|
|
||||||
"movprfx z10.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z19.s, 90 \n\t" \
|
|
||||||
"movprfx z11.s, p5/m, z31.s \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z20.s, 90 \n\t" \
|
|
||||||
"mov z0.s, p5/m, z18.s \n\t" \
|
|
||||||
"mov z1.s, p5/m, z19.s \n\t" \
|
|
||||||
"mov z2.s, p5/m, z20.s \n\t" \
|
|
||||||
"mov z3.s, p5/m, z21.s \n\t" \
|
|
||||||
"mov z4.s, p5/m, z22.s \n\t" \
|
|
||||||
"mov z5.s, p5/m, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YM_PROJ
|
|
||||||
#define YM_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fadd z12.s, p5/m, z12.s, z21.s \n\t" \
|
|
||||||
"fadd z13.s, p5/m, z13.s, z22.s \n\t" \
|
|
||||||
"fadd z14.s, p5/m, z14.s, z23.s \n\t" \
|
|
||||||
"fsub z15.s, p5/m, z15.s, z18.s \n\t" \
|
|
||||||
"fsub z16.s, p5/m, z16.s, z19.s \n\t" \
|
|
||||||
"fsub z17.s, p5/m, z17.s, z20.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ZM_PROJ
|
|
||||||
#define ZM_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z12.s, p5/m, z12.s, z18.s, 270 \n\t" \
|
|
||||||
"fcadd z13.s, p5/m, z13.s, z19.s, 270 \n\t" \
|
|
||||||
"fcadd z14.s, p5/m, z14.s, z20.s, 270 \n\t" \
|
|
||||||
"fcadd z15.s, p5/m, z15.s, z21.s, 90 \n\t" \
|
|
||||||
"fcadd z16.s, p5/m, z16.s, z22.s, 90 \n\t" \
|
|
||||||
"fcadd z17.s, p5/m, z17.s, z23.s, 90 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// TM_PROJ
|
|
||||||
#define TM_PROJ_A64FXf \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.s \n\t" \
|
|
||||||
"fsub z12.s, p5/m, z12.s, z18.s \n\t" \
|
|
||||||
"fsub z13.s, p5/m, z13.s, z19.s \n\t" \
|
|
||||||
"fsub z14.s, p5/m, z14.s, z20.s \n\t" \
|
|
||||||
"fsub z15.s, p5/m, z15.s, z21.s \n\t" \
|
|
||||||
"fsub z16.s, p5/m, z16.s, z22.s \n\t" \
|
|
||||||
"fsub z17.s, p5/m, z17.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// XM_RECON_ACCUM
|
|
||||||
#define XM_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z18.s, 90 \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z19.s, 90 \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z20.s, 90 \n\t" \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z21.s, 90 \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z22.s, 90 \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z23.s, 90 \n\t" \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YP_RECON_ACCUM
|
|
||||||
#define YP_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fsub z9.s, p5/m, z9.s, z18.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fsub z10.s, p5/m, z10.s, z19.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fsub z11.s, p5/m, z11.s, z20.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fadd z6.s, p5/m, z6.s, z21.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fadd z7.s, p5/m, z7.s, z22.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
"fadd z8.s, p5/m, z8.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// YM_RECON_ACCUM
|
|
||||||
#define YM_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fadd z9.s, p5/m, z9.s, z18.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fadd z10.s, p5/m, z10.s, z19.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fadd z11.s, p5/m, z11.s, z20.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fsub z6.s, p5/m, z6.s, z21.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fsub z7.s, p5/m, z7.s, z22.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
"fsub z8.s, p5/m, z8.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZP_RECON_ACCUM
|
|
||||||
#define ZP_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z18.s, 270 \n\t" \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z19.s, 270 \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z20.s, 270 \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z21.s, 90 \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z22.s, 90 \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z23.s, 90 \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZM_RECON_ACCUM
|
|
||||||
#define ZM_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fcadd z6.s, p5/m, z6.s, z18.s, 90 \n\t" \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fcadd z7.s, p5/m, z7.s, z19.s, 90 \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fcadd z8.s, p5/m, z8.s, z20.s, 90 \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fcadd z9.s, p5/m, z9.s, z21.s, 270 \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fcadd z10.s, p5/m, z10.s, z22.s, 270 \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fcadd z11.s, p5/m, z11.s, z23.s, 270 \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// TP_RECON_ACCUM
|
|
||||||
#define TP_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fadd z6.s, p5/m, z6.s, z18.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fadd z7.s, p5/m, z7.s, z19.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fadd z8.s, p5/m, z8.s, z20.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fadd z9.s, p5/m, z9.s, z21.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fadd z10.s, p5/m, z10.s, z22.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
"fadd z11.s, p5/m, z11.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// TM_RECON_ACCUM
|
|
||||||
#define TM_RECON_ACCUM_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z18.s \n\t" \
|
|
||||||
"fsub z6.s, p5/m, z6.s, z18.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z19.s \n\t" \
|
|
||||||
"fsub z7.s, p5/m, z7.s, z19.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z20.s \n\t" \
|
|
||||||
"fsub z8.s, p5/m, z8.s, z20.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z21.s \n\t" \
|
|
||||||
"fsub z9.s, p5/m, z9.s, z21.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z22.s \n\t" \
|
|
||||||
"fsub z10.s, p5/m, z10.s, z22.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z23.s \n\t" \
|
|
||||||
"fsub z11.s, p5/m, z11.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// ZERO_PSI
|
|
||||||
#define ZERO_PSI_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"ptrue p5.s \n\t" \
|
|
||||||
"fmov z0.s , 0 \n\t" \
|
|
||||||
"fmov z1.s , 0 \n\t" \
|
|
||||||
"fmov z2.s , 0 \n\t" \
|
|
||||||
"fmov z3.s , 0 \n\t" \
|
|
||||||
"fmov z4.s , 0 \n\t" \
|
|
||||||
"fmov z5.s , 0 \n\t" \
|
|
||||||
"fmov z6.s , 0 \n\t" \
|
|
||||||
"fmov z7.s , 0 \n\t" \
|
|
||||||
"fmov z8.s , 0 \n\t" \
|
|
||||||
"fmov z9.s , 0 \n\t" \
|
|
||||||
"fmov z10.s , 0 \n\t" \
|
|
||||||
"fmov z11.s , 0 \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
||||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
|
|
||||||
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PSTL2STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
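// NOTE (editor's sketch, not generated code): the asm block above issues
// three PSTL2STRM prefetch-for-store hints at 256-byte strides, spanning the
// twelve 64-byte result vectors about to be written.  The same hint can be
// expressed with the ACLE intrinsics used by the intrinsics variants of these
// headers; the helper name below is illustrative only:
#if 0
#include <arm_sve.h>

static inline void prefetch_result_l2_store(const void *base) {
  svbool_t pg = svptrue_b64();
  svprfd_vnum(pg, base, 0, SV_PSTL2STRM);  // offset   0 bytes
  svprfd_vnum(pg, base, 4, SV_PSTL2STRM);  // offset 256 bytes
  svprfd_vnum(pg, base, 8, SV_PSTL2STRM);  // offset 512 bytes
}
#endif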
|
|
||||||
// PREFETCH_RESULT_L1_STORE (prefetch store to L1)
|
|
||||||
#define PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXf(base) \
|
|
||||||
{ \
|
|
||||||
asm ( \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 0, mul vl] \n\t" \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 4, mul vl] \n\t" \
|
|
||||||
"prfd PSTL1STRM, p5, [%[fetchptr], 8, mul vl] \n\t" \
|
|
||||||
: \
|
|
||||||
: [fetchptr] "r" (base) \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31","memory" \
|
|
||||||
); \
|
|
||||||
}
|
|
||||||
// ADD_RESULT_INTERNAL
|
|
||||||
#define ADD_RESULT_INTERNAL_A64FXf \
|
|
||||||
asm ( \
|
|
||||||
"fadd z0.s, p5/m, z0.s, z12.s \n\t" \
|
|
||||||
"fadd z1.s, p5/m, z1.s, z13.s \n\t" \
|
|
||||||
"fadd z2.s, p5/m, z2.s, z14.s \n\t" \
|
|
||||||
"fadd z3.s, p5/m, z3.s, z15.s \n\t" \
|
|
||||||
"fadd z4.s, p5/m, z4.s, z16.s \n\t" \
|
|
||||||
"fadd z5.s, p5/m, z5.s, z17.s \n\t" \
|
|
||||||
"fadd z6.s, p5/m, z6.s, z18.s \n\t" \
|
|
||||||
"fadd z7.s, p5/m, z7.s, z19.s \n\t" \
|
|
||||||
"fadd z8.s, p5/m, z8.s, z20.s \n\t" \
|
|
||||||
"fadd z9.s, p5/m, z9.s, z21.s \n\t" \
|
|
||||||
"fadd z10.s, p5/m, z10.s, z22.s \n\t" \
|
|
||||||
"fadd z11.s, p5/m, z11.s, z23.s \n\t" \
|
|
||||||
: \
|
|
||||||
: \
|
|
||||||
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
|
|
||||||
);
|
|
||||||
|
|
@ -38,10 +38,11 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
#define LOCK_GAUGE(A)
|
#define LOCK_GAUGE(A)
|
||||||
#define UNLOCK_GAUGE(A)
|
#define UNLOCK_GAUGE(A)
|
||||||
#define MASK_REGS DECLARATIONS_A64FXd
|
#define MASK_REGS DECLARATIONS_A64FXd
|
||||||
#define SAVE_RESULT(A,B) RESULT_A64FXd(A); PREFETCH_RESULT_L2_STORE(B)
|
#define SAVE_RESULT(A,B) RESULT_A64FXd(A);
|
||||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXd(Dir)
|
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXd(Dir)
|
||||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXd
|
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXd
|
||||||
#define LOAD_CHI(base) LOAD_CHI_A64FXd(base)
|
#define LOAD_CHI(base) LOAD_CHI_A64FXd(base)
|
||||||
|
#define ZERO_PSI ZERO_PSI_A64FXd
|
||||||
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXd; RESULT_A64FXd(base)
|
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXd; RESULT_A64FXd(base)
|
||||||
#define XP_PROJ XP_PROJ_A64FXd
|
#define XP_PROJ XP_PROJ_A64FXd
|
||||||
#define YP_PROJ YP_PROJ_A64FXd
|
#define YP_PROJ YP_PROJ_A64FXd
|
||||||
@ -70,6 +71,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
#define MAYBEPERM(Dir,perm) if (Dir != 3) { if (perm) { PERMUTE; } }
|
#define MAYBEPERM(Dir,perm) if (Dir != 3) { if (perm) { PERMUTE; } }
|
||||||
// DECLARATIONS
|
// DECLARATIONS
|
||||||
#define DECLARATIONS_A64FXd \
|
#define DECLARATIONS_A64FXd \
|
||||||
|
uint64_t baseU; \
|
||||||
const uint64_t lut[4][8] = { \
|
const uint64_t lut[4][8] = { \
|
||||||
{4, 5, 6, 7, 0, 1, 2, 3}, \
|
{4, 5, 6, 7, 0, 1, 2, 3}, \
|
||||||
{2, 3, 0, 1, 6, 7, 4, 5}, \
|
{2, 3, 0, 1, 6, 7, 4, 5}, \
|
||||||
@ -126,114 +128,114 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
// RESULT
|
// RESULT
|
||||||
#define RESULT_A64FXd(base) \
|
#define RESULT_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -6 * 64), result_00); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-6), result_00); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -5 * 64), result_01); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-5), result_01); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -4 * 64), result_02); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-4), result_02); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -3 * 64), result_10); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-3), result_10); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -2 * 64), result_11); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-2), result_11); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + -1 * 64), result_12); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(-1), result_12); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 0 * 64), result_20); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(0), result_20); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 1 * 64), result_21); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(1), result_21); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 2 * 64), result_22); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(2), result_22); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 3 * 64), result_30); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(3), result_30); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 4 * 64), result_31); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(4), result_31); \
|
||||||
svst1(pg1, (float64_t*)(base + 2 * 3 * 64 + 5 * 64), result_32); \
|
svst1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64),(int64_t)(5), result_32); \
|
||||||
}
|
}
|
||||||
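// NOTE (editor's sketch, not generated code): the change above replaces
// hand-scaled byte offsets with the _vnum forms, which address
// base + vnum * VL, i.e. vnum * 64 bytes on the 512-bit A64FX.  The two store
// styles below are therefore equivalent for float64 data; the helper names
// are illustrative only:
#if 0
#include <arm_sve.h>

static inline void store_old(svbool_t pg, double *p, svfloat64_t v) {
  svst1(pg, p - 6 * svcntd(), v);   // explicit offset: 6 vectors below p
}
static inline void store_new(svbool_t pg, double *p, svfloat64_t v) {
  svst1_vnum(pg, p, -6, v);         // same address via vnum = -6
}
#endif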
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXd(base) \
|
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXd(base) \
|
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A) \
|
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sUn](A)); uint64_t baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + -256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(-4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 768), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(12), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1024), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(16), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1280), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(20), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1536), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(24), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1792), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(28), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A) \
|
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// LOAD_CHI
|
// LOAD_CHI
|
||||||
#define LOAD_CHI_A64FXd(base) \
|
#define LOAD_CHI_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
Chi_00 = svld1(pg1, (float64_t*)(base + 0 * 64)); \
|
Chi_00 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(0)); \
|
||||||
Chi_01 = svld1(pg1, (float64_t*)(base + 1 * 64)); \
|
Chi_01 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(1)); \
|
||||||
Chi_02 = svld1(pg1, (float64_t*)(base + 2 * 64)); \
|
Chi_02 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(2)); \
|
||||||
Chi_10 = svld1(pg1, (float64_t*)(base + 3 * 64)); \
|
Chi_10 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(3)); \
|
||||||
Chi_11 = svld1(pg1, (float64_t*)(base + 4 * 64)); \
|
Chi_11 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(4)); \
|
||||||
Chi_12 = svld1(pg1, (float64_t*)(base + 5 * 64)); \
|
Chi_12 = svld1_vnum(pg1, (float64_t*)(base), (int64_t)(5)); \
|
||||||
}
|
}
|
||||||
// LOAD_CHIMU
|
// LOAD_CHIMU
|
||||||
#define LOAD_CHIMU_INTERLEAVED_A64FXd(base) \
|
#define LOAD_CHIMU_INTERLEAVED_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
Chimu_00 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -6 * 64)); \
|
Chimu_00 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
Chimu_30 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 3 * 64)); \
|
Chimu_30 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
|
||||||
Chimu_10 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -3 * 64)); \
|
Chimu_10 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
Chimu_20 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 0 * 64)); \
|
Chimu_20 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
Chimu_01 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -5 * 64)); \
|
Chimu_01 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
Chimu_31 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 4 * 64)); \
|
Chimu_31 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
|
||||||
Chimu_11 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -2 * 64)); \
|
Chimu_11 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
Chimu_21 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 1 * 64)); \
|
Chimu_21 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
Chimu_02 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -4 * 64)); \
|
Chimu_02 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
|
||||||
Chimu_32 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 5 * 64)); \
|
Chimu_32 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
|
||||||
Chimu_12 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -1 * 64)); \
|
Chimu_12 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
|
||||||
Chimu_22 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 2 * 64)); \
|
Chimu_22 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
|
||||||
}
|
}
|
||||||
// LOAD_CHIMU_0213
|
// LOAD_CHIMU_0213
|
||||||
#define LOAD_CHIMU_0213_A64FXd \
|
#define LOAD_CHIMU_0213_A64FXd \
|
||||||
{ \
|
{ \
|
||||||
const SiteSpinor & ref(in[offset]); \
|
const SiteSpinor & ref(in[offset]); \
|
||||||
Chimu_00 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -6 * 64)); \
|
Chimu_00 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
Chimu_20 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 0 * 64)); \
|
Chimu_20 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
Chimu_01 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -5 * 64)); \
|
Chimu_01 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
Chimu_21 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 1 * 64)); \
|
Chimu_21 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
Chimu_02 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -4 * 64)); \
|
Chimu_02 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
|
||||||
Chimu_22 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 2 * 64)); \
|
Chimu_22 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
|
||||||
Chimu_10 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -3 * 64)); \
|
Chimu_10 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
Chimu_30 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 3 * 64)); \
|
Chimu_30 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
|
||||||
Chimu_11 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -2 * 64)); \
|
Chimu_11 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
Chimu_31 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 4 * 64)); \
|
Chimu_31 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
|
||||||
Chimu_12 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -1 * 64)); \
|
Chimu_12 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
|
||||||
Chimu_32 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 5 * 64)); \
|
Chimu_32 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
|
||||||
}
|
}
|
||||||
// LOAD_CHIMU_0312
|
// LOAD_CHIMU_0312
|
||||||
#define LOAD_CHIMU_0312_A64FXd \
|
#define LOAD_CHIMU_0312_A64FXd \
|
||||||
{ \
|
{ \
|
||||||
const SiteSpinor & ref(in[offset]); \
|
const SiteSpinor & ref(in[offset]); \
|
||||||
Chimu_00 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -6 * 64)); \
|
Chimu_00 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
Chimu_30 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 3 * 64)); \
|
Chimu_30 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
|
||||||
Chimu_01 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -5 * 64)); \
|
Chimu_01 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
Chimu_31 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 4 * 64)); \
|
Chimu_31 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
|
||||||
Chimu_02 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -4 * 64)); \
|
Chimu_02 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
|
||||||
Chimu_32 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 5 * 64)); \
|
Chimu_32 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
|
||||||
Chimu_10 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -3 * 64)); \
|
Chimu_10 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
Chimu_20 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 0 * 64)); \
|
Chimu_20 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
Chimu_11 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -2 * 64)); \
|
Chimu_11 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
Chimu_21 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 1 * 64)); \
|
Chimu_21 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
Chimu_12 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + -1 * 64)); \
|
Chimu_12 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
|
||||||
Chimu_22 = svld1(pg1, (float64_t*)(base + 2 * 3 * 64 + 2 * 64)); \
|
Chimu_22 = svld1_vnum(pg1, (float64_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
|
||||||
}
|
}
|
||||||
// LOAD_TABLE0
|
// LOAD_TABLE0
|
||||||
#define LOAD_TABLE0 \
|
#define LOAD_TABLE0 \
|
||||||
@ -261,26 +263,26 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
Chi_12 = svtbl(Chi_12, table0);
|
Chi_12 = svtbl(Chi_12, table0);
|
||||||
|
|
||||||
// LOAD_GAUGE
|
// LOAD_GAUGE
|
||||||
#define LOAD_GAUGE \
|
#define LOAD_GAUGE(A) \
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
|
||||||
{ \
|
{ \
|
||||||
U_00 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -6 * 64)); \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
U_10 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -3 * 64)); \
|
U_00 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
U_20 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + 0 * 64)); \
|
U_10 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
U_01 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -5 * 64)); \
|
U_20 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
U_11 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -2 * 64)); \
|
U_01 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
U_21 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + 1 * 64)); \
|
U_11 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
|
U_21 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
}
|
}
|
||||||
// MULT_2SPIN
|
// MULT_2SPIN
|
||||||
#define MULT_2SPIN_1_A64FXd(A) \
|
#define MULT_2SPIN_1_A64FXd(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
U_00 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -6 * 64)); \
|
U_00 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
U_10 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -3 * 64)); \
|
U_10 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
U_20 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + 0 * 64)); \
|
U_20 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
U_01 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -5 * 64)); \
|
U_01 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
U_11 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -2 * 64)); \
|
U_11 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
U_21 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + 1 * 64)); \
|
U_21 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
UChi_00 = svcmla_x(pg1, zero0, U_00, Chi_00, 0); \
|
UChi_00 = svcmla_x(pg1, zero0, U_00, Chi_00, 0); \
|
||||||
UChi_10 = svcmla_x(pg1, zero0, U_00, Chi_10, 0); \
|
UChi_10 = svcmla_x(pg1, zero0, U_00, Chi_10, 0); \
|
||||||
UChi_01 = svcmla_x(pg1, zero0, U_10, Chi_00, 0); \
|
UChi_01 = svcmla_x(pg1, zero0, U_10, Chi_00, 0); \
|
||||||
@ -293,9 +295,9 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
UChi_11 = svcmla_x(pg1, UChi_11, U_10, Chi_10, 90); \
|
UChi_11 = svcmla_x(pg1, UChi_11, U_10, Chi_10, 90); \
|
||||||
UChi_02 = svcmla_x(pg1, UChi_02, U_20, Chi_00, 90); \
|
UChi_02 = svcmla_x(pg1, UChi_02, U_20, Chi_00, 90); \
|
||||||
UChi_12 = svcmla_x(pg1, UChi_12, U_20, Chi_10, 90); \
|
UChi_12 = svcmla_x(pg1, UChi_12, U_20, Chi_10, 90); \
|
||||||
U_00 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -4 * 64)); \
|
U_00 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-4)); \
|
||||||
U_10 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + -1 * 64)); \
|
U_10 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(-1)); \
|
||||||
U_20 = svld1(pg1, (float64_t*)(baseU + 2 * 3 * 64 + 2 * 64)); \
|
U_20 = svld1_vnum(pg1, (float64_t*)(baseU + 2 * 3 * 64), (int64_t)(2)); \
|
||||||
}
|
}
|
||||||
// MULT_2SPIN_BACKEND
|
// MULT_2SPIN_BACKEND
|
||||||
#define MULT_2SPIN_2_A64FXd \
|
#define MULT_2SPIN_2_A64FXd \
|
||||||
@ -570,12 +572,12 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
result_31 = svdup_f64(0.); \
|
result_31 = svdup_f64(0.); \
|
||||||
result_32 = svdup_f64(0.);
|
result_32 = svdup_f64(0.);
|
||||||
|
|
||||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
|
// PREFETCH_RESULT_L2_STORE (uses DC ZVA for cache line zeroing)
|
||||||
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXd(base) \
|
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PSTL2STRM); \
|
asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 0) : "memory" ); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PSTL2STRM); \
|
asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 1) : "memory" ); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PSTL2STRM); \
|
asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 2) : "memory" ); \
|
||||||
}
|
}
|
||||||
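// NOTE (editor's sketch, not generated code): the replacement above swaps the
// store prefetch for DC ZVA, which zeroes a whole cache block (256 bytes on
// A64FX; the block size is advertised in DCZID_EL0) so the subsequent
// full-line result stores need no read-for-ownership.  A standalone model of
// the three zeroing operations, with the helper name illustrative only:
#if 0
#include <cstdint>

static inline void zero_result_lines(std::uint64_t base) {
  for (int line = 0; line < 3; ++line)
    asm volatile("dc zva, %0" : : "r"(base + 256u * line) : "memory");
}
#endif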
// PREFETCH_RESULT_L1_STORE (prefetch store to L1)
|
// PREFETCH_RESULT_L1_STORE (prefetch store to L1)
|
||||||
#define PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXd(base) \
|
#define PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXd(base) \
|
||||||
|
@ -38,10 +38,11 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
#define LOCK_GAUGE(A)
|
#define LOCK_GAUGE(A)
|
||||||
#define UNLOCK_GAUGE(A)
|
#define UNLOCK_GAUGE(A)
|
||||||
#define MASK_REGS DECLARATIONS_A64FXf
|
#define MASK_REGS DECLARATIONS_A64FXf
|
||||||
#define SAVE_RESULT(A,B) RESULT_A64FXf(A); PREFETCH_RESULT_L2_STORE(B)
|
#define SAVE_RESULT(A,B) RESULT_A64FXf(A);
|
||||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXf(Dir)
|
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXf(Dir)
|
||||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXf
|
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXf
|
||||||
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
|
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
|
||||||
|
#define ZERO_PSI ZERO_PSI_A64FXf
|
||||||
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXf; RESULT_A64FXf(base)
|
#define ADD_RESULT(base,basep) LOAD_CHIMU(base); ADD_RESULT_INTERNAL_A64FXf; RESULT_A64FXf(base)
|
||||||
#define XP_PROJ XP_PROJ_A64FXf
|
#define XP_PROJ XP_PROJ_A64FXf
|
||||||
#define YP_PROJ YP_PROJ_A64FXf
|
#define YP_PROJ YP_PROJ_A64FXf
|
||||||
@ -70,6 +71,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
#define MAYBEPERM(A,perm) if (perm) { PERMUTE; }
|
#define MAYBEPERM(A,perm) if (perm) { PERMUTE; }
|
||||||
// DECLARATIONS
|
// DECLARATIONS
|
||||||
#define DECLARATIONS_A64FXf \
|
#define DECLARATIONS_A64FXf \
|
||||||
|
uint64_t baseU; \
|
||||||
const uint32_t lut[4][16] = { \
|
const uint32_t lut[4][16] = { \
|
||||||
{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, \
|
{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, \
|
||||||
{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, \
|
{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, \
|
||||||
@ -126,114 +128,114 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
// RESULT
|
// RESULT
|
||||||
#define RESULT_A64FXf(base) \
|
#define RESULT_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -6 * 64), result_00); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-6), result_00); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -5 * 64), result_01); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-5), result_01); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -4 * 64), result_02); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-4), result_02); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -3 * 64), result_10); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-3), result_10); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -2 * 64), result_11); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-2), result_11); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + -1 * 64), result_12); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(-1), result_12); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 0 * 64), result_20); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(0), result_20); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 1 * 64), result_21); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(1), result_21); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 2 * 64), result_22); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(2), result_22); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 3 * 64), result_30); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(3), result_30); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 4 * 64), result_31); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(4), result_31); \
|
||||||
svst1(pg1, (float32_t*)(base + 2 * 3 * 64 + 5 * 64), result_32); \
|
svst1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64),(int64_t)(5), result_32); \
|
||||||
}
|
}
|
||||||
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXf(base) \
|
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXf(base) \
|
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sUn](A)); uint64_t baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + -256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(-4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 768), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(12), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1024), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(16), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1280), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(20), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1536), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(24), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1792), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(28), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// LOAD_CHI
|
// LOAD_CHI
|
||||||
#define LOAD_CHI_A64FXf(base) \
|
#define LOAD_CHI_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
Chi_00 = svld1(pg1, (float32_t*)(base + 0 * 64)); \
|
Chi_00 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(0)); \
|
||||||
Chi_01 = svld1(pg1, (float32_t*)(base + 1 * 64)); \
|
Chi_01 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(1)); \
|
||||||
Chi_02 = svld1(pg1, (float32_t*)(base + 2 * 64)); \
|
Chi_02 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(2)); \
|
||||||
Chi_10 = svld1(pg1, (float32_t*)(base + 3 * 64)); \
|
Chi_10 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(3)); \
|
||||||
Chi_11 = svld1(pg1, (float32_t*)(base + 4 * 64)); \
|
Chi_11 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(4)); \
|
||||||
Chi_12 = svld1(pg1, (float32_t*)(base + 5 * 64)); \
|
Chi_12 = svld1_vnum(pg1, (float32_t*)(base), (int64_t)(5)); \
|
||||||
}
|
}
|
||||||
// LOAD_CHIMU
|
// LOAD_CHIMU
|
||||||
#define LOAD_CHIMU_INTERLEAVED_A64FXf(base) \
|
#define LOAD_CHIMU_INTERLEAVED_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
Chimu_00 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -6 * 64)); \
|
Chimu_00 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
Chimu_30 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 3 * 64)); \
|
Chimu_30 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
|
||||||
Chimu_10 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -3 * 64)); \
|
Chimu_10 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
|
||||||
Chimu_20 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 0 * 64)); \
|
Chimu_20 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
Chimu_01 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -5 * 64)); \
|
Chimu_01 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
|
||||||
Chimu_31 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 4 * 64)); \
|
Chimu_31 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
|
||||||
Chimu_11 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -2 * 64)); \
|
Chimu_11 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
|
||||||
Chimu_21 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 1 * 64)); \
|
Chimu_21 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
|
||||||
Chimu_02 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -4 * 64)); \
|
Chimu_02 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
|
||||||
Chimu_32 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 5 * 64)); \
|
Chimu_32 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
|
||||||
Chimu_12 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -1 * 64)); \
|
Chimu_12 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
|
||||||
Chimu_22 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 2 * 64)); \
|
Chimu_22 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
|
||||||
}
|
}
|
||||||
// LOAD_CHIMU_0213
|
// LOAD_CHIMU_0213
|
||||||
#define LOAD_CHIMU_0213_A64FXf \
|
#define LOAD_CHIMU_0213_A64FXf \
|
||||||
{ \
|
{ \
|
||||||
const SiteSpinor & ref(in[offset]); \
|
const SiteSpinor & ref(in[offset]); \
|
||||||
Chimu_00 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -6 * 64)); \
|
Chimu_00 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
|
||||||
Chimu_20 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 0 * 64)); \
|
Chimu_20 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
|
||||||
-    Chimu_01 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -5 * 64)); \
-    Chimu_21 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 1 * 64)); \
-    Chimu_02 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -4 * 64)); \
-    Chimu_22 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 2 * 64)); \
-    Chimu_10 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -3 * 64)); \
-    Chimu_30 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 3 * 64)); \
-    Chimu_11 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -2 * 64)); \
-    Chimu_31 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 4 * 64)); \
-    Chimu_12 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -1 * 64)); \
-    Chimu_32 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 5 * 64)); \
+    Chimu_01 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
+    Chimu_21 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
+    Chimu_02 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
+    Chimu_22 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
+    Chimu_10 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
+    Chimu_30 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
+    Chimu_11 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
+    Chimu_31 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
+    Chimu_12 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
+    Chimu_32 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
}
// LOAD_CHIMU_0312
#define LOAD_CHIMU_0312_A64FXf \
{ \
    const SiteSpinor & ref(in[offset]); \
-    Chimu_00 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -6 * 64)); \
-    Chimu_30 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 3 * 64)); \
-    Chimu_01 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -5 * 64)); \
-    Chimu_31 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 4 * 64)); \
-    Chimu_02 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -4 * 64)); \
-    Chimu_32 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 5 * 64)); \
-    Chimu_10 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -3 * 64)); \
-    Chimu_20 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 0 * 64)); \
-    Chimu_11 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -2 * 64)); \
-    Chimu_21 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 1 * 64)); \
-    Chimu_12 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + -1 * 64)); \
-    Chimu_22 = svld1(pg1, (float32_t*)(base + 2 * 3 * 64 + 2 * 64)); \
+    Chimu_00 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-6)); \
+    Chimu_30 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(3)); \
+    Chimu_01 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-5)); \
+    Chimu_31 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(4)); \
+    Chimu_02 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-4)); \
+    Chimu_32 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(5)); \
+    Chimu_10 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-3)); \
+    Chimu_20 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(0)); \
+    Chimu_11 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-2)); \
+    Chimu_21 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(1)); \
+    Chimu_12 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(-1)); \
+    Chimu_22 = svld1_vnum(pg1, (float32_t*)(base + 2 * 3 * 64), (int64_t)(2)); \
}
// LOAD_TABLE0
#define LOAD_TABLE0 \
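The hunks above are one mechanical rewrite: the byte-offset form svld1(pg1, base + n * 64) becomes svld1_vnum(pg1, base, n). As a stand-alone sketch (not part of the patch; plain ACLE intrinsics, assuming the 512-bit SVE vector length of A64FX so that one vnum step is exactly 64 bytes), the two addressing forms touch the same memory:

#include <arm_sve.h>

// old style: explicit byte offset folded into the pointer
svfloat32_t load_byte_offset(const float *base, long n) {
  svbool_t pg = svptrue_b32();
  return svld1_f32(pg, (const float *)((const char *)base + n * 64));
}

// new style: offset expressed in whole vector registers (vnum), which lets
// the compiler emit the immediate form  ld1w { z.s }, p/z, [x, #n, mul vl]
svfloat32_t load_vnum(const float *base, long n) {
  svbool_t pg = svptrue_b32();
  return svld1_vnum_f32(pg, base, (int64_t)n);
}

The data layout and the set of loads are unchanged; only the addressing mode differs.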
@ -261,26 +263,26 @@ Author: Nils Meyer <nils.meyer@ur.de>
    Chi_12 = svtbl(Chi_12, table0);

// LOAD_GAUGE
-#define LOAD_GAUGE \
-    const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
+#define LOAD_GAUGE(A) \
{ \
-    U_00 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -6 * 64)); \
-    U_10 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -3 * 64)); \
-    U_20 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 0 * 64)); \
-    U_01 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -5 * 64)); \
-    U_11 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -2 * 64)); \
-    U_21 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 1 * 64)); \
+    const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
+    U_00 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-6)); \
+    U_10 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-3)); \
+    U_20 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(0)); \
+    U_01 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-5)); \
+    U_11 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-2)); \
+    U_21 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(1)); \
}
// MULT_2SPIN
#define MULT_2SPIN_1_A64FXf(A) \
{ \
-    const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
-    U_00 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -6 * 64)); \
-    U_10 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -3 * 64)); \
-    U_20 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 0 * 64)); \
-    U_01 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -5 * 64)); \
-    U_11 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -2 * 64)); \
-    U_21 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 1 * 64)); \
+    const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
+    U_00 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-6)); \
+    U_10 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-3)); \
+    U_20 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(0)); \
+    U_01 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-5)); \
+    U_11 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-2)); \
+    U_21 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(1)); \
    UChi_00 = svcmla_x(pg1, zero0, U_00, Chi_00, 0); \
    UChi_10 = svcmla_x(pg1, zero0, U_00, Chi_10, 0); \
    UChi_01 = svcmla_x(pg1, zero0, U_10, Chi_00, 0); \
@ -293,9 +295,9 @@ Author: Nils Meyer <nils.meyer@ur.de>
    UChi_11 = svcmla_x(pg1, UChi_11, U_10, Chi_10, 90); \
    UChi_02 = svcmla_x(pg1, UChi_02, U_20, Chi_00, 90); \
    UChi_12 = svcmla_x(pg1, UChi_12, U_20, Chi_10, 90); \
-    U_00 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -4 * 64)); \
-    U_10 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -1 * 64)); \
-    U_20 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 2 * 64)); \
+    U_00 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-4)); \
+    U_10 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(-1)); \
+    U_20 = svld1_vnum(pg1, (float32_t*)(baseU + 2 * 3 * 64), (int64_t)(2)); \
}
// MULT_2SPIN_BACKEND
#define MULT_2SPIN_2_A64FXf \
@ -570,12 +572,12 @@ Author: Nils Meyer <nils.meyer@ur.de>
    result_31 = svdup_f32(0.); \
    result_32 = svdup_f32(0.);

-// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
+// PREFETCH_RESULT_L2_STORE (uses DC ZVA for cache line zeroing)
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(base) \
{ \
-    svprfd(pg1, (int64_t*)(base + 0), SV_PSTL2STRM); \
-    svprfd(pg1, (int64_t*)(base + 256), SV_PSTL2STRM); \
-    svprfd(pg1, (int64_t*)(base + 512), SV_PSTL2STRM); \
+    asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 0) : "memory" ); \
+    asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 1) : "memory" ); \
+    asm( "dc zva, %[fetchptr] \n\t" : : [fetchptr] "r" (base + 256 * 2) : "memory" ); \
}
// PREFETCH_RESULT_L1_STORE (prefetch store to L1)
#define PREFETCH_RESULT_L1_STORE_INTERNAL_A64FXf(base) \
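The store prefetches are replaced by DC ZVA, which zero-fills a whole cache block and thereby claims the result lines in cache without first reading them back from memory. The 256-byte strides in the new macro match the block size the code assumes for A64FX. A minimal stand-alone sketch (assumption: 256-byte DC ZVA block size and a block-aligned, block-sized buffer; portable code should query the block size from DCZID_EL0 instead of hard-coding it):

#include <cstddef>
#include <cstdint>

// Zero a block-aligned buffer with DC ZVA instead of ordinary stores, so the
// destination lines are established in cache without a read from memory.
static void zero_with_dc_zva(void *ptr, std::size_t bytes) {
  constexpr std::size_t block = 256;  // assumed DC ZVA block size (A64FX)
  std::uint64_t p = reinterpret_cast<std::uint64_t>(ptr);
  for (std::size_t off = 0; off < bytes; off += block) {
    asm volatile("dc zva, %[addr]\n\t" : : [addr] "r"(p + off) : "memory");
  }
}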
@ -46,6 +46,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
#undef MULT_2SPIN_2
#undef MAYBEPERM
#undef LOAD_CHI
+#undef ZERO_PSI
#undef XP_PROJ
#undef YP_PROJ
#undef ZP_PROJ
File diff suppressed because it is too large
@ -117,7 +117,19 @@ accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
          ret._internal[b][c] -= pr * ret._internal[c1][c];
        }
      }
+    }
+
+    // Normalise last row
+    {
+      int c1 = N-1;
+      zeroit(inner);
+      for(int c2=0;c2<N;c2++)
+        inner += innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]);
+
+      nrm = sqrt(inner);
+      nrm = 1.0/nrm;
+      for(int c2=0;c2<N;c2++)
+        ret._internal[c1][c2]*= nrm;
    }
    // assuming the determinant is ok
    return ret;
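The added block completes the row-by-row Gram-Schmidt in ProjectOnGroup: after the earlier rows have been orthogonalised, the last row is now explicitly rescaled to unit norm as well. The same operation on an ordinary complex matrix, for illustration only (Grid's innerProduct/zeroit replaced by standard-library equivalents):

#include <array>
#include <cmath>
#include <complex>

// Rescale the last row of an N x N complex matrix to unit norm, mirroring the
// block added to ProjectOnGroup above.
template <int N>
void normalise_last_row(std::array<std::array<std::complex<double>, N>, N> &m) {
  double inner = 0.0;
  for (int c2 = 0; c2 < N; c2++)
    inner += std::norm(m[N - 1][c2]);      // sum of |m_{N-1,c2}|^2
  const double nrm = 1.0 / std::sqrt(inner);
  for (int c2 = 0; c2 < N; c2++)
    m[N - 1][c2] *= nrm;
}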
@ -103,7 +103,7 @@ int main (int argc, char ** argv)

  detU= Determinant(U) ;
  detU=detU-1.0;
-  std::cout << "Determinant before screw up " << norm2(detU)<<std::endl;
+  std::cout << "Determinant defect before screw up " << norm2(detU)<<std::endl;

  std::cout << " Screwing up determinant " << std::endl;

@ -113,7 +113,8 @@ int main (int argc, char ** argv)
    auto element = PeekIndex<ColourIndex>(U,Nc-1,i);
    element = element * phase;
    PokeIndex<ColourIndex>(U,element,Nc-1,i);
  }
+  U=U*0.1;
  UU=U;

  detU= Determinant(U) ;
@ -108,8 +108,18 @@ int main (int argc, char ** argv)
  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);

  LatticeGaugeField Umu(UGrid);
-  SU<Nc>::ColdConfiguration(Umu);
-  // SU<Nc>::HotConfiguration(RNG4,Umu);
+  if( argc > 1 && argv[1][0] != '-' )
+  {
+    std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
+    FieldMetaData header;
+    NerscIO::readConfiguration(Umu, header, argv[1]);
+  }
+  else
+  {
+    std::cout<<GridLogMessage <<"Using cold configuration"<<std::endl;
+    SU<Nc>::ColdConfiguration(Umu);
+    // SU<Nc>::HotConfiguration(RNG4,Umu);
+  }

  RealD mass=0.3;
  RealD M5 =1.0;
@ -81,6 +81,10 @@ int main(int argc, char **argv) {
  // that have a complex construction
  // standard
  RealD beta = 5.6 ;
+  const int nu = 3;
+  std::vector<int> twists(Nd,0);
+  twists[nu] = 1;
+  ConjugateGimplD::setDirections(twists);
  ConjugateWilsonGaugeActionR Waction(beta);

  const int Ls = 8;
@ -93,9 +97,6 @@ int main(int argc, char **argv) {
  // temporarily need a gauge field
  LatticeGaugeField U(GridPtr);

-  const int nu = 3;
-  std::vector<int> twists(Nd,0);
-  twists[nu] = 1;
  FermionAction::ImplParams params;
  params.twists = twists;
  Real mass=0.04;
@ -79,6 +79,10 @@ int main(int argc, char **argv) {
  // that have a complex construction
  // standard
  RealD beta = 2.6 ;
+  const int nu = 3;
+  std::vector<int> twists(Nd,0);
+  twists[nu] = 1;
+  ConjugateGimplD::setDirections(twists);
  ConjugateIwasakiGaugeActionR Waction(beta);

@ -80,6 +80,9 @@ int main(int argc, char **argv) {
  // that have a complex construction
  // standard
  RealD beta = 5.6 ;
+  std::vector<int> twists(Nd,0);
+  twists[3] = 1;
+  ConjugateGimplD::setDirections(twists);
  ConjugateWilsonGaugeActionR Waction(beta);

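The three HMC test updates above share one pattern: with conjugate (charge-conjugate) boundary conditions, the twisted directions are now registered globally through ConjugateGimplD::setDirections(twists) before any conjugate-gimpl gauge action is constructed, while the fermion action continues to receive the same twists through its ImplParams. A condensed fragment assembled from the lines above (illustrative values; grids and action typedefs as in the tests themselves):

  // register the twist directions once, before building conjugate-BC actions
  std::vector<int> twists(Nd, 0);
  const int nu = 3;                        // twisted direction
  twists[nu] = 1;
  ConjugateGimplD::setDirections(twists);

  RealD beta = 5.6;
  ConjugateWilsonGaugeActionR Waction(beta);

  FermionAction::ImplParams params;        // fermion action still takes the
  params.twists = twists;                  // same twists via ImplParams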
@ -222,9 +222,16 @@ int main (int argc, char ** argv)

  GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
  GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
-  GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
+
+  GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
+
+  GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
+  GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
+  GridRedBlackCartesian *CoarseCoarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseCoarse4d);
+  GridRedBlackCartesian *CoarseCoarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,CoarseCoarse4d);

  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  std::vector<int> cseeds({5,6,7,8});
@ -282,8 +289,7 @@ int main (int argc, char ** argv)

  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);

-  Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
+  Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);

  //////////////////////////////////////////////////
  // Deflate the course space. Recursive multigrid?
@ -311,12 +317,11 @@ int main (int argc, char ** argv)
    }
  }

-  Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
+  Level2Op L2Op(*CoarseCoarse5d,*CoarseCoarse5dRB,1); // Hermitian matrix
  typedef Level2Op::CoarseVector CoarseCoarseVector;
  HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
  L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);

  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
  std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
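The multigrid tests now pass an explicit red-black (checkerboarded) grid alongside the full coarse grid when constructing each coarsened operator, so the coarse levels can be used with Schur red-black solvers. A short fragment of the updated construction pattern, assembled from the lines above (types and helpers as used in these tests; the trailing 1 is the flag marked "Hermitian matrix" in the test source):

  GridCartesian         *Coarse5d   = SpaceTimeGrid::makeFiveDimGrid(1, Coarse4d);
  GridRedBlackCartesian *Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1, Coarse4d);

  Level1Op LDOp(*Coarse5d, *Coarse5dRB, 1);              // full + red-black grid
  LDOp.CoarsenOperator(FGrid, HermIndefOp, Aggregates);  // build coarse stencil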
|
tests/solver/Test_dwf_hdcr_16_rb.cc (new file, 397 lines)
@ -0,0 +1,397 @@
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_dwf_hdcr.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
RealD InverseApproximation(RealD x){
|
||||||
|
return 1.0/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field> class SolverWrapper : public LinearFunction<Field> {
|
||||||
|
private:
|
||||||
|
CheckerBoardedSparseMatrixBase<Field> & _Matrix;
|
||||||
|
SchurRedBlackBase<Field> & _Solver;
|
||||||
|
public:
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Wrap the usual normal equations trick
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
SolverWrapper(CheckerBoardedSparseMatrixBase<Field> &Matrix,
|
||||||
|
SchurRedBlackBase<Field> &Solver)
|
||||||
|
: _Matrix(Matrix), _Solver(Solver) {};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out){
|
||||||
|
|
||||||
|
_Solver(_Matrix,in,out); // Mdag M out = Mdag in
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & _SmootherMatrix;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
_SmootherMatrix(SmootherMatrix),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||||
|
_SmootherOperator.AdjOp(in,tmp);
|
||||||
|
Cheby(MdagMOp,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & SmootherMatrix;
|
||||||
|
FineOperator & SmootherOperator;
|
||||||
|
RealD tol;
|
||||||
|
RealD shift;
|
||||||
|
int maxit;
|
||||||
|
|
||||||
|
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||||
|
shift(_shift),tol(_tol),maxit(_maxit),
|
||||||
|
SmootherOperator(_SmootherOperator),
|
||||||
|
SmootherMatrix(_SmootherMatrix)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ZeroGuesser<Field> Guess;
|
||||||
|
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||||
|
|
||||||
|
Field src(in.Grid());
|
||||||
|
|
||||||
|
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||||
|
SmootherOperator.AdjOp(in,src);
|
||||||
|
Guess(src,out);
|
||||||
|
CG(MdagMOp,src,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||||
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
CoarseOperator & _CoarseOperator;
|
||||||
|
Matrix & _FineMatrix;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
Guesser & _Guess;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
|
FineOperator &Fine,Matrix &FineMatrix,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_CoarseOperator(Coarse),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_FineMatrix(FineMatrix),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=16;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
std::vector<int> blockc ({2,2,2,2});
|
||||||
|
const int nbasis= 32;
|
||||||
|
const int nbasisc= 32;
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
auto cclatt = clatt;
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
cclatt[d] = clatt[d]/blockc[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
// GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
// GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("./ckpoint_lat.4000");
|
||||||
|
//std::string file("./ckpoint_lat.1000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.001;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
|
|
||||||
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
{
|
||||||
|
int nb=nbasis/2;
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.002,1000,800,100,0.0);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,1000,800,100,0.0);
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.05,500,200,150,0.0);//
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.01,1000,100,100,0.0); // Slightly faster
|
||||||
|
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
std::cout << GridLogMessage << " G5R5 "<<n<<std::endl;
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
std::cout << GridLogMessage << " Projection "<<n<<std::endl;
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
std::cout << GridLogMessage << " Copy "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
std::cout << GridLogMessage << " P+ "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
std::cout << GridLogMessage << " P- "<<n<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
|
||||||
|
std::cout << " Making 5D coarse RB grid " <<std::endl;
|
||||||
|
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
|
||||||
|
std::cout << " Made 5D coarse RB grid " <<std::endl;
|
||||||
|
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Deflate the course space. Recursive multigrid?
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
|
||||||
|
// CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||||
|
typedef Level2Op::CoarseVector CoarseCoarseVector;
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector> , SolverWrapper<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
|
||||||
|
|
||||||
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,12,HermIndefOp,Ddwf);
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ConjugateGradient<CoarseVector> CoarseCG(0.005,1000);
|
||||||
|
// SchurDiagMooeeOperator<CoarseOperator,CoarseVector> CoarseMpcDagMpc(LDOp);
|
||||||
|
SchurRedBlackDiagMooeeSolve<CoarseVector> CoarseRBCG(CoarseCG);
|
||||||
|
SolverWrapper<CoarseVector> CoarseSolver(LDOp,CoarseRBCG);
|
||||||
|
|
||||||
|
// NormalEquations<CoarseVector> CoarseCGNE(LDOp,CoarseCG,CoarseZeroGuesser);
|
||||||
|
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
|
||||||
|
HermIndefOp,Ddwf,
|
||||||
|
FineSmoother,
|
||||||
|
CoarseZeroGuesser,
|
||||||
|
CoarseSolver);
|
||||||
|
TwoLevelPrecon.Level(1);
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,20,HermIndefOp,TwoLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
l1PGCR(src,result);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
ConjugateGradient<LatticeFermion> pCG(1.0e-8,60000);
|
||||||
|
result=Zero();
|
||||||
|
// pCG(HermDefOp,src,result);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling red black CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
LatticeFermion src_o(FrbGrid);
|
||||||
|
LatticeFermion result_o(FrbGrid);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
result_o=Zero();
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
|
// pCG(HermOpEO,src_o,result_o);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
|
||||||
|
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
|
||||||
|
// std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
|
||||||
|
// PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
tests/solver/Test_dwf_hdcr_24_regression.cc (new file, 477 lines)
@ -0,0 +1,477 @@
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_dwf_hdcr.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
RealD InverseApproximation(RealD x){
|
||||||
|
return 1.0/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & _SmootherMatrix;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
_SmootherMatrix(SmootherMatrix),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||||
|
_SmootherOperator.AdjOp(in,tmp);
|
||||||
|
Cheby(MdagMOp,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & SmootherMatrix;
|
||||||
|
FineOperator & SmootherOperator;
|
||||||
|
RealD tol;
|
||||||
|
RealD shift;
|
||||||
|
int maxit;
|
||||||
|
|
||||||
|
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||||
|
shift(_shift),tol(_tol),maxit(_maxit),
|
||||||
|
SmootherOperator(_SmootherOperator),
|
||||||
|
SmootherMatrix(_SmootherMatrix)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ZeroGuesser<Field> Guess;
|
||||||
|
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||||
|
|
||||||
|
Field src(in.Grid());
|
||||||
|
|
||||||
|
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||||
|
SmootherOperator.AdjOp(in,src);
|
||||||
|
Guess(src,out);
|
||||||
|
CG(MdagMOp,src,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||||
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
CoarseOperator & _CoarseOperator;
|
||||||
|
Matrix & _FineMatrix;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
Guesser & _Guess;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
|
FineOperator &Fine,Matrix &FineMatrix,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_CoarseOperator(Coarse),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_FineMatrix(FineMatrix),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=24;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
std::vector<int> blockc ({2,2,2,2});
|
||||||
|
const int nbasis= 40;
|
||||||
|
const int nbasisc= 40;
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
auto cclatt = clatt;
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
cclatt[d] = clatt[d]/blockc[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
// GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
// GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
// std::string file("./ckpoint_lat.4000");
|
||||||
|
// std::string file("./ckpoint_lat.1000");
|
||||||
|
// NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
SU<Nc>::HotConfiguration(RNG4,Umu);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.00078;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
|
|
||||||
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
{
|
||||||
|
int nb=nbasis/2;
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.002,1000,800,100,0.0);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,1000,800,100,0.0);
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.01,400,50,50,0.0); // Slightly faster
|
||||||
|
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
std::cout << GridLogMessage << " G5R5 "<<n<<std::endl;
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
std::cout << GridLogMessage << " Projection "<<n<<std::endl;
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
std::cout << GridLogMessage << " Copy "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
std::cout << GridLogMessage << " P+ "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
std::cout << GridLogMessage << " P- "<<n<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
|
||||||
|
std::cout << " Making 5D coarse RB grid " <<std::endl;
|
||||||
|
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
|
||||||
|
std::cout << " Made 5D coarse RB grid " <<std::endl;
|
||||||
|
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1);
|
||||||
|
std::cout << " LDOp.CoarsenOperator " <<std::endl;
|
||||||
|
LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
|
std::cout << " Coarsened Operator " <<std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Deflate the course space. Recursive multigrid?
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
|
||||||
|
// CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||||
|
/*
|
||||||
|
{
|
||||||
|
int nb=nbasisc/2;
|
||||||
|
CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,15.0,0.02,1000,800,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
autoView( subspace , CoarseAggregates.subspace[n],CpuWrite);
|
||||||
|
autoView( subspace_g5, CoarseAggregates.subspace[n+nb],CpuWrite);
|
||||||
|
for(int nn=0;nn<nb;nn++){
|
||||||
|
for(int site=0;site<Coarse5d->oSites();site++){
|
||||||
|
subspace_g5[site](nn) = subspace[site](nn);
|
||||||
|
subspace_g5[site](nn+nb)=-subspace[site](nn+nb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
typedef Level2Op::CoarseVector CoarseCoarseVector;
|
||||||
|
/*
|
||||||
|
Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
|
||||||
|
HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
|
||||||
|
L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<Level2Op,CoarseCoarseVector> IRLHermOpL2(L2Op);
|
||||||
|
CoarseCoarseVector cc_src(CoarseCoarse5d); cc_src=1.0;
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Chebyshev<CoarseCoarseVector> IRLChebyL2(0.001,15.0,301);
|
||||||
|
FunctionHermOp<CoarseCoarseVector> IRLOpChebyL2(IRLChebyL2,IRLHermOpL2);
|
||||||
|
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
|
||||||
|
int cNk=24;
|
||||||
|
int cNm=36;
|
||||||
|
int cNstop=24;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
|
||||||
|
|
||||||
|
int cNconv;
|
||||||
|
std::vector<RealD> eval2(cNm);
|
||||||
|
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
|
||||||
|
IRLL2.calc(eval2,evec2,cc_src,cNconv);
|
||||||
|
|
||||||
|
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.1,1000);
|
||||||
|
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
|
||||||
|
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(L2Op,CoarseCoarseCG,DeflCoarseCoarseGuesser);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<Level1Op,CoarseVector> IRLHermOp(LDOp);
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.001,15.0,301);
|
||||||
|
Chebyshev<CoarseVector> IRLCheby(0.03,12.0,101);
|
||||||
|
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,IRLHermOp);
|
||||||
|
PlainHermOp<CoarseVector> IRLOp (IRLHermOp);
|
||||||
|
int Nk=64;
|
||||||
|
int Nm=128;
|
||||||
|
int Nstop=Nk;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
std::vector<RealD> eval(Nm);
|
||||||
|
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||||
|
IRL.calc(eval,evec,c_src,Nconv);
|
||||||
|
*/
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
// DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||||
|
// NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
// typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
|
||||||
|
|
||||||
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf);
|
||||||
|
/*
|
||||||
|
// MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
|
||||||
|
ChebyshevSmoother<CoarseVector, Level1Op > CoarseSmoother(0.1,15.0,3,L1LinOp,LDOp);
|
||||||
|
|
||||||
|
// MirsSmoother<CoarseVector, Level1Op > CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp);
|
||||||
|
// MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf);
|
||||||
|
|
||||||
|
CoarseMG Level2Precon (CoarseAggregates, L2Op,
|
||||||
|
L1LinOp,LDOp,
|
||||||
|
CoarseSmoother,
|
||||||
|
DeflCoarseCoarseGuesser,
|
||||||
|
DeflCoarseCoarseCGNE);
|
||||||
|
Level2Precon.Level(2);
|
||||||
|
|
||||||
|
// PGCR Applying this solver to solve the coarse space problem
|
||||||
|
PrecGeneralisedConjugateResidual<CoarseVector> l2PGCR(0.1, 100, L1LinOp,Level2Precon,16,16);
l2PGCR.Level(2);

// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ThreeLevelMG ThreeLevelPrecon(Aggregates, LDOp,
                              HermIndefOp,Ddwf,
                              FineSmoother,
                              CoarseZeroGuesser,
                              l2PGCR);
ThreeLevelPrecon.Level(1);

// Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,ThreeLevelPrecon,16,16);
l1PGCR.Level(1);
*/

std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
result=Zero();

ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ConjugateGradient<CoarseVector> CoarseCG(0.01,1000);
NormalEquations<CoarseVector> CoarseCGNE(LDOp,CoarseCG,CoarseZeroGuesser);
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
                          HermIndefOp,Ddwf,
                          FineSmoother,
                          CoarseZeroGuesser,
                          CoarseCGNE);
TwoLevelPrecon.Level(1);
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,20,HermIndefOp,TwoLevelPrecon,16,16);
l1PGCR.Level(1);
l1PGCR(src,result);

std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling CG "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
ConjugateGradient<LatticeFermion> pCG(1.0e-8,60000);
result=Zero();
// pCG(HermDefOp,src,result);

std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling red black CG "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
result=Zero();

LatticeFermion src_o(FrbGrid);
LatticeFermion result_o(FrbGrid);
pickCheckerboard(Odd,src_o,src);
result_o=Zero();
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
// pCG(HermOpEO,src_o,result_o);

std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
// std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
// PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);

std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Done "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Grid_finalize();
}
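For reference: the CoarseCGNE and DeflCoarseCGNE objects used in these tests wrap the usual normal-equations trick, i.e. the (generally non-Hermitian) coarse operator M is inverted by running CG on the Hermitian positive-definite system Mdag M x = Mdag b. Below is a minimal stand-alone illustration of that trick with a small dense matrix; the names, types and solveCGNE helper are illustrative only and are not Grid's API.

#include <cstdio>
#include <cstddef>
#include <cmath>
#include <vector>

using Vec = std::vector<double>;
using Mat = std::vector<Vec>;

// y = A x for a small dense matrix
static Vec matvec(const Mat &A, const Vec &x) {
  Vec y(A.size(), 0.0);
  for (std::size_t i = 0; i < A.size(); ++i)
    for (std::size_t j = 0; j < x.size(); ++j) y[i] += A[i][j] * x[j];
  return y;
}

static double dot(const Vec &a, const Vec &b) {
  double s = 0.0;
  for (std::size_t i = 0; i < a.size(); ++i) s += a[i] * b[i];
  return s;
}

// Solve M x = b by CG on the normal equations  Mdag M x = Mdag b.
static Vec solveCGNE(const Mat &M, const Mat &Mdag, const Vec &b,
                     double tol, int maxit) {
  Vec x(b.size(), 0.0);
  Vec r = matvec(Mdag, b);                 // residual of Mdag M x = Mdag b at x = 0
  Vec p = r;
  double rr = dot(r, r);
  for (int k = 0; k < maxit && std::sqrt(rr) > tol; ++k) {
    Vec Ap = matvec(Mdag, matvec(M, p));   // (Mdag M) p
    double alpha = rr / dot(p, Ap);
    for (std::size_t i = 0; i < x.size(); ++i) { x[i] += alpha * p[i]; r[i] -= alpha * Ap[i]; }
    double rrNew = dot(r, r);
    for (std::size_t i = 0; i < p.size(); ++i) p[i] = r[i] + (rrNew / rr) * p[i];
    rr = rrNew;
  }
  return x;
}

int main() {
  Mat M    = {{2, 1}, {0, 3}};   // small non-Hermitian stand-in for the coarse operator
  Mat Mdag = {{2, 0}, {1, 3}};   // its transpose plays the role of Mdag
  Vec b    = {3, 3};
  Vec x    = solveCGNE(M, Mdag, b, 1.0e-12, 100);
  std::printf("x = (%g, %g)\n", x[0], x[1]);  // expect (1, 1) since M (1,1)^T = (3,3)^T
}

The same composition is what NormalEquations<CoarseVector>(LDOp,CoarseCG,Guesser) supplies on the coarse grid, with the guesser providing the starting vector for the CG iteration.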
@ -262,6 +262,8 @@ int main (int argc, char ** argv)
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
@ -328,7 +330,7 @@ int main (int argc, char ** argv)
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
@ -352,7 +354,9 @@ int main (int argc, char ** argv)
// ConjugateGradient<CoarseVector> CoarseCG(0.01,1000);
ConjugateGradient<CoarseVector> CoarseCG(0.02,1000);// 14.7s
ConjugateGradient<CoarseVector> CoarseCG(0.01,2000);// 14.7s
eval.resize(0);
evec.resize(0,Coarse5d);
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
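In the hunk above, DeflatedGuesser<CoarseVector>(evec,eval) seeds each coarse solve with the projection of the source onto whatever eigenpairs are held in (evec,eval); since this change resizes both to zero, it degenerates to a zero starting guess, but the general form of the guess is x0 = sum_i v_i (v_i . b) / lambda_i. A short sketch of that guess, assuming orthonormal eigenvectors and using plain C++ containers rather than Grid types (illustrative only, not Grid's DeflatedGuesser implementation):

#include <cstddef>
#include <vector>

using Vec = std::vector<double>;

static double dot(const Vec &a, const Vec &b) {
  double s = 0.0;
  for (std::size_t i = 0; i < a.size(); ++i) s += a[i] * b[i];
  return s;
}

// Deflated starting guess: project b onto the stored (orthonormal) eigenvectors
// and divide by the eigenvalues, x0 = sum_i v_i (v_i . b) / lambda_i.
static Vec deflatedGuess(const std::vector<Vec> &evec,
                         const std::vector<double> &eval,
                         const Vec &b) {
  Vec x0(b.size(), 0.0);
  for (std::size_t i = 0; i < evec.size(); ++i) {
    double c = dot(evec[i], b) / eval[i];
    for (std::size_t j = 0; j < b.size(); ++j) x0[j] += c * evec[i][j];
  }
  return x0;   // handed to the coarse CG as its starting vector
}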
397 tests/solver/Test_dwf_hdcr_48_rb.cc Normal file
@ -0,0 +1,397 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_dwf_hdcr.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
RealD InverseApproximation(RealD x){
|
||||||
|
return 1.0/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field> class SolverWrapper : public LinearFunction<Field> {
|
||||||
|
private:
|
||||||
|
CheckerBoardedSparseMatrixBase<Field> & _Matrix;
|
||||||
|
SchurRedBlackBase<Field> & _Solver;
|
||||||
|
public:
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Wrap the usual normal equations trick
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
SolverWrapper(CheckerBoardedSparseMatrixBase<Field> &Matrix,
|
||||||
|
SchurRedBlackBase<Field> &Solver)
|
||||||
|
: _Matrix(Matrix), _Solver(Solver) {};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out){
|
||||||
|
|
||||||
|
_Solver(_Matrix,in,out); // Mdag M out = Mdag in
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & _SmootherMatrix;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
_SmootherMatrix(SmootherMatrix),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||||
|
_SmootherOperator.AdjOp(in,tmp);
|
||||||
|
Cheby(MdagMOp,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & SmootherMatrix;
|
||||||
|
FineOperator & SmootherOperator;
|
||||||
|
RealD tol;
|
||||||
|
RealD shift;
|
||||||
|
int maxit;
|
||||||
|
|
||||||
|
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||||
|
shift(_shift),tol(_tol),maxit(_maxit),
|
||||||
|
SmootherOperator(_SmootherOperator),
|
||||||
|
SmootherMatrix(_SmootherMatrix)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ZeroGuesser<Field> Guess;
|
||||||
|
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||||
|
|
||||||
|
Field src(in.Grid());
|
||||||
|
|
||||||
|
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||||
|
SmootherOperator.AdjOp(in,src);
|
||||||
|
Guess(src,out);
|
||||||
|
CG(MdagMOp,src,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||||
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
CoarseOperator & _CoarseOperator;
|
||||||
|
Matrix & _FineMatrix;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
Guesser & _Guess;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
|
FineOperator &Fine,Matrix &FineMatrix,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_CoarseOperator(Coarse),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_FineMatrix(FineMatrix),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=24;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
//std::vector<int> block ({2,2,2,2});
|
||||||
|
const int nbasis= 40;
|
||||||
|
const int nbasisc= 40;
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
// GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
// GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
//std::string file("./ckpoint_lat.4000");
|
||||||
|
std::string file("./ckpoint_lat.1000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.00078;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
|
|
||||||
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
{
|
||||||
|
int nb=nbasis/2;
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.002,1000,800,100,0.0);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,1000,800,100,0.0);
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.01,1000,100,100,0.0); // Slightly faster
|
||||||
|
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
std::cout << GridLogMessage << " G5R5 "<<n<<std::endl;
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
std::cout << GridLogMessage << " Projection "<<n<<std::endl;
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
std::cout << GridLogMessage << " Copy "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
std::cout << GridLogMessage << " P+ "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
std::cout << GridLogMessage << " P- "<<n<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
|
||||||
|
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
|
||||||
|
|
||||||
|
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Deflate the coarse space. Recursive multigrid?
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
|
||||||
|
// CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||||
|
typedef Level2Op::CoarseVector CoarseCoarseVector;
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector> , SolverWrapper<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
std::vector<RealD> tols({0.015});
|
||||||
|
std::vector<int> ords({12});
|
||||||
|
std::vector<RealD> los({0.8});
|
||||||
|
for(int l=0;l<los.size();l++){
|
||||||
|
for(int o=0;o<ords.size();o++){
|
||||||
|
for(int t=0;t<tols.size();t++){
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
std::cout << GridLogMessage <<" tol " << tols[t] << " cheby order " <<ords[o]<< " lo "<<los[l] <<std::endl;
|
||||||
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(los[l],60.0,ords[o],HermIndefOp,Ddwf);
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ConjugateGradient<CoarseVector> CoarseCG(tols[t],10000);
|
||||||
|
SchurRedBlackDiagMooeeSolve<CoarseVector> CoarseRBCG(CoarseCG);
|
||||||
|
SolverWrapper<CoarseVector> CoarseSolver(LDOp,CoarseRBCG);
|
||||||
|
|
||||||
|
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
|
||||||
|
HermIndefOp,Ddwf,
|
||||||
|
FineSmoother,
|
||||||
|
CoarseZeroGuesser,
|
||||||
|
CoarseSolver);
|
||||||
|
TwoLevelPrecon.Level(1);
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,20,HermIndefOp,TwoLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
l1PGCR(src,result);
|
||||||
|
}}}
|
||||||
|
|
||||||
|
ConjugateGradient<LatticeFermion> pCG(1.0e-8,60000);
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling red black CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
LatticeFermion src_o(FrbGrid);
|
||||||
|
LatticeFermion result_o(FrbGrid);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
result_o=Zero();
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
|
pCG(HermOpEO,src_o,result_o);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
pCG(HermDefOp,src,result);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
|
||||||
|
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
|
||||||
|
// std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
|
||||||
|
// PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
473 tests/solver/Test_dwf_hdcr_48_regression.cc Normal file
@ -0,0 +1,473 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_dwf_hdcr.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
/* Params
|
||||||
|
* Grid:
|
||||||
|
* block1(4)
|
||||||
|
* block2(4)
|
||||||
|
*
|
||||||
|
* Subspace
|
||||||
|
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
|
||||||
|
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
|
||||||
|
|
||||||
|
* Smoother:
|
||||||
|
* * Fine: Cheby(hi, lo, order) -- 60,0.5,10
|
||||||
|
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
|
||||||
|
|
||||||
|
* Lanczos:
|
||||||
|
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
|
||||||
|
*/
|
||||||
|
RealD InverseApproximation(RealD x){
|
||||||
|
return 1.0/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & _SmootherMatrix;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
_SmootherMatrix(SmootherMatrix),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
|
||||||
|
_SmootherOperator.AdjOp(in,tmp);
|
||||||
|
Cheby(MdagMOp,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
Matrix & SmootherMatrix;
|
||||||
|
FineOperator & SmootherOperator;
|
||||||
|
RealD tol;
|
||||||
|
RealD shift;
|
||||||
|
int maxit;
|
||||||
|
|
||||||
|
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
|
||||||
|
shift(_shift),tol(_tol),maxit(_maxit),
|
||||||
|
SmootherOperator(_SmootherOperator),
|
||||||
|
SmootherMatrix(_SmootherMatrix)
|
||||||
|
{};
|
||||||
|
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ZeroGuesser<Field> Guess;
|
||||||
|
ConjugateGradient<Field> CG(tol,maxit,false);
|
||||||
|
|
||||||
|
Field src(in.Grid());
|
||||||
|
|
||||||
|
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
|
||||||
|
SmootherOperator.AdjOp(in,src);
|
||||||
|
Guess(src,out);
|
||||||
|
CG(MdagMOp,src,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
|
||||||
|
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
|
||||||
|
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
|
||||||
|
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
|
||||||
|
typedef LinearOperatorBase<FineField> FineOperator;
|
||||||
|
typedef LinearFunction <FineField> FineSmoother;
|
||||||
|
|
||||||
|
Aggregates & _Aggregates;
|
||||||
|
CoarseOperator & _CoarseOperator;
|
||||||
|
Matrix & _FineMatrix;
|
||||||
|
FineOperator & _FineOperator;
|
||||||
|
Guesser & _Guess;
|
||||||
|
FineSmoother & _Smoother;
|
||||||
|
CoarseSolver & _CoarseSolve;
|
||||||
|
|
||||||
|
int level; void Level(int lv) {level = lv; };
|
||||||
|
|
||||||
|
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
|
||||||
|
|
||||||
|
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
|
||||||
|
FineOperator &Fine,Matrix &FineMatrix,
|
||||||
|
FineSmoother &Smoother,
|
||||||
|
Guesser &Guess_,
|
||||||
|
CoarseSolver &CoarseSolve_)
|
||||||
|
: _Aggregates(Agg),
|
||||||
|
_CoarseOperator(Coarse),
|
||||||
|
_FineOperator(Fine),
|
||||||
|
_FineMatrix(FineMatrix),
|
||||||
|
_Smoother(Smoother),
|
||||||
|
_Guess(Guess_),
|
||||||
|
_CoarseSolve(CoarseSolve_),
|
||||||
|
level(1) { }
|
||||||
|
|
||||||
|
virtual void operator()(const FineField &in, FineField & out)
|
||||||
|
{
|
||||||
|
CoarseVector Csrc(_CoarseOperator.Grid());
|
||||||
|
CoarseVector Csol(_CoarseOperator.Grid());
|
||||||
|
FineField vec1(in.Grid());
|
||||||
|
FineField vec2(in.Grid());
|
||||||
|
|
||||||
|
double t;
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(in,out);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Update the residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
|
||||||
|
|
||||||
|
// Fine to Coarse
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.ProjectToSubspace (Csrc,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse correction
|
||||||
|
t=-usecond();
|
||||||
|
_CoarseSolve(Csrc,Csol);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Coarse to Fine
|
||||||
|
t=-usecond();
|
||||||
|
_Aggregates.PromoteFromSubspace(Csol,vec1);
|
||||||
|
add(out,out,vec1);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
// Residual
|
||||||
|
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
|
||||||
|
|
||||||
|
// Fine Smoother
|
||||||
|
t=-usecond();
|
||||||
|
_Smoother(vec1,vec2);
|
||||||
|
t+=usecond();
|
||||||
|
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
|
||||||
|
|
||||||
|
add( out,out,vec2);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=24;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
// Construct a coarsened grid; utility for this?
|
||||||
|
///////////////////////////////////////////////////
|
||||||
|
std::vector<int> block ({2,2,2,2});
|
||||||
|
std::vector<int> blockc ({2,2,2,2});
|
||||||
|
const int nbasis= 40;
|
||||||
|
const int nbasisc= 40;
|
||||||
|
auto clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/block[d];
|
||||||
|
}
|
||||||
|
auto cclatt = clatt;
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
cclatt[d] = clatt[d]/blockc[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
// GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
|
||||||
|
// GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
|
||||||
|
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
|
||||||
|
LatticeFermion result(FGrid);
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
// std::string file("./ckpoint_lat.4000");
|
||||||
|
std::string file("./ckpoint_lat.1000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
RealD mass=0.00078;
|
||||||
|
RealD M5=1.8;
|
||||||
|
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
|
||||||
|
typedef CoarseOperator::CoarseVector CoarseVector;
|
||||||
|
typedef CoarseOperator::siteVector siteVector;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
|
||||||
|
|
||||||
|
Subspace Aggregates(Coarse5d,FGrid,0);
|
||||||
|
|
||||||
|
assert ( (nbasis & 0x1)==0);
|
||||||
|
{
|
||||||
|
int nb=nbasis/2;
|
||||||
|
LatticeFermion A(FGrid);
|
||||||
|
LatticeFermion B(FGrid);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.002,1000,800,100,0.0);
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,1000,800,100,0.0);
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.01,1000,100,100,0.0); // Slightly faster
|
||||||
|
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
std::cout << GridLogMessage << " G5R5 "<<n<<std::endl;
|
||||||
|
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
|
||||||
|
std::cout << GridLogMessage << " Projection "<<n<<std::endl;
|
||||||
|
A = Aggregates.subspace[n];
|
||||||
|
B = Aggregates.subspace[n+nb];
|
||||||
|
std::cout << GridLogMessage << " Copy "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
|
||||||
|
std::cout << GridLogMessage << " P+ "<<n<<std::endl;
|
||||||
|
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
|
||||||
|
std::cout << GridLogMessage << " P- "<<n<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
|
||||||
|
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
|
||||||
|
|
||||||
|
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
|
||||||
|
|
||||||
|
|
||||||
|
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
|
||||||
|
std::cout << " Making 5D coarse RB grid " <<std::endl;
|
||||||
|
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
|
||||||
|
std::cout << " Made 5D coarse RB grid " <<std::endl;
|
||||||
|
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// Deflate the coarse space. Recursive multigrid?
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
|
||||||
|
// CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
|
||||||
|
/*
|
||||||
|
{
|
||||||
|
int nb=nbasisc/2;
|
||||||
|
CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,15.0,0.02,1000,800,100,0.0);
|
||||||
|
for(int n=0;n<nb;n++){
|
||||||
|
autoView( subspace , CoarseAggregates.subspace[n],CpuWrite);
|
||||||
|
autoView( subspace_g5, CoarseAggregates.subspace[n+nb],CpuWrite);
|
||||||
|
for(int nn=0;nn<nb;nn++){
|
||||||
|
for(int site=0;site<Coarse5d->oSites();site++){
|
||||||
|
subspace_g5[site](nn) = subspace[site](nn);
|
||||||
|
subspace_g5[site](nn+nb)=-subspace[site](nn+nb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
typedef Level2Op::CoarseVector CoarseCoarseVector;
|
||||||
|
/*
|
||||||
|
Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
|
||||||
|
HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
|
||||||
|
L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
MdagMLinearOperator<Level2Op,CoarseCoarseVector> IRLHermOpL2(L2Op);
|
||||||
|
CoarseCoarseVector cc_src(CoarseCoarse5d); cc_src=1.0;
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Chebyshev<CoarseCoarseVector> IRLChebyL2(0.001,15.0,301);
|
||||||
|
FunctionHermOp<CoarseCoarseVector> IRLOpChebyL2(IRLChebyL2,IRLHermOpL2);
|
||||||
|
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
|
||||||
|
int cNk=24;
|
||||||
|
int cNm=36;
|
||||||
|
int cNstop=24;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
|
||||||
|
|
||||||
|
int cNconv;
|
||||||
|
std::vector<RealD> eval2(cNm);
|
||||||
|
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
|
||||||
|
IRLL2.calc(eval2,evec2,cc_src,cNconv);
|
||||||
|
|
||||||
|
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.1,1000);
|
||||||
|
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
|
||||||
|
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(L2Op,CoarseCoarseCG,DeflCoarseCoarseGuesser);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Running Coarse grid Lanczos "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
|
||||||
|
MdagMLinearOperator<Level1Op,CoarseVector> IRLHermOp(LDOp);
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.001,15.0,301);
|
||||||
|
Chebyshev<CoarseVector> IRLCheby(0.03,12.0,101);
|
||||||
|
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,IRLHermOp);
|
||||||
|
PlainHermOp<CoarseVector> IRLOp (IRLHermOp);
|
||||||
|
int Nk=64;
|
||||||
|
int Nm=128;
|
||||||
|
int Nstop=Nk;
|
||||||
|
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
std::vector<RealD> eval(Nm);
|
||||||
|
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||||
|
IRL.calc(eval,evec,c_src,Nconv);
|
||||||
|
*/
|
||||||
|
CoarseVector c_src(Coarse5d); c_src=1.0;
|
||||||
|
// DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||||
|
// NormalEquations<CoarseVector> DeflCoarseCGNE(LDOp,CoarseCG,DeflCoarseGuesser);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
// typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
|
||||||
|
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
|
||||||
|
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
|
||||||
|
|
||||||
|
ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.25,60.0,12,HermIndefOp,Ddwf);
|
||||||
|
/*
|
||||||
|
// MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
|
||||||
|
ChebyshevSmoother<CoarseVector, Level1Op > CoarseSmoother(0.1,15.0,3,L1LinOp,LDOp);
|
||||||
|
|
||||||
|
// MirsSmoother<CoarseVector, Level1Op > CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp);
|
||||||
|
// MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf);
|
||||||
|
|
||||||
|
CoarseMG Level2Precon (CoarseAggregates, L2Op,
|
||||||
|
L1LinOp,LDOp,
|
||||||
|
CoarseSmoother,
|
||||||
|
DeflCoarseCoarseGuesser,
|
||||||
|
DeflCoarseCoarseCGNE);
|
||||||
|
Level2Precon.Level(2);
|
||||||
|
|
||||||
|
// PGCR Applying this solver to solve the coarse space problem
|
||||||
|
PrecGeneralisedConjugateResidual<CoarseVector> l2PGCR(0.1, 100, L1LinOp,Level2Precon,16,16);
|
||||||
|
l2PGCR.Level(2);
|
||||||
|
|
||||||
|
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ThreeLevelMG ThreeLevelPrecon(Aggregates, LDOp,
|
||||||
|
HermIndefOp,Ddwf,
|
||||||
|
FineSmoother,
|
||||||
|
CoarseZeroGuesser,
|
||||||
|
l2PGCR);
|
||||||
|
ThreeLevelPrecon.Level(1);
|
||||||
|
|
||||||
|
// Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,ThreeLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
*/
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling 2 level Multigrid "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
ConjugateGradient<CoarseVector> CoarseCG(0.01,1000);
|
||||||
|
NormalEquations<CoarseVector> CoarseCGNE(LDOp,CoarseCG,CoarseZeroGuesser);
|
||||||
|
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
|
||||||
|
HermIndefOp,Ddwf,
|
||||||
|
FineSmoother,
|
||||||
|
CoarseZeroGuesser,
|
||||||
|
CoarseCGNE);
|
||||||
|
TwoLevelPrecon.Level(1);
|
||||||
|
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,20,HermIndefOp,TwoLevelPrecon,16,16);
|
||||||
|
l1PGCR.Level(1);
|
||||||
|
l1PGCR(src,result);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
ConjugateGradient<LatticeFermion> pCG(1.0e-8,60000);
|
||||||
|
result=Zero();
|
||||||
|
// pCG(HermDefOp,src,result);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Calling red black CG "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
result=Zero();
|
||||||
|
|
||||||
|
LatticeFermion src_o(FrbGrid);
|
||||||
|
LatticeFermion result_o(FrbGrid);
|
||||||
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
result_o=Zero();
|
||||||
|
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||||
|
pCG(HermOpEO,src_o,result_o);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
|
||||||
|
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
|
||||||
|
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
|
||||||
|
// std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
|
||||||
|
// PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Done "<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
@ -370,6 +370,11 @@ int main (int argc, char ** argv)
GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
GridRedBlackCartesian *CoarseCoarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseCoarse4d);
GridRedBlackCartesian *CoarseCoarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,CoarseCoarse4d);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::vector<int> cseeds({5,6,7,8});
@ -434,8 +439,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
Level1Op LDOp(*Coarse5d,*Coarse5dRB,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
Level1Op LDOpPV(*Coarse5d,1); LDOpPV.CoarsenOperator(FGrid,HermIndefOpPV,Aggregates);
Level1Op LDOpPV(*Coarse5d,*Coarse5dRB,1); LDOpPV.CoarsenOperator(FGrid,HermIndefOpPV,Aggregates);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
@ -274,6 +274,8 @@ int main (int argc, char ** argv)
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(Ls,Coarse4d);
GridRedBlackCartesian * Coarse4dRB = SpaceTimeGrid::makeFourDimRedBlackGrid(Coarse4d);
GridRedBlackCartesian * Coarse5dRB = SpaceTimeGrid::makeFiveDimRedBlackGrid(1,Coarse4d);
std::vector<int> seeds({1,2,3,4});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds);
@ -335,7 +337,7 @@ int main (int argc, char ** argv)
NonHermitianLinearOperator<DomainWallFermionR,LatticeFermion> LinOpDwf(Ddwf);
Level1Op LDOp (*Coarse5d,0);
Level1Op LDOp (*Coarse5d,*Coarse5dRB,0);
std::cout<<GridLogMessage << " Calling Coarsen the operator " <<std::endl;
LDOp.CoarsenOperator(FGrid,LinOpDwf,Aggregates5D);
1287 tests/solver/Test_hw_multigrid_mixed_48.cc Normal file
File diff suppressed because it is too large
1326 tests/solver/Test_hw_multigrid_mixed_48_rb.cc Normal file
File diff suppressed because it is too large