1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-20 00:36:55 +01:00

Merge branch 'develop' into feature/hirep

This commit is contained in:
Guido Cossu
2016-09-01 12:59:53 +01:00
75 changed files with 16078 additions and 2795 deletions

276
lib/FFT.h Normal file
View File

@ -0,0 +1,276 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Cshift.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _GRID_FFT_H_
#define _GRID_FFT_H_
#ifdef HAVE_FFTW
#include <fftw3.h>
#endif
namespace Grid {
template<class scalar> struct FFTW { };
#ifdef HAVE_FFTW
template<> struct FFTW<ComplexD> {
public:
typedef fftw_complex FFTW_scalar;
typedef fftw_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftw_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftw_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftw_destroy_plan(p);
}
};
template<> struct FFTW<ComplexF> {
public:
typedef fftwf_complex FFTW_scalar;
typedef fftwf_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftwf_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftwf_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftwf_destroy_plan(p);
}
};
#endif
#ifndef FFTW_FORWARD
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#endif
class FFT {
private:
GridCartesian *vgrid;
GridCartesian *sgrid;
int Nd;
double flops;
double flops_call;
uint64_t usec;
std::vector<int> dimensions;
std::vector<int> processors;
std::vector<int> processor_coor;
public:
static const int forward=FFTW_FORWARD;
static const int backward=FFTW_BACKWARD;
double Flops(void) {return flops;}
double MFlops(void) {return flops/usec;}
FFT ( GridCartesian * grid ) :
vgrid(grid),
Nd(grid->_ndimension),
dimensions(grid->_fdimensions),
processors(grid->_processors),
processor_coor(grid->_processor_coor)
{
flops=0;
usec =0;
std::vector<int> layout(Nd,1);
sgrid = new GridCartesian(dimensions,layout,processors);
};
~FFT ( void) {
delete sgrid;
}
template<class vobj>
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
conformable(result._grid,vgrid);
conformable(source._grid,vgrid);
int L = vgrid->_ldimensions[dim];
int G = vgrid->_fdimensions[dim];
std::vector<int> layout(Nd,1);
std::vector<int> pencil_gd(vgrid->_fdimensions);
pencil_gd[dim] = G*processors[dim];
// Pencil global vol LxLxGxLxL per node
GridCartesian pencil_g(pencil_gd,layout,processors);
// Construct pencils
typedef typename vobj::scalar_object sobj;
typedef typename sobj::scalar_type scalar;
Lattice<vobj> ssource(vgrid); ssource =source;
Lattice<sobj> pgsource(&pencil_g);
Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
#ifndef HAVE_FFTW
assert(0);
#else
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
{
int Ncomp = sizeof(sobj)/sizeof(scalar);
int Nlow = 1;
for(int d=0;d<dim;d++){
Nlow*=vgrid->_ldimensions[d];
}
int rank = 1; /* 1d transforms */
int n[] = {G}; /* 1d transforms of length G */
int howmany = Ncomp;
int odist,idist,istride,ostride;
idist = odist = 1; /* Distance between consecutive FT's */
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
int *inembed = n, *onembed = n;
int sign = FFTW_FORWARD;
if (inverse) sign = FFTW_BACKWARD;
FFTW_plan p;
{
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
in,inembed,
istride,idist,
out,onembed,
ostride, odist,
sign,FFTW_ESTIMATE);
}
double add,mul,fma;
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
GridStopWatch timer;
// Barrel shift and collect global pencil
for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
sobj s;
peekLocalSite(s,ssource,lcoor);
lcoor[dim]+=p*L;
pokeLocalSite(s,pgsource,lcoor);
}
ssource = Cshift(ssource,dim,L);
}
// Loop over orthog coords
int NN=pencil_g.lSites();
GridStopWatch Timer;
Timer.Start();
PARALLEL_FOR_LOOP
for(int idx=0;idx<NN;idx++) {
std::vector<int> lcoor(Nd);
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out);
}
}
Timer.Stop();
usec += Timer.useconds();
flops+= flops_call*NN;
int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
std::vector<int> gcoor = lcoor;
// extract the result
sobj s;
gcoor[dim] = lcoor[dim]+L*pc;
peekLocalSite(s,pgresult,gcoor);
pokeLocalSite(s,result,lcoor);
}
FFTW<scalar>::fftw_destroy_plan(p);
}
#endif
}
};
}
#endif

View File

@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/Simd.h>
#include <Grid/Threads.h>
#include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h>
#include <Grid/Cartesian.h>
#include <Grid/Tensors.h>
@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h>
#include <Grid/Init.h>
#include <Grid/FFT.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>

View File

@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
std::vector<int> cores(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
GridLogConfigure(logstreams);
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init();
}

File diff suppressed because one or more lines are too long

View File

@ -1,6 +1,3 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/include/
extra_sources=
if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc
@ -20,16 +17,8 @@ endif
include Make.inc
include Eigen.inc
lib_LIBRARIES = libGrid.a
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
fftwdir = $(prefix)/lib/
fftw_DATA = libfftw3.a
#
# Include files
#
otherincludedir = $(includedir)/Grid
nobase_otherinclude_HEADERS =$(HFILES) $(EFILES) fftw3.h Config.h
lib_LTLIBRARIES = libGrid.la
libGrid_la_SOURCES = $(CCFILES) $(extra_sources)
libGrid_ladir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

View File

@ -18,10 +18,10 @@
#include <stddef.h>
#include <Config.h>
#ifdef HAVE_GMP_H
#ifdef HAVE_LIBGMP
#include "bigfloat.h"
#else
#include "algorithms/approx/bigfloat_double.h"
#include "bigfloat_double.h"
#endif
#define JMAX 10000 //Maximum number of iterations of Newton's approximation

View File

@ -127,21 +127,12 @@ class CartesianCommunicator {
int recv_from_rank,
int bytes);
void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
////////////////////////////////////////////////////////////

View File

@ -144,28 +144,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
}
// Basic Halo comms primitive
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
MPI_Startall(list.size(),&list[0]);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
@ -173,12 +151,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromInit(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromBegin(reqs);
for(int i=0;i<reqs.size();i++){
list.push_back(reqs[i]);
}
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{

View File

@ -84,19 +84,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);

View File

@ -268,10 +268,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0); //unimplemented
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
@ -284,15 +280,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest);
}
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0); // Unimplemented
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
// shmem_quiet(); // I'm done

View File

@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
}
PARALLEL_FOR_LOOP
//PARALLEL_FOR_LOOP
for(int idx=0;idx<ig->lSites();idx++){
std::vector<int> lcoor(ni);
ig->LocalIndexToLocalCoor(idx,lcoor);
@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
}
template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,lowDim,lcoor);
pokeLocalSite(s,higherDim,hcoor);
}
}
}
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,higherDim,hcoor);
pokeLocalSite(s,lowDim,lcoor);
}
}
}
template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{

View File

@ -111,6 +111,8 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \
template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>;
@ -120,7 +122,9 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>;
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \
@ -143,6 +147,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -185,6 +190,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;

View File

@ -54,18 +54,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass;
std::vector<Coeff_t> diag (Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag = bs;
std::vector<RealD> upper= cs;
std::vector<RealD> lower= cs;
std::vector<Coeff_t> diag = bs;
std::vector<Coeff_t> upper= cs;
std::vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper);
@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = beo;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = beo;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
@ -88,9 +88,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-cee[i];
lower[i]=-cee[i];
@ -104,9 +104,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag = bee;
std::vector<RealD> upper(Ls);
std::vector<RealD> lower(Ls);
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
@ -129,9 +129,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0);
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
@ -141,9 +141,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{
int Ls=this->Ls;
std::vector<RealD> diag =bs;
std::vector<RealD> upper=cs;
std::vector<RealD> lower=cs;
std::vector<Coeff_t> diag =bs;
std::vector<Coeff_t> upper=cs;
std::vector<Coeff_t> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper);
@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
SetCoefficientsZolotarev(1.0,zdata,b,c);
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
{
int Ls=this->Ls;
@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
}
{
double delta_d=mass*cee[Ls-1];
Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}

View File

@ -62,16 +62,16 @@ namespace Grid {
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper);
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper);
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
virtual void Instantiatable(void)=0;
@ -91,23 +91,23 @@ namespace Grid {
RealD mass;
// Cayley form Moebius (tanh and zolotarev)
std::vector<RealD> omega;
std::vector<RealD> bs; // S dependent coeffs
std::vector<RealD> cs;
std::vector<RealD> as;
std::vector<Coeff_t> omega;
std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<Coeff_t> cs;
std::vector<Coeff_t> as;
// For preconditioning Cayley form
std::vector<RealD> bee;
std::vector<RealD> cee;
std::vector<RealD> aee;
std::vector<RealD> beo;
std::vector<RealD> ceo;
std::vector<RealD> aeo;
std::vector<Coeff_t> bee;
std::vector<Coeff_t> cee;
std::vector<Coeff_t> aee;
std::vector<Coeff_t> beo;
std::vector<Coeff_t> ceo;
std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix
std::vector<RealD> lee;
std::vector<RealD> leem;
std::vector<RealD> uee;
std::vector<RealD> ueem;
std::vector<RealD> dee;
std::vector<Coeff_t> lee;
std::vector<Coeff_t> leem;
std::vector<Coeff_t> uee;
std::vector<Coeff_t> ueem;
std::vector<Coeff_t> dee;
// Constructors
CayleyFermion5D(GaugeField &_Umu,
@ -117,20 +117,19 @@ namespace Grid {
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
};
}
}
#define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
@ -82,9 +82,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls =this->Ls;
GridBase *grid=psi._grid;
@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
#endif
}}

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
@ -65,9 +65,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
int Ls=this->Ls;
for(int s=0;s<Ls;s++){

View File

@ -53,9 +53,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
@ -121,9 +121,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<RealD> &lower,
std::vector<RealD> &diag,
std::vector<RealD> &upper)
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi._grid;
int Ls = this->Ls;
@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
chi.checkerboard=psi.checkerboard;
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s];
@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ;
Eigen::MatrixXd PminusMat;
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if ( inv ) {
PplusMat =Pplus.inverse();
@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -103,7 +103,7 @@ namespace Grid {
typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base)
@ -122,9 +122,9 @@ namespace Grid {
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
@ -211,10 +211,9 @@ namespace Grid {
static const int Dimension = Nrepresentation;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
@ -312,7 +311,7 @@ namespace Grid {
static const int Dimension = Nrepresentation;
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
@ -515,6 +514,7 @@ namespace Grid {
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double

View File

@ -38,90 +38,6 @@ int WilsonKernelsStatic::AsmOpt;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
/*
template <class Impl>
typename std::enable_if<Impl::Dimension == 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, in,
out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, in,
out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
}
template <class Impl>
typename std::enable_if<Impl::Dimension != 3>::type WilsonKernels<Impl>::DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
out);
sF++;
}
sU++;
}
}
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out,
typename std::enable_if<Impl::Dimension == 3, int>::type = 0)
{
// No asm implementation yet.
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
// else
for(int site=0;site<Ns;site++) {
for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
template <class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sU, int Ls, int Ns, const FermionField &in, FermionField &out,
typename std::enable_if<Impl::Dimension != 3, int>::type = 0) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
*/
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////

View File

@ -33,26 +33,27 @@ directory
namespace Grid {
namespace QCD {
namespace QCD {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static int AsmOpt; // these are a temporary hack
static int HandOpt; // these are a temporary hack
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Helper routines that implement Wilson stencil for a single site.
// Common to both the WilsonFermion and WilsonFermion5D
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class WilsonKernelsStatic {
public:
// S-direction is INNERMOST and takes no part in the parity.
static int AsmOpt; // these are a temporary hack
static int HandOpt; // these are a temporary hack
};
template <class Impl>
class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
public:
public:
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DiracOptDhopSite(
@ -102,35 +103,45 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc== 3 && EnableBool, void>::type
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
// No asm implementation yet.
// if ( AsmOpt )
// WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
// else
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
Ns, in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
}
sU++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
typename std::enable_if<
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
@ -140,7 +151,7 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
sU++;
}
}
void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
@ -165,6 +176,12 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
@ -177,7 +194,9 @@ class WilsonKernels : public FermionOperator<Impl>, public WilsonKernelsStatic {
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
}
};
}
}
#endif

View File

@ -73,12 +73,21 @@ static int signInit = setupSigns();
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A
#undef KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP
#undef VMOVRDUP
#undef MAYBEPERM
@ -89,14 +98,25 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);

View File

@ -30,7 +30,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_PROJMEM(base);
#else
XM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm);
} else {
LOAD_CHI(base);
@ -41,15 +45,22 @@
MULT_2SPIN_DIR_PFXP(Xp,basep);
}
LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_RECON;
#else
XM_RECON;
#endif
////////////////////////////////
// Yp
////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_PROJMEM(base);
#else
YM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm);
} else {
LOAD_CHI(base);
@ -60,7 +71,11 @@
MULT_2SPIN_DIR_PFYP(Yp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_RECON_ACCUM;
#else
YM_RECON_ACCUM;
#endif
////////////////////////////////
// Zp
@ -68,7 +83,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_PROJMEM(base);
#else
ZM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm);
} else {
LOAD_CHI(base);
@ -79,7 +98,11 @@
MULT_2SPIN_DIR_PFZP(Zp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_RECON_ACCUM;
#else
ZM_RECON_ACCUM;
#endif
////////////////////////////////
// Tp
@ -87,7 +110,11 @@
basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_PROJMEM(base);
#else
TM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm);
} else {
LOAD_CHI(base);
@ -98,7 +125,11 @@
MULT_2SPIN_DIR_PFTP(Tp,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_RECON_ACCUM;
#else
TM_RECON_ACCUM;
#endif
////////////////////////////////
// Xm
@ -107,7 +138,11 @@
// basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_PROJMEM(base);
#else
XP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm);
} else {
LOAD_CHI(base);
@ -118,7 +153,11 @@
MULT_2SPIN_DIR_PFXM(Xm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_RECON_ACCUM;
#else
XP_RECON_ACCUM;
#endif
////////////////////////////////
// Ym
@ -126,7 +165,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_PROJMEM(base);
#else
YP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm);
} else {
LOAD_CHI(base);
@ -137,7 +180,11 @@
MULT_2SPIN_DIR_PFYM(Ym,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_RECON_ACCUM;
#else
YP_RECON_ACCUM;
#endif
////////////////////////////////
// Zm
@ -145,7 +192,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_PROJMEM(base);
#else
ZP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm);
} else {
LOAD_CHI(base);
@ -156,7 +207,11 @@
MULT_2SPIN_DIR_PFZM(Zm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_RECON_ACCUM;
#else
ZP_RECON_ACCUM;
#endif
////////////////////////////////
// Tm
@ -164,7 +219,11 @@
basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_PROJMEM(base);
#else
TP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm);
} else {
LOAD_CHI(base);
@ -175,7 +234,11 @@
MULT_2SPIN_DIR_PFTM(Tm,basep);
}
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_RECON_ACCUM;
#else
TP_RECON_ACCUM;
#endif
basep= st.GetPFInfo(nent,plocal); nent++;
SAVE_RESULT(base,basep);

View File

@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though //
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
}}

View File

@ -0,0 +1,79 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusFermion.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
}
}
#endif

View File

@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_empty.h
Source file: ./lib/simd/Grid_generic.h
Copyright (C) 2015
@ -26,14 +26,6 @@ Author: neo <cossu@post.kek.jp>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
//----------------------------------------------------------------------
/*! @file Grid_sse4.h
@brief Empty Optimization libraries for debugging
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:28:02 neo>
//----------------------------------------------------------------------
namespace Grid {
namespace Optimization {

View File

@ -38,8 +38,8 @@ directory
#ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES
#ifdef EMPTY_SIMD
#include "Grid_empty.h"
#ifdef GENERIC_VEC
#include "Grid_generic.h"
#endif
#ifdef SSE4
#include "Grid_sse4.h"
@ -388,6 +388,12 @@ class Grid_simd {
}; // end of Grid_simd class definition
inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
inline void permute(RealD &y,RealD b, int perm) { y=b; }
inline void permute(RealF &y,RealF b, int perm) { y=b; }
////////////////////////////////////////////////////////////////////
// General rotate
////////////////////////////////////////////////////////////////////

View File

@ -67,15 +67,13 @@ template <class scalar>
struct AsinRealFunctor {
scalar operator()(const scalar &a) const { return asin(real(a)); }
};
template <class scalar>
struct LogRealFunctor {
scalar operator()(const scalar &a) const { return log(real(a)); }
};
template <class scalar>
struct ExpRealFunctor {
scalar operator()(const scalar &a) const { return exp(real(a)); }
struct ExpFunctor {
scalar operator()(const scalar &a) const { return exp(a); }
};
template <class scalar>
struct NotFunctor {
@ -85,7 +83,6 @@ template <class scalar>
struct AbsRealFunctor {
scalar operator()(const scalar &a) const { return std::abs(real(a)); }
};
template <class scalar>
struct PowRealFunctor {
double y;
@ -135,7 +132,6 @@ template <class Scalar>
inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
return SimdApply(CosRealFunctor<S>(), r);
@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
}
template <class S, class V>
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
return SimdApply(ExpRealFunctor<S>(), r);
return SimdApply(ExpFunctor<S>(), r);
}
template <class S, class V>
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {