1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Merge branch 'develop' of github.com:paboyle/Grid into feature/baryonSpeedup

This commit is contained in:
ferben 2020-05-07 11:13:21 +01:00
commit 591ebb6213
192 changed files with 2866 additions and 6632 deletions

View File

@ -22,8 +22,18 @@
#undef __CUDACC__ #undef __CUDACC__
#undef __CUDA_ARCH__ #undef __CUDA_ARCH__
#define __NVCC__REDEFINE__ #define __NVCC__REDEFINE__
#endif
/* SYCL save and restore compile environment*/
#ifdef __SYCL_DEVICE_ONLY__
#pragma push
#pragma push_macro("__SYCL_DEVICE_ONLY__")
#undef __SYCL_DEVICE_ONLY__
#undef EIGEN_USE_SYCL
#define EIGEN_DONT_VECTORIZE
#endif #endif
#include <Grid/Eigen/Dense> #include <Grid/Eigen/Dense>
#include <Grid/Eigen/unsupported/CXX11/Tensor> #include <Grid/Eigen/unsupported/CXX11/Tensor>
@ -35,7 +45,14 @@
#pragma pop #pragma pop
#endif #endif
/*SYCL restore*/
#ifdef __SYCL__REDEFINE__
#pragma pop_macro("__SYCL_DEVICE_ONLY__")
#pragma pop
#endif
#if defined __GNUC__ #if defined __GNUC__
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif

View File

@ -37,211 +37,6 @@ Author: Christoph Lehner <clehner@bnl.gov>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////
// Move following 100 LOC to lattice/Lattice_basis.h
////////////////////////////////////////////////////////
template<class Field>
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
// If assume basis[j] are already orthonormal,
// can take all inner products in parallel saving 2x bandwidth
// Save 3x bandwidth on the second line of loop.
// perhaps 2.5x speed up.
// 2x overall in Multigrid Lanczos
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
}
}
template<class Field>
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
{
typedef decltype(basis[0].View()) View;
auto tmp_v = basis[0].View();
Vector<View> basis_v(basis.size(),tmp_v);
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0].Grid();
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].View();
}
#if 0
std::vector < vobj , commAllocator<vobj> > Bt(thread_max() * Nm); // Thread private
thread_region
{
vobj* B = Bt.data() + Nm * thread_num();
thread_for_in_region(ss, grid->oSites(),{
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt(j,k) * basis_v[k][ss];
}
}
for(int j=j0; j<j1; ++j){
basis_v[j][ss] = B[j];
}
});
}
#else
int nrot = j1-j0;
uint64_t oSites =grid->oSites();
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
// printf("BasisRotate %d %d nrot %d siteBlock %d\n",j0,j1,nrot,siteBlock);
Vector <vobj> Bt(siteBlock * nrot);
auto Bp=&Bt[0];
// GPU readable copy of Eigen matrix
Vector<double> Qt_jv(Nm*Nm);
double *Qt_p = & Qt_jv[0];
for(int k=0;k<Nm;++k){
for(int j=0;j<Nm;++j){
Qt_p[j*Nm+k]=Qt(j,k);
}
}
// Block the loop to keep storage footprint down
vobj zz=Zero();
for(uint64_t s=0;s<oSites;s+=siteBlock){
// remaining work in this block
int ssites=MIN(siteBlock,oSites-s);
// zero out the accumulators
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
auto z=coalescedRead(zz);
coalescedWrite(Bp[ss],z);
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
for(int k=k0; k<k1; ++k){
auto tmp = coalescedRead(Bp[ss*nrot+j]);
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
}
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
});
}
#endif
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef decltype(basis[0].View()) View;
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0].Grid();
result.Checkerboard() = basis[0].Checkerboard();
auto result_v=result.View();
Vector<View> basis_v(basis.size(),result_v);
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].View();
}
vobj zz=Zero();
Vector<double> Qt_jv(Nm);
double * Qt_j = & Qt_jv[0];
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
auto B=coalescedRead(zz);
for(int k=k0; k<k1; ++k){
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
}
coalescedWrite(result_v[ss], B);
});
}
template<class Field>
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
{
int vlen = idx.size();
assert(vlen>=1);
assert(vlen<=sort_vals.size());
assert(vlen<=_v.size());
for (size_t i=0;i<vlen;i++) {
if (idx[i] != i) {
//////////////////////////////////////
// idx[i] is a table of desired sources giving a permutation.
// Swap v[i] with v[idx[i]].
// Find j>i for which _vnew[j] = _vold[i],
// track the move idx[j] => idx[i]
// track the move idx[i] => i
//////////////////////////////////////
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
std::swap(sort_vals[i],sort_vals[idx[i]]);
idx[j] = idx[i];
idx[i] = i;
}
}
}
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
{
std::vector<int> idx(sort_vals.size());
std::iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
});
return idx;
}
template<class Field>
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
{
std::vector<int> idx = basisSortGetIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
basisReorderInPlace(_v,sort_vals,idx);
}
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = Zero();
assert(_v.size()==eval.size());
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
// Implicitly restarted lanczos // Implicitly restarted lanczos
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////

View File

@ -114,6 +114,7 @@ public:
void GlobalSumVector(RealD *,int N); void GlobalSumVector(RealD *,int N);
void GlobalSum(uint32_t &); void GlobalSum(uint32_t &);
void GlobalSum(uint64_t &); void GlobalSum(uint64_t &);
void GlobalSumVector(uint64_t*,int N);
void GlobalSum(ComplexF &c); void GlobalSum(ComplexF &c);
void GlobalSumVector(ComplexF *c,int N); void GlobalSumVector(ComplexF *c,int N);
void GlobalSum(ComplexD &c); void GlobalSum(ComplexD &c);

View File

@ -255,6 +255,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0); assert(ierr==0);
} }
void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){
int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){ void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0); assert(ierr==0);

View File

@ -70,9 +70,10 @@ CartesianCommunicator::~CartesianCommunicator(){}
void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSum(float &){}
void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSumVector(float *,int N){}
void CartesianCommunicator::GlobalSum(double &){} void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::GlobalSum(uint32_t &){} void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSum(uint64_t &){} void CartesianCommunicator::GlobalSum(uint64_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){} void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){}
void CartesianCommunicator::GlobalXOR(uint32_t &){} void CartesianCommunicator::GlobalXOR(uint32_t &){}
void CartesianCommunicator::GlobalXOR(uint64_t &){} void CartesianCommunicator::GlobalXOR(uint64_t &){}

View File

@ -31,11 +31,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_ET.h> #include <Grid/lattice/Lattice_ET.h>
#include <Grid/lattice/Lattice_arith.h> #include <Grid/lattice/Lattice_arith.h>
#include <Grid/lattice/Lattice_trace.h> #include <Grid/lattice/Lattice_trace.h>
#include <Grid/lattice/Lattice_transpose.h> //#include <Grid/lattice/Lattice_transpose.h>
#include <Grid/lattice/Lattice_local.h> #include <Grid/lattice/Lattice_local.h>
#include <Grid/lattice/Lattice_reduction.h> #include <Grid/lattice/Lattice_reduction.h>
#include <Grid/lattice/Lattice_peekpoke.h> #include <Grid/lattice/Lattice_peekpoke.h>
#include <Grid/lattice/Lattice_reality.h> //#include <Grid/lattice/Lattice_reality.h>
#include <Grid/lattice/Lattice_comparison_utils.h> #include <Grid/lattice/Lattice_comparison_utils.h>
#include <Grid/lattice/Lattice_comparison.h> #include <Grid/lattice/Lattice_comparison.h>
#include <Grid/lattice/Lattice_coordinate.h> #include <Grid/lattice/Lattice_coordinate.h>
@ -43,4 +43,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/lattice/Lattice_rng.h> #include <Grid/lattice/Lattice_rng.h>
#include <Grid/lattice/Lattice_unary.h> #include <Grid/lattice/Lattice_unary.h>
#include <Grid/lattice/Lattice_transfer.h> #include <Grid/lattice/Lattice_transfer.h>
#include <Grid/lattice/Lattice_basis.h>

View File

@ -9,6 +9,7 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: Christoph Lehner <christoph@lhnr.de
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -94,7 +95,7 @@ const lobj & eval(const uint64_t ss, const LatticeView<lobj> &arg)
template <class lobj> accelerator_inline template <class lobj> accelerator_inline
const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg) const lobj & eval(const uint64_t ss, const Lattice<lobj> &arg)
{ {
auto view = arg.View(); auto view = arg.AcceleratorView(ViewRead);
return view[ss]; return view[ss];
} }

View File

@ -7,6 +7,7 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -36,9 +37,9 @@ NAMESPACE_BEGIN(Grid);
template<class obj1,class obj2,class obj3> inline template<class obj1,class obj2,class obj3> inline
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
auto rhs_v = rhs.View(); auto rhs_v = rhs.AcceleratorView(ViewRead);
conformable(ret,rhs); conformable(ret,rhs);
conformable(lhs,rhs); conformable(lhs,rhs);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
@ -55,9 +56,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
conformable(lhs,rhs); conformable(lhs,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
auto rhs_v = rhs.View(); auto rhs_v = rhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -72,9 +73,9 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
conformable(lhs,rhs); conformable(lhs,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
auto rhs_v = rhs.View(); auto rhs_v = rhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -88,9 +89,9 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
conformable(lhs,rhs); conformable(lhs,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
auto rhs_v = rhs.View(); auto rhs_v = rhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -107,8 +108,8 @@ template<class obj1,class obj2,class obj3> inline
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(lhs,ret); conformable(lhs,ret);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
mult(&tmp,&lhs_v(ss),&rhs); mult(&tmp,&lhs_v(ss),&rhs);
@ -120,8 +121,8 @@ template<class obj1,class obj2,class obj3> inline
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(ret,lhs); conformable(ret,lhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -134,8 +135,8 @@ template<class obj1,class obj2,class obj3> inline
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(ret,lhs); conformable(ret,lhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -147,8 +148,8 @@ template<class obj1,class obj2,class obj3> inline
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
ret.Checkerboard() = lhs.Checkerboard(); ret.Checkerboard() = lhs.Checkerboard();
conformable(lhs,ret); conformable(lhs,ret);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto lhs_v = lhs.View(); auto lhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
@ -164,8 +165,8 @@ template<class obj1,class obj2,class obj3> inline
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){ void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = rhs.Checkerboard(); ret.Checkerboard() = rhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto rhs_v = lhs.View(); auto rhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
@ -178,8 +179,8 @@ template<class obj1,class obj2,class obj3> inline
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){ void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = rhs.Checkerboard(); ret.Checkerboard() = rhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto rhs_v = lhs.View(); auto rhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
@ -192,8 +193,8 @@ template<class obj1,class obj2,class obj3> inline
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){ void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = rhs.Checkerboard(); ret.Checkerboard() = rhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto rhs_v = lhs.View(); auto rhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
@ -205,8 +206,8 @@ template<class obj1,class obj2,class obj3> inline
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){ void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
ret.Checkerboard() = rhs.Checkerboard(); ret.Checkerboard() = rhs.Checkerboard();
conformable(ret,rhs); conformable(ret,rhs);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto rhs_v = lhs.View(); auto rhs_v = lhs.AcceleratorView(ViewRead);
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; decltype(coalescedRead(obj1())) tmp;
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
@ -220,9 +221,9 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
ret.Checkerboard() = x.Checkerboard(); ret.Checkerboard() = x.Checkerboard();
conformable(ret,x); conformable(ret,x);
conformable(x,y); conformable(x,y);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto x_v = x.View(); auto x_v = x.AcceleratorView(ViewRead);
auto y_v = y.View(); auto y_v = y.AcceleratorView(ViewRead);
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
auto tmp = a*x_v(ss)+y_v(ss); auto tmp = a*x_v(ss)+y_v(ss);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);
@ -233,9 +234,9 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
ret.Checkerboard() = x.Checkerboard(); ret.Checkerboard() = x.Checkerboard();
conformable(ret,x); conformable(ret,x);
conformable(x,y); conformable(x,y);
auto ret_v = ret.View(); auto ret_v = ret.AcceleratorView(ViewWrite);
auto x_v = x.View(); auto x_v = x.AcceleratorView(ViewRead);
auto y_v = y.View(); auto y_v = y.AcceleratorView(ViewRead);
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
auto tmp = a*x_v(ss)+b*y_v(ss); auto tmp = a*x_v(ss)+b*y_v(ss);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);

View File

@ -9,6 +9,7 @@ Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -49,6 +50,26 @@ void accelerator_inline conformable(GridBase *lhs,GridBase *rhs)
assert(lhs == rhs); assert(lhs == rhs);
} }
////////////////////////////////////////////////////////////////////////////
// Advise the LatticeAccelerator class
////////////////////////////////////////////////////////////////////////////
enum LatticeAcceleratorAdvise {
AdviseInfrequentUse = 0x1, // Advise that the data is used infrequently. This can
// significantly influence performance of bulk storage.
AdviseReadMostly = 0x2, // Data will mostly be read. On some architectures
// enables read-only copies of memory to be kept on
// host and device.
};
////////////////////////////////////////////////////////////////////////////
// View Access Mode
////////////////////////////////////////////////////////////////////////////
enum ViewMode {
ViewRead = 0x1,
ViewWrite = 0x2,
ViewReadWrite = 0x3
};
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Minimal base class containing only data valid to access from accelerator // Minimal base class containing only data valid to access from accelerator
// _odata will be a managed pointer in CUDA // _odata will be a managed pointer in CUDA
@ -75,6 +96,37 @@ public:
if (grid) conformable(grid, _grid); if (grid) conformable(grid, _grid);
else grid = _grid; else grid = _grid;
}; };
accelerator_inline void Advise(int advise) {
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
if (advise & AdviseInfrequentUse) {
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
}
if (advise & AdviseReadMostly) {
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
}
#endif
#endif
};
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
int target;
cudaGetDevice(&target);
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target);
#endif
#endif
};
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
#endif
#endif
};
}; };
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
@ -206,9 +258,23 @@ public:
// The view is trivially copy constructible and may be copied to an accelerator device // The view is trivially copy constructible and may be copied to an accelerator device
// in device lambdas // in device lambdas
///////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////
LatticeView<vobj> View (void) const LatticeView<vobj> View (void) const // deprecated, should pick AcceleratorView for accelerator_for
{ // and HostView for thread_for
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
return accessor;
}
LatticeView<vobj> AcceleratorView(int mode = ViewReadWrite) const
{ {
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this)); LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
accessor.AcceleratorPrefetch(mode);
return accessor;
}
LatticeView<vobj> HostView(int mode = ViewReadWrite) const
{
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this));
accessor.HostPrefetch(mode);
return accessor; return accessor;
} }
@ -232,7 +298,7 @@ public:
assert( (cb==Odd) || (cb==Even)); assert( (cb==Odd) || (cb==Even));
this->checkerboard=cb; this->checkerboard=cb;
auto me = View(); auto me = AcceleratorView(ViewWrite);
accelerator_for(ss,me.size(),1,{ accelerator_for(ss,me.size(),1,{
auto tmp = eval(ss,expr); auto tmp = eval(ss,expr);
vstream(me[ss],tmp); vstream(me[ss],tmp);
@ -251,7 +317,7 @@ public:
assert( (cb==Odd) || (cb==Even)); assert( (cb==Odd) || (cb==Even));
this->checkerboard=cb; this->checkerboard=cb;
auto me = View(); auto me = AcceleratorView(ViewWrite);
accelerator_for(ss,me.size(),1,{ accelerator_for(ss,me.size(),1,{
auto tmp = eval(ss,expr); auto tmp = eval(ss,expr);
vstream(me[ss],tmp); vstream(me[ss],tmp);
@ -269,7 +335,7 @@ public:
CBFromExpression(cb,expr); CBFromExpression(cb,expr);
assert( (cb==Odd) || (cb==Even)); assert( (cb==Odd) || (cb==Even));
this->checkerboard=cb; this->checkerboard=cb;
auto me = View(); auto me = AcceleratorView(ViewWrite);
accelerator_for(ss,me.size(),1,{ accelerator_for(ss,me.size(),1,{
auto tmp = eval(ss,expr); auto tmp = eval(ss,expr);
vstream(me[ss],tmp); vstream(me[ss],tmp);
@ -357,7 +423,6 @@ public:
// copy constructor // copy constructor
/////////////////////////////////////////// ///////////////////////////////////////////
Lattice(const Lattice& r){ Lattice(const Lattice& r){
// std::cout << "Lattice constructor(const Lattice &) "<<this<<std::endl;
this->_grid = r.Grid(); this->_grid = r.Grid();
resize(this->_grid->oSites()); resize(this->_grid->oSites());
*this = r; *this = r;
@ -380,8 +445,8 @@ public:
typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0; typename std::enable_if<!std::is_same<robj,vobj>::value,int>::type i=0;
conformable(*this,r); conformable(*this,r);
this->checkerboard = r.Checkerboard(); this->checkerboard = r.Checkerboard();
auto me = View(); auto me = AcceleratorView(ViewWrite);
auto him= r.View(); auto him= r.AcceleratorView(ViewRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{ accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss)); coalescedWrite(me[ss],him(ss));
}); });
@ -394,8 +459,8 @@ public:
inline Lattice<vobj> & operator = (const Lattice<vobj> & r){ inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
this->checkerboard = r.Checkerboard(); this->checkerboard = r.Checkerboard();
conformable(*this,r); conformable(*this,r);
auto me = View(); auto me = AcceleratorView(ViewWrite);
auto him= r.View(); auto him= r.AcceleratorView(ViewRead);
accelerator_for(ss,me.size(),vobj::Nsimd(),{ accelerator_for(ss,me.size(),vobj::Nsimd(),{
coalescedWrite(me[ss],him(ss)); coalescedWrite(me[ss],him(ss));
}); });

View File

@ -0,0 +1,236 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_basis.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
template<class Field>
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{
// If assume basis[j] are already orthonormal,
// can take all inner products in parallel saving 2x bandwidth
// Save 3x bandwidth on the second line of loop.
// perhaps 2.5x speed up.
// 2x overall in Multigrid Lanczos
for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j];
}
}
template<class VField, class Matrix>
void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
{
typedef decltype(basis[0]) Field;
typedef decltype(basis[0].View()) View;
auto tmp_v = basis[0].AcceleratorView(ViewReadWrite);
Vector<View> basis_v(basis.size(),tmp_v);
typedef typename std::remove_reference<decltype(tmp_v[0])>::type vobj;
GridBase* grid = basis[0].Grid();
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].AcceleratorView(ViewReadWrite);
}
#ifndef GRID_NVCC
thread_region
{
std::vector < vobj > B(Nm); // Thread private
thread_for_in_region(ss, grid->oSites(),{
for(int j=j0; j<j1; ++j) B[j]=0.;
for(int j=j0; j<j1; ++j){
for(int k=k0; k<k1; ++k){
B[j] +=Qt(j,k) * basis_v[k][ss];
}
}
for(int j=j0; j<j1; ++j){
basis_v[j][ss] = B[j];
}
});
}
#else
int nrot = j1-j0;
if (!nrot) // edge case not handled gracefully by Cuda
return;
uint64_t oSites =grid->oSites();
uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead
Vector <vobj> Bt(siteBlock * nrot);
auto Bp=&Bt[0];
// GPU readable copy of matrix
Vector<double> Qt_jv(Nm*Nm);
double *Qt_p = & Qt_jv[0];
thread_for(i,Nm*Nm,{
int j = i/Nm;
int k = i%Nm;
Qt_p[i]=Qt(j,k);
});
// Block the loop to keep storage footprint down
for(uint64_t s=0;s<oSites;s+=siteBlock){
// remaining work in this block
int ssites=MIN(siteBlock,oSites-s);
// zero out the accumulators
accelerator_for(ss,siteBlock*nrot,vobj::Nsimd(),{
decltype(coalescedRead(Bp[ss])) z;
z=Zero();
coalescedWrite(Bp[ss],z);
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
for(int k=k0; k<k1; ++k){
auto tmp = coalescedRead(Bp[ss*nrot+j]);
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss]));
}
});
accelerator_for(sj,ssites*nrot,vobj::Nsimd(),{
int j =sj%nrot;
int jj =j0+j;
int ss =sj/nrot;
int sss=ss+s;
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j]));
});
}
#endif
}
// Extract a single rotated vector
template<class Field>
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
{
typedef decltype(basis[0].AcceleratorView()) View;
typedef typename Field::vector_object vobj;
GridBase* grid = basis[0].Grid();
result.Checkerboard() = basis[0].Checkerboard();
auto result_v=result.AcceleratorView(ViewWrite);
Vector<View> basis_v(basis.size(),result_v);
for(int k=0;k<basis.size();k++){
basis_v[k] = basis[k].AcceleratorView(ViewRead);
}
vobj zz=Zero();
Vector<double> Qt_jv(Nm);
double * Qt_j = & Qt_jv[0];
for(int k=0;k<Nm;++k) Qt_j[k]=Qt(j,k);
accelerator_for(ss, grid->oSites(),vobj::Nsimd(),{
auto B=coalescedRead(zz);
for(int k=k0; k<k1; ++k){
B +=Qt_j[k] * coalescedRead(basis_v[k][ss]);
}
coalescedWrite(result_v[ss], B);
});
}
template<class Field>
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
{
int vlen = idx.size();
assert(vlen>=1);
assert(vlen<=sort_vals.size());
assert(vlen<=_v.size());
for (size_t i=0;i<vlen;i++) {
if (idx[i] != i) {
//////////////////////////////////////
// idx[i] is a table of desired sources giving a permutation.
// Swap v[i] with v[idx[i]].
// Find j>i for which _vnew[j] = _vold[i],
// track the move idx[j] => idx[i]
// track the move idx[i] => i
//////////////////////////////////////
size_t j;
for (j=i;j<idx.size();j++)
if (idx[j]==i)
break;
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
std::swap(sort_vals[i],sort_vals[idx[i]]);
idx[j] = idx[i];
idx[i] = i;
}
}
}
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
{
std::vector<int> idx(sort_vals.size());
std::iota(idx.begin(), idx.end(), 0);
// sort indexes based on comparing values in v
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
});
return idx;
}
template<class Field>
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
{
std::vector<int> idx = basisSortGetIndex(sort_vals);
if (reverse)
std::reverse(idx.begin(), idx.end());
basisReorderInPlace(_v,sort_vals,idx);
}
// PAB: faster to compute the inner products first then fuse loops.
// If performance critical can improve.
template<class Field>
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
result = Zero();
assert(_v.size()==eval.size());
int N = (int)_v.size();
for (int i=0;i<N;i++) {
Field& tmp = _v[i];
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
}
}
NAMESPACE_END(Grid);

View File

@ -156,7 +156,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
// Peek a scalar object from the SIMD array // Peek a scalar object from the SIMD array
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
template<class vobj,class sobj> template<class vobj,class sobj>
accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){ inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate &site){
GridBase *grid = l.Grid(); GridBase *grid = l.Grid();
@ -185,7 +185,7 @@ accelerator_inline void peekLocalSite(sobj &s,const Lattice<vobj> &l,Coordinate
}; };
template<class vobj,class sobj> template<class vobj,class sobj>
accelerator_inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){ inline void pokeLocalSite(const sobj &s,Lattice<vobj> &l,Coordinate &site){
GridBase *grid=l.Grid(); GridBase *grid=l.Grid();

View File

@ -5,6 +5,7 @@
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
@ -93,7 +94,7 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
// Double inner product // Double inner product
template<class vobj> template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) inline ComplexD rankInnerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{ {
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type; typedef typename vobj::vector_typeD vector_type;
@ -102,8 +103,8 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
GridBase *grid = left.Grid(); GridBase *grid = left.Grid();
// Might make all code paths go this way. // Might make all code paths go this way.
auto left_v = left.View(); auto left_v = left.AcceleratorView(ViewRead);
auto right_v=right.View(); auto right_v=right.AcceleratorView(ViewRead);
const uint64_t nsimd = grid->Nsimd(); const uint64_t nsimd = grid->Nsimd();
const uint64_t sites = grid->oSites(); const uint64_t sites = grid->oSites();
@ -137,11 +138,18 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
}) })
nrm = TensorRemove(sum(inner_tmp_v,sites)); nrm = TensorRemove(sum(inner_tmp_v,sites));
#endif #endif
grid->GlobalSum(nrm);
return nrm; return nrm;
} }
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) {
GridBase *grid = left.Grid();
ComplexD nrm = rankInnerProduct(left,right);
grid->GlobalSum(nrm);
return nrm;
}
///////////////////////// /////////////////////////
// Fast axpby_norm // Fast axpby_norm
// z = a x + b y // z = a x + b y
@ -167,9 +175,9 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
GridBase *grid = x.Grid(); GridBase *grid = x.Grid();
auto x_v=x.View(); auto x_v=x.AcceleratorView(ViewRead);
auto y_v=y.View(); auto y_v=y.AcceleratorView(ViewRead);
auto z_v=z.View(); auto z_v=z.AcceleratorView(ViewWrite);
const uint64_t nsimd = grid->Nsimd(); const uint64_t nsimd = grid->Nsimd();
const uint64_t sites = grid->oSites(); const uint64_t sites = grid->oSites();
@ -204,8 +212,64 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
grid->GlobalSum(nrm); grid->GlobalSum(nrm);
return nrm; return nrm;
} }
template<class vobj> strong_inline void
innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Lattice<vobj> &right)
{
conformable(left,right);
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
Vector<ComplexD> tmp(2);
GridBase *grid = left.Grid();
auto left_v=left.AcceleratorView(ViewRead);
auto right_v=right.AcceleratorView(ViewRead);
const uint64_t nsimd = grid->Nsimd();
const uint64_t sites = grid->oSites();
#ifdef GRID_NVCC
// GPU
typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t;
typedef decltype(innerProduct(left_v[0],left_v[0])) norm_t;
Vector<inner_t> inner_tmp(sites);
Vector<norm_t> norm_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
auto norm_tmp_v = &norm_tmp[0];
accelerator_for( ss, sites, nsimd,{
auto left_tmp = left_v(ss);
coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss)));
coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp));
});
tmp[0] = TensorRemove(sumD_gpu(inner_tmp_v,sites));
tmp[1] = TensorRemove(sumD_gpu(norm_tmp_v,sites));
#else
// CPU
typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t;
typedef decltype(innerProductD(left_v[0],left_v[0])) norm_t;
Vector<inner_t> inner_tmp(sites);
Vector<norm_t> norm_tmp(sites);
auto inner_tmp_v = &inner_tmp[0];
auto norm_tmp_v = &norm_tmp[0];
accelerator_for( ss, sites, nsimd,{
auto left_tmp = left_v(ss);
inner_tmp_v[ss] = innerProductD(left_tmp,right_v(ss));
norm_tmp_v[ss] = innerProductD(left_tmp,left_tmp);
});
// Already promoted to double
tmp[0] = TensorRemove(sum(inner_tmp_v,sites));
tmp[1] = TensorRemove(sum(norm_tmp_v,sites));
#endif
grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector
ip = tmp[0];
nrm = real(tmp[1]);
}
template<class Op,class T1> template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object ->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object

View File

@ -37,6 +37,7 @@ NAMESPACE_BEGIN(Grid);
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace // Trace
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
/*
template<class vobj> template<class vobj>
inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))> inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
{ {
@ -48,6 +49,7 @@ inline auto trace(const Lattice<vobj> &lhs) -> Lattice<decltype(trace(vobj()))>
}); });
return ret; return ret;
}; };
*/
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace Index level dependent operation // Trace Index level dependent operation

View File

@ -6,6 +6,7 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -63,6 +64,7 @@ template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,con
} }
}); });
} }
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.Checkerboard(); int cb = half.Checkerboard();
auto half_v = half.View(); auto half_v = half.View();
@ -81,25 +83,130 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
} }
}); });
} }
template<class vobj,class CComplex,int nbasis> ////////////////////////////////////////////////////////////////////////////////////////////
// Flexible Type Conversion for internal promotion to double as well as graceful
// treatment of scalar-compatible types
////////////////////////////////////////////////////////////////////////////////////////////
accelerator_inline void convertType(ComplexD & out, const std::complex<double> & in) {
out = in;
}
accelerator_inline void convertType(ComplexF & out, const std::complex<float> & in) {
out = in;
}
#ifdef __CUDA_ARCH__
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in;
}
accelerator_inline void convertType(vComplexD & out, const ComplexD & in) {
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd())] = in;
}
accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) {
((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd()*2)] = in;
}
#endif
accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) {
out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v);
}
accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) {
Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v);
}
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in);
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in);
template<typename T1,typename T2, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
convertType(out,in._internal);
}
template<typename T1,typename T2>
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
convertType(out._internal,in);
}
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in) {
for (int i=0;i<N;i++)
for (int j=0;j<N;j++)
convertType(out._internal[i][j],in._internal[i][j]);
}
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in) {
for (int i=0;i<N;i++)
convertType(out._internal[i],in._internal[i]);
}
template<typename T, typename std::enable_if<isGridFundamental<T>::value, T>::type* = nullptr>
accelerator_inline void convertType(T & out, const T & in) {
out = in;
}
template<typename T1,typename T2>
accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
auto out_v = out.AcceleratorView(ViewWrite);
auto in_v = in.AcceleratorView(ViewRead);
accelerator_for(ss,out_v.size(),T1::Nsimd(),{
convertType(out_v[ss],in_v(ss));
});
}
////////////////////////////////////////////////////////////////////////////////////////////
// precision-promoted local inner product
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View()[0],rhs.View()[0])))>>
{
auto lhs_v = lhs.AcceleratorView(ViewRead);
auto rhs_v = rhs.AcceleratorView(ViewRead);
typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner;
Lattice<iScalar<t_inner>> ret(lhs.Grid());
auto ret_v = ret.AcceleratorView(ViewWrite);
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss)));
});
return ret;
}
////////////////////////////////////////////////////////////////////////////////////////////
// block routines
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj,class CComplex,int nbasis,class VLattice>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData, const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis) const VLattice &Basis)
{ {
GridBase * fine = fineData.Grid(); GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid(); GridBase * coarse= coarseData.Grid();
Lattice<CComplex> ip(coarse); Lattice<iScalar<CComplex>> ip(coarse);
Lattice<vobj> fineDataRed = fineData;
// auto fineData_ = fineData.View(); // auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View(); auto coarseData_ = coarseData.AcceleratorView(ViewWrite);
auto ip_ = ip.View(); auto ip_ = ip.AcceleratorView(ViewReadWrite);
for(int v=0;v<nbasis;v++) { for(int v=0;v<nbasis;v++) {
blockInnerProduct(ip,Basis[v],fineData); blockInnerProductD(ip,Basis[v],fineDataRed); // ip = <basis|fine>
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), { accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
coalescedWrite(coarseData_[sc](v),ip_(sc)); convertType(coarseData_[sc](v),ip_[sc]);
}); });
// improve numerical stability of projection
// |fine> = |fine> - <basis|fine> |basis>
ip=-ip;
blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed);
} }
} }
@ -166,11 +273,11 @@ inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
return; return;
} }
template<class vobj,class CComplex> template<class vobj,class vobj2,class CComplex>
inline void blockZAXPY(Lattice<vobj> &fineZ, inline void blockZAXPY(Lattice<vobj> &fineZ,
const Lattice<CComplex> &coarseA, const Lattice<CComplex> &coarseA,
const Lattice<vobj> &fineX, const Lattice<vobj2> &fineX,
const Lattice<vobj> &fineY) const Lattice<vobj> &fineY)
{ {
GridBase * fine = fineZ.Grid(); GridBase * fine = fineZ.Grid();
GridBase * coarse= coarseA.Grid(); GridBase * coarse= coarseA.Grid();
@ -182,7 +289,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
conformable(fineX,fineZ); conformable(fineX,fineZ);
int _ndimension = coarse->_ndimension; int _ndimension = coarse->_ndimension;
Coordinate block_r (_ndimension); Coordinate block_r (_ndimension);
// FIXME merge with subdivide checking routine as this is redundant // FIXME merge with subdivide checking routine as this is redundant
@ -191,29 +298,65 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
} }
auto fineZ_ = fineZ.View(); auto fineZ_ = fineZ.AcceleratorView(ViewWrite);
auto fineX_ = fineX.View(); auto fineX_ = fineX.AcceleratorView(ViewRead);
auto fineY_ = fineY.View(); auto fineY_ = fineY.AcceleratorView(ViewRead);
auto coarseA_= coarseA.View(); auto coarseA_= coarseA.AcceleratorView(ViewRead);
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), { accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); int sc;
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Coordinate coor_c(_ndimension);
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); Coordinate coor_f(_ndimension);
// z = A x + y Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf)); for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
}); // z = A x + y
#ifdef __CUDA_ARCH__
typename vobj2::tensor_reduced::scalar_object cA;
typename vobj::scalar_object cAx;
#else
typename vobj2::tensor_reduced cA;
vobj cAx;
#endif
convertType(cA,TensorRemove(coarseA_(sc)));
auto prod = cA*fineX_(sf);
convertType(cAx,prod);
coalescedWrite(fineZ_[sf],cAx+fineY_(sf));
});
return; return;
} }
template<class vobj,class CComplex> template<class vobj,class CComplex>
inline void blockInnerProductD(Lattice<CComplex> &CoarseInner,
const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY)
{
typedef iScalar<decltype(TensorRemove(innerProductD2(vobj(),vobj())))> dotp;
GridBase *coarse(CoarseInner.Grid());
GridBase *fine (fineX.Grid());
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
Lattice<dotp> coarse_inner(coarse);
auto CoarseInner_ = CoarseInner.AcceleratorView(ViewWrite);
auto coarse_inner_ = coarse_inner.AcceleratorView(ViewReadWrite);
// Precision promotion
fine_inner = localInnerProductD(fineX,fineY);
blockSum(coarse_inner,fine_inner);
accelerator_for(ss, coarse->oSites(), 1, {
convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss]));
});
}
template<class vobj,class CComplex> // deprecate
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner, inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
const Lattice<vobj> &fineX, const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY) const Lattice<vobj> &fineY)
@ -227,8 +370,8 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
Lattice<dotp> coarse_inner(coarse); Lattice<dotp> coarse_inner(coarse);
// Precision promotion? // Precision promotion?
auto CoarseInner_ = CoarseInner.View(); auto CoarseInner_ = CoarseInner.AcceleratorView(ViewWrite);
auto coarse_inner_ = coarse_inner.View(); auto coarse_inner_ = coarse_inner.AcceleratorView(ViewReadWrite);
fine_inner = localInnerProduct(fineX,fineY); fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner); blockSum(coarse_inner,fine_inner);
@ -236,6 +379,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
CoarseInner_[ss] = coarse_inner_[ss]; CoarseInner_[ss] = coarse_inner_[ss];
}); });
} }
template<class vobj,class CComplex> template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX) inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{ {
@ -248,7 +392,7 @@ inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
// useful in multigrid project; // useful in multigrid project;
// Generic name : Coarsen? // Generic name : Coarsen?
template<class vobj> template<class vobj>
inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData) inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
{ {
GridBase * fine = fineData.Grid(); GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid(); GridBase * coarse= coarseData.Grid();
@ -256,42 +400,41 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
subdivides(coarse,fine); // require they map subdivides(coarse,fine); // require they map
int _ndimension = coarse->_ndimension; int _ndimension = coarse->_ndimension;
Coordinate block_r (_ndimension); Coordinate block_r (_ndimension);
for(int d=0 ; d<_ndimension;d++){ for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
} }
int blockVol = fine->oSites()/coarse->oSites(); int blockVol = fine->oSites()/coarse->oSites();
// Turn this around to loop threaded over sc and interior loop auto coarseData_ = coarseData.AcceleratorView(ViewReadWrite);
// over sf would thread better auto fineData_ = fineData.AcceleratorView(ViewRead);
auto coarseData_ = coarseData.View();
auto fineData_ = fineData.View();
accelerator_for(sc,coarse->oSites(),1,{ accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block // One thread per sub block
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
coarseData_[sc]=Zero(); coarseData_[sc]=Zero();
for(int sb=0;sb<blockVol;sb++){ for(int sb=0;sb<blockVol;sb++){
int sf;
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
coarseData_[sc]=coarseData_[sc]+fineData_[sf]; int sf;
} Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
}); coarseData_[sc]=coarseData_[sc]+fineData_[sf];
}
});
return; return;
} }
template<class vobj> template<class vobj>
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor) inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor)
{ {
@ -313,8 +456,8 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
} }
} }
template<class vobj,class CComplex> template<class CComplex,class VLattice>
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis) inline void blockOrthonormalize(Lattice<CComplex> &ip,VLattice &Basis)
{ {
GridBase *coarse = ip.Grid(); GridBase *coarse = ip.Grid();
GridBase *fine = Basis[0].Grid(); GridBase *fine = Basis[0].Grid();
@ -322,23 +465,30 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
int nbasis = Basis.size() ; int nbasis = Basis.size() ;
// checks // checks
subdivides(coarse,fine); subdivides(coarse,fine);
for(int i=0;i<nbasis;i++){ for(int i=0;i<nbasis;i++){
conformable(Basis[i].Grid(),fine); conformable(Basis[i].Grid(),fine);
} }
for(int v=0;v<nbasis;v++) { for(int v=0;v<nbasis;v++) {
for(int u=0;u<v;u++) { for(int u=0;u<v;u++) {
//Inner product & remove component //Inner product & remove component
blockInnerProduct(ip,Basis[u],Basis[v]); blockInnerProductD(ip,Basis[u],Basis[v]);
ip = -ip; ip = -ip;
blockZAXPY<vobj,CComplex> (Basis[v],ip,Basis[u],Basis[v]); blockZAXPY(Basis[v],ip,Basis[u],Basis[v]);
} }
blockNormalise(ip,Basis[v]); blockNormalise(ip,Basis[v]);
} }
} }
template<class vobj,class CComplex>
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis) // deprecated inaccurate naming
{
blockOrthonormalize(ip,Basis);
}
#if 0 #if 0
// TODO: CPU optimized version here
template<class vobj,class CComplex,int nbasis> template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData, inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData, Lattice<vobj> &fineData,
@ -383,24 +533,18 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
} }
#else #else
template<class vobj,class CComplex,int nbasis> template<class vobj,class CComplex,int nbasis,class VLattice>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData, inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData, Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis) const VLattice &Basis)
{ {
GridBase * fine = fineData.Grid(); GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid(); GridBase * coarse= coarseData.Grid();
fineData=Zero(); fineData=Zero();
for(int i=0;i<nbasis;i++) { for(int i=0;i<nbasis;i++) {
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i); Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
Lattice<CComplex> cip(coarse); auto ip_ = ip.AcceleratorView(ViewRead);
auto cip_ = cip.View(); blockZAXPY(fineData,ip,Basis[i],fineData);
auto ip_ = ip.View();
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
coalescedWrite(cip_[sc], ip_(sc)());
});
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
} }
} }
#endif #endif
@ -470,8 +614,8 @@ void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate Fro
Coordinate rdt = Tg->_rdimensions; Coordinate rdt = Tg->_rdimensions;
Coordinate ist = Tg->_istride; Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride; Coordinate ost = Tg->_ostride;
auto t_v = To.View(); auto t_v = To.AcceleratorView(ViewWrite);
auto f_v = From.View(); auto f_v = From.AcceleratorView(ViewRead);
accelerator_for(idx,Fg->lSites(),1,{ accelerator_for(idx,Fg->lSites(),1,{
sobj s; sobj s;
Coordinate Fcoor(nd); Coordinate Fcoor(nd);

View File

@ -341,7 +341,7 @@ class BinaryIO {
int ieee32big = (format == std::string("IEEE32BIG")); int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64")); int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
assert(ieee64||ieee32|ieee64big||ieee32big); assert(ieee64||ieee32|ieee64big||ieee32big);
assert((ieee64+ieee32+ieee64big+ieee32big)==1); assert((ieee64+ieee32+ieee64big+ieee32big)==1);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////

View File

@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger {
}; };
}; };
template<class fobj,class sobj>
struct GaugeDoubleStoredMunger{
void operator()(fobj &in, sobj &out) {
for (int mu = 0; mu < Nds; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template <class fobj, class sobj>
struct GaugeDoubleStoredUnmunger {
void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nds; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template<class fobj,class sobj> template<class fobj,class sobj>
struct Gauge3x2munger{ struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){ void operator() (fobj &in,sobj &out){

View File

@ -146,7 +146,7 @@ public:
int ieee32big = (format == std::string("IEEE32BIG")); int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64")); int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger; // depending on datatype, set up munger;

224
Grid/parallelIO/OpenQcdIO.h Normal file
View File

@ -0,0 +1,224 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/OpenQcdIO.h
Copyright (C) 2015 - 2020
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
struct OpenQcdHeader : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader,
int, Nt,
int, Nx,
int, Ny,
int, Nz,
double, plaq);
};
class OpenQcdIO : public BinaryIO {
public:
static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6
static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) {
OpenQcdHeader header;
{
std::ifstream fin(file, std::ios::in | std::ios::binary);
fin.read(reinterpret_cast<char*>(&header), sizeof(OpenQcdHeader));
assert(!fin.fail());
field.data_start = fin.tellg();
fin.close();
}
header.plaq /= normalisationFactor;
// sanity check (should trigger on endian issues)
assert(0 < header.Nt && header.Nt <= 1024);
assert(0 < header.Nx && header.Nx <= 1024);
assert(0 < header.Ny && header.Ny <= 1024);
assert(0 < header.Nz && header.Nz <= 1024);
field.dimension[0] = header.Nx;
field.dimension[1] = header.Ny;
field.dimension[2] = header.Nz;
field.dimension[3] = header.Nt;
std::cout << GridLogDebug << "header: " << header << std::endl;
std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl;
std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl;
assert(grid->_ndimension == Nd);
for(int d = 0; d < Nd; d++)
assert(grid->_fdimensions[d] == field.dimension[d]);
field.plaquette = header.plaq;
return field.data_start;
}
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
FieldMetaData& header,
std::string file) {
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubleStoredGaugeField;
assert(Ns == 4 and Nd == 4 and Nc == 3);
auto grid = dynamic_cast<GridCartesian*>(Umu.Grid());
assert(grid != nullptr); assert(grid->_ndimension == Nd);
uint64_t offset = readHeader(file, Umu.Grid(), header);
FieldMetaData clone(header);
std::string format("IEEE64"); // they always store little endian double precsision
uint32_t nersc_csum, scidac_csuma, scidac_csumb;
GridCartesian* grid_openqcd = createOpenQcdGrid(grid);
GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
typedef DoubleStoredColourMatrixD fobj;
typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj;
typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word;
word w = 0;
std::vector<fobj> iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here
std::vector<sobj> scalardata(grid->lSites());
IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC,
nersc_csum, scidac_csuma, scidac_csumb);
GridStopWatch timer;
timer.Start();
DoubleStoredGaugeField Umu_ds(grid);
auto munge = GaugeDoubleStoredMunger<DoubleStoredColourMatrixD, DoubleStoredColourMatrix>();
Coordinate ldim = grid->LocalDimensions();
thread_for(idx_g, grid->lSites(), {
Coordinate coor;
grid->LocalIndexToLocalCoor(idx_g, coor);
bool isOdd = grid_rb->CheckerBoard(coor) == Odd;
if(!isOdd) continue;
int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir]
+ coor[Xdir] * ldim[Ydir] * ldim[Zdir]
+ coor[Ydir] * ldim[Zdir]
+ coor[Zdir])/2;
munge(iodata[idx_o], scalardata[idx_g]);
});
grid->Barrier(); timer.Stop();
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl;
timer.Reset(); timer.Start();
vectorizeFromLexOrdArray(scalardata, Umu_ds);
grid->Barrier(); timer.Stop();
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl;
timer.Reset(); timer.Start();
undoDoubleStore(Umu, Umu_ds);
grid->Barrier(); timer.Stop();
std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl;
GaugeStatistics(Umu, clone);
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
// clang-format off
std::cout << GridLogMessage << "OpenQcd Configuration " << file
<< " plaquette " << clone.plaquette
<< " header " << header.plaquette
<< " difference " << plaq_diff
<< std::endl;
// clang-format on
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
if(plaq_diff >= tol)
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
assert(plaq_diff < tol);
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
}
template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
std::string file) {
std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl;
exit(EXIT_FAILURE);
}
private:
static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) {
// exploit GridCartesian to be able to still use IOobject
Coordinate gdim = grid->GlobalDimensions();
Coordinate ldim = grid->LocalDimensions();
Coordinate pcoor = grid->ThisProcessorCoor();
// openqcd does rb on the z direction
gdim[Zdir] /= 2;
ldim[Zdir] /= 2;
// and has the order T X Y Z (from slowest to fastest)
std::swap(gdim[Xdir], gdim[Zdir]);
std::swap(ldim[Xdir], ldim[Zdir]);
std::swap(pcoor[Xdir], pcoor[Zdir]);
GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid());
ret->_ldimensions = ldim;
ret->_processor_coor = pcoor;
return ret;
}
template<class vsimd>
static inline void undoDoubleStore(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
conformable(Umu.Grid(), Umu_ds.Grid());
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
// they store T+, T-, X+, X-, Y+, Y-, Z+, Z-
for(int mu_g = 0; mu_g < Nd; ++mu_g) {
int mu_o = (mu_g + 1) % Nd;
U = PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o)
+ Cshift(PeekIndex<LorentzIndex>(Umu_ds, 2 * mu_o + 1), mu_g, +1);
PokeIndex<LorentzIndex>(Umu, U, mu_g);
}
}
};
NAMESPACE_END(Grid);

View File

@ -0,0 +1,281 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h
Copyright (C) 2015 - 2020
Author: Daniel Richtmann <daniel.richtmann@ur.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <ios>
#include <iostream>
#include <limits>
#include <iomanip>
#include <mpi.h>
#include <ostream>
#include <string>
#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;}
#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; }
// #undef CHECK
// #define CHECK
NAMESPACE_BEGIN(Grid);
class ParRdr {
private:
bool const swap;
MPI_Status status;
MPI_File fp;
int err;
MPI_Datatype oddSiteType;
MPI_Datatype fileViewType;
GridBase* grid;
public:
ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr)
: swap(false)
, grid(gridPtr) {
err = MPI_File_open(comm, const_cast<char*>(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp);
assert(err == MPI_SUCCESS);
}
virtual ~ParRdr() { MPI_File_close(&fp); }
inline void errInfo(int const err, std::string const& func) {
static char estring[MPI_MAX_ERROR_STRING];
int eclass = -1, len = 0;
MPI_Error_class(err, &eclass);
MPI_Error_string(err, estring, &len);
std::cerr << func << " - Error " << eclass << ": " << estring << std::endl;
}
int readHeader(FieldMetaData& field) {
assert((grid->_ndimension == Nd) && (Nd == 4));
assert(Nc == 3);
OpenQcdHeader header;
readBlock(reinterpret_cast<char*>(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR);
header.plaq /= 3.; // TODO change this into normalizationfactor
// sanity check (should trigger on endian issues) TODO remove?
assert(0 < header.Nt && header.Nt <= 1024);
assert(0 < header.Nx && header.Nx <= 1024);
assert(0 < header.Ny && header.Ny <= 1024);
assert(0 < header.Nz && header.Nz <= 1024);
field.dimension[0] = header.Nx;
field.dimension[1] = header.Ny;
field.dimension[2] = header.Nz;
field.dimension[3] = header.Nt;
for(int d = 0; d < Nd; d++)
assert(grid->FullDimensions()[d] == field.dimension[d]);
field.plaquette = header.plaq;
field.data_start = sizeof(OpenQcdHeader);
return field.data_start;
}
void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) {
err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status);
errInfo(err, "MPI_File_read_at_all");
// CHECK_VAR(err)
int read = -1;
MPI_Get_count(&status, datatype, &read);
// CHECK_VAR(read)
assert(nbytes == (uint64_t)read);
assert(err == MPI_SUCCESS);
}
void createTypes() {
constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd
err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS);
err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS);
Coordinate const L = grid->GlobalDimensions();
Coordinate const l = grid->LocalDimensions();
Coordinate const i = grid->ThisProcessorCoor();
Coordinate sizes({L[2] / 2, L[1], L[0], L[3]});
Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]});
Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]});
err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS);
err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS);
}
void freeTypes() {
err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS);
err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS);
}
bool readGauge(std::vector<ColourMatrixD>& domain_buff, FieldMetaData& meta) {
auto hdr_offset = readHeader(meta);
CHECK
createTypes();
err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS);
CHECK
int const domainSites = grid->lSites();
domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd
// the actual READ
constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex
constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd
constexpr uint64_t max_elems = std::numeric_limits<int>::max(); // int adressable elems: floor is fine
uint64_t const n_os = domainSites / 2;
for(uint64_t os_idx = 0; os_idx < n_os;) {
uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx;
uint64_t const cm = os_idx * Nd * 2;
readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType);
os_idx += read_os;
}
CHECK
err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL);
errInfo(err, "MPI_File_set_view1");
assert(err == MPI_SUCCESS);
freeTypes();
std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl;
return true;
}
};
class OpenQcdIOChromaReference : public BinaryIO {
public:
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
Grid::FieldMetaData& header,
std::string file) {
typedef Lattice<iDoubleStoredColourMatrix<vsimd>> DoubledGaugeField;
assert(Ns == 4 and Nd == 4 and Nc == 3);
auto grid = Umu.Grid();
typedef ColourMatrixD fobj;
std::vector<fobj> iodata(
Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file
{
ParRdr rdr(MPI_COMM_WORLD, file, grid);
rdr.readGauge(iodata, header);
} // equivalent to using binaryio
std::vector<iDoubleStoredColourMatrix<typename vsimd::scalar_type>> Umu_ds_scalar(grid->lSites());
copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging
DoubledGaugeField Umu_ds(grid);
vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds);
redistribute(Umu, Umu_ds); // equivalent to undoDoublestore
FieldMetaData clone(header);
GaugeStatistics(Umu, clone);
RealD plaq_diff = fabs(clone.plaquette - header.plaquette);
// clang-format off
std::cout << GridLogMessage << "OpenQcd Configuration " << file
<< " plaquette " << clone.plaquette
<< " header " << header.plaquette
<< " difference " << plaq_diff
<< std::endl;
// clang-format on
RealD precTol = (getPrecision<vsimd>::value == 1) ? 2e-7 : 2e-15;
RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code
if(plaq_diff >= tol)
std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl;
assert(plaq_diff < tol);
std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl;
}
private:
template<class vsimd>
static inline void redistribute(Lattice<iLorentzColourMatrix<vsimd>>& Umu,
Lattice<iDoubleStoredColourMatrix<vsimd>> const& Umu_ds) {
Grid::conformable(Umu.Grid(), Umu_ds.Grid());
Lattice<iColourMatrix<vsimd>> U(Umu.Grid());
U = PeekIndex<LorentzIndex>(Umu_ds, 2) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 3), 0, +1); PokeIndex<LorentzIndex>(Umu, U, 0);
U = PeekIndex<LorentzIndex>(Umu_ds, 4) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 5), 1, +1); PokeIndex<LorentzIndex>(Umu, U, 1);
U = PeekIndex<LorentzIndex>(Umu_ds, 6) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 7), 2, +1); PokeIndex<LorentzIndex>(Umu, U, 2);
U = PeekIndex<LorentzIndex>(Umu_ds, 0) + Cshift(PeekIndex<LorentzIndex>(Umu_ds, 1), 3, +1); PokeIndex<LorentzIndex>(Umu, U, 3);
}
static inline void copyToLatticeObject(std::vector<DoubleStoredColourMatrix>& u_fb,
std::vector<ColourMatrixD> const& node_buff,
GridBase* grid) {
assert(node_buff.size() == Nd * grid->lSites());
Coordinate const& l = grid->LocalDimensions();
Coordinate coord(Nd);
int& x = coord[0];
int& y = coord[1];
int& z = coord[2];
int& t = coord[3];
int buff_idx = 0;
for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering
for(x = 0; x < l[0]; ++x)
for(y = 0; y < l[1]; ++y)
for(z = 0; z < l[2]; ++z) {
if((t + z + y + x) % 2 == 0) continue;
int local_idx;
Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions());
for(int mu = 0; mu < 2 * Nd; ++mu)
for(int c1 = 0; c1 < Nc; ++c1) {
for(int c2 = 0; c2 < Nc; ++c2) {
u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2);
}
}
buff_idx += 2 * Nd;
}
assert(node_buff.size() == buff_idx);
}
};
NAMESPACE_END(Grid);

View File

@ -95,7 +95,8 @@ inline uint64_t cyclecount(void){
} }
#elif defined __x86_64__ #elif defined __x86_64__
inline uint64_t cyclecount(void){ inline uint64_t cyclecount(void){
return __rdtsc(); uint64_t ret = __rdtsc();
return (uint64_t)ret;
} }
#else #else

View File

@ -133,23 +133,23 @@ typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF; typedef iSpinColourMatrix<vComplexF> vSpinColourMatrixF;
typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD; typedef iSpinColourMatrix<vComplexD> vSpinColourMatrixD;
// SpinColourSpinColour matrix // SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// SpinColourSpinColour matrix // SpinColourSpinColour matrix
typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix<Complex > SpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix<ComplexF > SpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix<ComplexD > SpinColourSpinColourMatrixD;
typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix<vComplex > vSpinColourSpinColourMatrix;
typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix<vComplexF> vSpinColourSpinColourMatrixF;
typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD; typedef iSpinColourSpinColourMatrix<vComplexD> vSpinColourSpinColourMatrixD;
// LorentzColour // LorentzColour
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix; typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
@ -443,16 +443,16 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
////////////////////////////////////////////// //////////////////////////////////////////////
// Fermion <-> propagator assignements // Fermion <-> propagator assignements
////////////////////////////////////////////// //////////////////////////////////////////////
//template <class Prop, class Ferm> //template <class Prop, class Ferm>
template <class Fimpl> template <class Fimpl>
void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
{ {
for(int j = 0; j < Ns; ++j) for(int j = 0; j < Ns; ++j)
{ {
auto pjs = peekSpin(p, j, s); auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j); auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i) for(int i = 0; i < Fimpl::Dimension; ++i)
{ {
pokeColour(pjs, peekColour(fj, i), i, c); pokeColour(pjs, peekColour(fj, i), i, c);
} }
@ -460,16 +460,16 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
} }
} }
//template <class Prop, class Ferm> //template <class Prop, class Ferm>
template <class Fimpl> template <class Fimpl>
void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
{ {
for(int j = 0; j < Ns; ++j) for(int j = 0; j < Ns; ++j)
{ {
auto pjs = peekSpin(p, j, s); auto pjs = peekSpin(p, j, s);
auto fj = peekSpin(f, j); auto fj = peekSpin(f, j);
for(int i = 0; i < Fimpl::Dimension; ++i) for(int i = 0; i < Fimpl::Dimension; ++i)
{ {
pokeColour(fj, peekColour(pjs, i, c), i); pokeColour(fj, peekColour(pjs, i, c), i);
} }

View File

@ -141,7 +141,33 @@ public:
Vector<iSinglet<Simd> > MatpInvDag; Vector<iSinglet<Simd> > MatpInvDag;
Vector<iSinglet<Simd> > MatmInvDag; Vector<iSinglet<Simd> > MatmInvDag;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
// Virtual can't template
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
///////////////////////////////////////////////////////////////
// Constructors // Constructors
///////////////////////////////////////////////////////////////
CayleyFermion5D(GaugeField &_Umu, CayleyFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,

View File

@ -148,15 +148,19 @@ public:
virtual void ContractConservedCurrent(PropagatorField &q_in_1, virtual void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type, Current curr_type,
unsigned int mu)=0; unsigned int mu)
{assert(0);};
virtual void SeqConservedCurrent(PropagatorField &q_in, virtual void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,
unsigned int tmax, unsigned int tmax,
ComplexField &lattice_cmplx)=0; ComplexField &lattice_cmplx)
{assert(0);};
// Only reimplemented in Wilson5D // Only reimplemented in Wilson5D
// Default to just a zero correlation function // Default to just a zero correlation function

View File

@ -38,6 +38,7 @@ public:
static const bool isFundamental = Representation::isFundamental; static const bool isFundamental = Representation::isFundamental;
static const int Nhcs = Options::Nhcs; static const int Nhcs = Options::Nhcs;
static const bool LsVectorised=false; static const bool LsVectorised=false;
static const bool isGparity=true;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Dimension> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Dimension> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -46,7 +47,7 @@ public:
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL; typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Dimension>, Ns>, Ngp>; template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>; template <typename vtype> using iImplPropagator = iMatrix<iMatrix<iMatrix<vtype, Dimension>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhs>, Ngp>; template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhs>, Ngp>;
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhcs>, Ngp>; template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Dimension>, Nhcs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>, Ngp>; template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>, Ngp>;
@ -80,6 +81,7 @@ public:
{ {
assert(0); assert(0);
} }
template<class _Spinor> template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi, static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
@ -191,6 +193,16 @@ public:
#endif #endif
} }
template<class _SpinorField>
inline void multLinkField(_SpinorField & out,
const DoubledGaugeField &Umu,
const _SpinorField & phi,
int mu)
{
assert(0);
}
template <class ref> template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory) static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{ {

View File

@ -185,10 +185,12 @@ public:
void ContractConservedCurrent(PropagatorField &q_in_1, void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu); unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in, void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &srct,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,

View File

@ -217,15 +217,17 @@ public:
void ContractConservedCurrent(PropagatorField &q_in_1, void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu); unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in, void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,
unsigned int tmax, unsigned int tmax,
ComplexField &lattice_cmplx); ComplexField &lattice_cmplx);
}; };
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -179,15 +179,17 @@ public:
void ContractConservedCurrent(PropagatorField &q_in_1, void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type, Current curr_type,
unsigned int mu); unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in, void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,
unsigned int tmax, unsigned int tmax,
ComplexField &lattice_cmplx); ComplexField &lattice_cmplx);
}; };
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;

View File

@ -217,25 +217,7 @@ public:
// Comms buffer // Comms buffer
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf; std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
///////////////////////////////////////////////////////////////
// Conserved current utilities
///////////////////////////////////////////////////////////////
void ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu);
void SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx);
void ContractJ5q(PropagatorField &q_in,ComplexField &J5q);
void ContractJ5q(FermionField &q_in,ComplexField &J5q);
}; };

View File

@ -41,6 +41,7 @@ public:
static const int Dimension = Representation::Dimension; static const int Dimension = Representation::Dimension;
static const bool isFundamental = Representation::isFundamental; static const bool isFundamental = Representation::isFundamental;
static const bool LsVectorised=false; static const bool LsVectorised=false;
static const bool isGparity=false;
static const int Nhcs = Options::Nhcs; static const int Nhcs = Options::Nhcs;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
@ -98,8 +99,21 @@ public:
{ {
multLink(phi,U,chi,mu); multLink(phi,U,chi,mu);
} }
template<class _SpinorField>
inline void multLinkField(_SpinorField & out,
const DoubledGaugeField &Umu,
const _SpinorField & phi,
int mu)
{
auto out_v= out.View();
auto phi_v= phi.View();
auto Umu_v= Umu.View();
thread_for(sss,out.Grid()->oSites(),{
multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu);
});
}
template <class ref> template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory) static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{ {

View File

@ -66,41 +66,6 @@ public:
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
//////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current.
//////////////////////////////////////////////////////////////////////////////
static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign = false);
static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign = false);
private: private:
static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,

View File

@ -588,6 +588,355 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,Vector<Coeff_t
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); // this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
} }
template <class Impl>
void CayleyFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
{
conformable(this->GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), this->FermionGrid());
Gamma G5(Gamma::Algebra::Gamma5);
// 4d field
int Ls = this->Ls;
FermionField psi(this->GaugeGrid());
FermionField p_plus (this->GaugeGrid());
FermionField p_minus(this->GaugeGrid());
FermionField p(this->GaugeGrid());
ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
ExtractSlice(p_minus, q_in, Ls/2 , 0);
p_plus = p_plus + G5*p_plus;
p_minus= p_minus - G5*p_minus;
p=0.5*(p_plus+p_minus);
J5q = localInnerProduct(p,p);
}
template <class Impl>
void CayleyFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
{
conformable(this->GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), this->FermionGrid());
Gamma G5(Gamma::Algebra::Gamma5);
// 4d field
int Ls = this->Ls;
PropagatorField psi(this->GaugeGrid());
PropagatorField p_plus (this->GaugeGrid());
PropagatorField p_minus(this->GaugeGrid());
PropagatorField p(this->GaugeGrid());
ExtractSlice(p_plus , q_in, Ls/2-1 , 0);
ExtractSlice(p_minus, q_in, Ls/2 , 0);
p_plus = p_plus + G5*p_plus;
p_minus= p_minus - G5*p_minus;
p=0.5*(p_plus+p_minus);
J5q = localInnerProduct(p,p);
}
#define Pp(Q) (0.5*(Q+g5*Q))
#define Pm(Q) (0.5*(Q-g5*Q))
#define Q_4d(Q) (Pm((Q)[0]) + Pp((Q)[Ls-1]))
#define TopRowWithSource(Q) (phys_src + (1.0-mass)*Q_4d(Q))
template <class Impl>
void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type,
unsigned int mu)
{
#ifndef GRID_NVCC
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::Gamma5
};
auto UGrid= this->GaugeGrid();
auto FGrid= this->FermionGrid();
RealD sgn=1.0;
if ( curr_type == Current::Axial ) sgn = -1.0;
int Ls = this->Ls;
std::vector<PropagatorField> L_Q(Ls,UGrid);
std::vector<PropagatorField> R_Q(Ls,UGrid);
for(int s=0;s<Ls;s++){
ExtractSlice(L_Q[s], q_in_1, s , 0);
ExtractSlice(R_Q[s], q_in_2, s , 0);
}
Gamma g5(Gamma::Algebra::Gamma5);
PropagatorField C(UGrid);
PropagatorField p5d(UGrid);
PropagatorField us_p5d(UGrid);
PropagatorField gp5d(UGrid);
PropagatorField gus_p5d(UGrid);
PropagatorField L_TmLsGq0(UGrid);
PropagatorField L_TmLsTmp(UGrid);
PropagatorField R_TmLsGq0(UGrid);
PropagatorField R_TmLsTmp(UGrid);
{
PropagatorField TermA(UGrid);
PropagatorField TermB(UGrid);
PropagatorField TermC(UGrid);
PropagatorField TermD(UGrid);
TermA = (Pp(Q_4d(L_Q)));
TermB = (Pm(Q_4d(L_Q)));
TermC = (Pm(TopRowWithSource(L_Q)));
TermD = (Pp(TopRowWithSource(L_Q)));
L_TmLsGq0 = (TermD - TermA + TermB);
L_TmLsTmp = (TermC - TermB + TermA);
TermA = (Pp(Q_4d(R_Q)));
TermB = (Pm(Q_4d(R_Q)));
TermC = (Pm(TopRowWithSource(R_Q)));
TermD = (Pp(TopRowWithSource(R_Q)));
R_TmLsGq0 = (TermD - TermA + TermB);
R_TmLsTmp = (TermC - TermB + TermA);
}
std::vector<PropagatorField> R_TmLsGq(Ls,UGrid);
std::vector<PropagatorField> L_TmLsGq(Ls,UGrid);
for(int s=0;s<Ls;s++){
R_TmLsGq[s] = (Pm((R_Q)[(s)]) + Pp((R_Q)[((s)-1+Ls)%Ls]));
L_TmLsGq[s] = (Pm((L_Q)[(s)]) + Pp((L_Q)[((s)-1+Ls)%Ls]));
}
Gamma gmu=Gamma(Gmu[mu]);
q_out = Zero();
PropagatorField tmp(UGrid);
for(int s=0;s<Ls;s++){
int sp = (s+1)%Ls;
int sr = Ls-1-s;
int srp= (sr+1)%Ls;
// Mobius parameters
auto b=this->bs[s];
auto c=this->cs[s];
auto bpc = 1.0/(b+c); // -0.5 factor in gauge links
if (s == 0) {
p5d =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp ));
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
} else if (s == Ls-1) {
p5d =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1]));
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
} else {
p5d =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr])+ b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp]));
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
}
tmp = Cshift(tmp,mu,1);
Impl::multLinkField(us_p5d,this->Umu,tmp,mu);
gp5d=g5*p5d*g5;
gus_p5d=gmu*us_p5d;
C = bpc*(adj(gp5d)*us_p5d);
C-= bpc*(adj(gp5d)*gus_p5d);
if (s == 0) {
p5d =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
tmp =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp ));
} else if (s == Ls-1) {
p5d =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
tmp =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1]));
} else {
p5d =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
tmp =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr]) + b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp]));
}
tmp = Cshift(tmp,mu,1);
Impl::multLinkField(us_p5d,this->Umu,tmp,mu);
gp5d=gmu*p5d;
gus_p5d=g5*us_p5d*g5;
C-= bpc*(adj(gus_p5d)*gp5d);
C-= bpc*(adj(gus_p5d)*p5d);
if (s < Ls/2) q_out += sgn*C;
else q_out += C;
}
#endif
}
template <class Impl>
void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
PropagatorField &phys_src,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &ph)// Complex phase factor
{
assert(mu>=0);
assert(mu<Nd);
int tshift = (mu == Nd-1) ? 1 : 0;
#if 0
////////////////////////////////////////////////
// SHAMIR CASE
////////////////////////////////////////////////
int Ls = this->Ls;
auto UGrid= this->GaugeGrid();
auto FGrid= this->FermionGrid();
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
Gamma gmu=Gamma(Gmu[mu]);
PropagatorField L_Q(UGrid);
PropagatorField R_Q(UGrid);
PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid);
LatticeInteger zz (UGrid); zz=0.0;
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
for (int s=0;s<Ls;s++) {
RealD G_s = (curr_type == Current::Axial ) ? ((s < Ls/2) ? -1 : 1) : 1;
ExtractSlice(R_Q, q_in, s , 0);
tmp = Cshift(R_Q,mu,1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
tmp = G_s*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
tmp = where((lcoor<=tmax),tmp,zz);
L_Q = tmp;
tmp = R_Q*ph;
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd);// Adjoint link
tmp = -G_s*( Utmp + gmu*Utmp );
tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
tmp = where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
L_Q= L_Q+tmp;
InsertSlice(L_Q, q_out, s , 0);
}
#endif
#ifndef GRID_NVCC
////////////////////////////////////////////////
// GENERAL CAYLEY CASE
////////////////////////////////////////////////
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::Gamma5
};
Gamma gmu=Gamma(Gmu[mu]);
Gamma g5(Gamma::Algebra::Gamma5);
int Ls = this->Ls;
auto UGrid= this->GaugeGrid();
auto FGrid= this->FermionGrid();
std::vector<PropagatorField> R_Q(Ls,UGrid);
PropagatorField L_Q(UGrid);
PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid);
LatticeInteger zz (UGrid); zz=0.0;
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
for(int s=0;s<Ls;s++){
ExtractSlice(R_Q[s], q_in, s , 0);
}
PropagatorField R_TmLsGq0(UGrid);
PropagatorField R_TmLsTmp(UGrid);
{
PropagatorField TermA(UGrid);
PropagatorField TermB(UGrid);
PropagatorField TermC(UGrid);
PropagatorField TermD(UGrid);
TermA = (Pp(Q_4d(R_Q)));
TermB = (Pm(Q_4d(R_Q)));
TermC = (Pm(TopRowWithSource(R_Q)));
TermD = (Pp(TopRowWithSource(R_Q)));
R_TmLsGq0 = (TermD - TermA + TermB);
R_TmLsTmp = (TermC - TermB + TermA);
}
std::vector<PropagatorField> R_TmLsGq(Ls,UGrid);
for(int s=0;s<Ls;s++){
R_TmLsGq[s] = (Pm((R_Q)[(s)]) + Pp((R_Q)[((s)-1+Ls)%Ls]));
}
std::vector<RealD> G_s(Ls,1.0);
if ( curr_type == Current::Axial ) {
for(int s=0;s<Ls/2;s++){
G_s[s] = -1.0;
}
}
for(int s=0;s<Ls;s++){
int sp = (s+1)%Ls;
int sr = Ls-1-s;
int srp= (sr+1)%Ls;
// Mobius parameters
auto b=this->bs[s];
auto c=this->cs[s];
// auto bpc = G_s[s]*1.0/(b+c); // -0.5 factor in gauge links
if (s == 0) {
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
} else if (s == Ls-1) {
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
} else {
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp]));
}
tmp = Cshift(tmp,mu,1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu);
tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated
if (s == 0) {
tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1]));
} else if (s == Ls-1) {
tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp ));
} else {
tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp])+ c*Pm(R_TmLsGq[sp]));
}
tmp = tmp *ph;
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
tmp = -G_s[s]*( Utmp + gmu*Utmp );
tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
L_Q += where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
InsertSlice(L_Q, q_out, s , 0);
}
#endif
}
#undef Pp
#undef Pm
#undef Q_4d
#undef TopRowWithSource
#if 0 #if 0
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv, void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,

View File

@ -611,6 +611,7 @@ template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
@ -620,11 +621,12 @@ void ImprovedStaggeredFermion5D<Impl>::ContractConservedCurrent(PropagatorField
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in, void ImprovedStaggeredFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,
unsigned int tmax, unsigned int tmax,
ComplexField &lattice_cmplx) ComplexField &lattice_cmplx)
{ {
assert(0); assert(0);

View File

@ -600,6 +600,7 @@ template <class Impl>
void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
@ -609,6 +610,7 @@ void ImprovedStaggeredFermion<Impl>::ContractConservedCurrent(PropagatorField &q
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in, void ImprovedStaggeredFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,

View File

@ -132,14 +132,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Even, CloverTermEven, CloverTerm);
pickCheckerboard(Odd, CloverTermOdd, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
} }
template <class Impl> template <class Impl>

View File

@ -861,7 +861,6 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
* Conserved current utilities for Wilson fermions, for contracting propagators * Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current * to make a conserved current sink or inserting the conserved current
* sequentially. * sequentially.
******************************************************************************/
// Helper macro to reverse Simd vector. Fixme: slow, generic implementation. // Helper macro to reverse Simd vector. Fixme: slow, generic implementation.
#define REVERSE_LS(qSite, qSiteRev, Nsimd) \ #define REVERSE_LS(qSite, qSiteRev, Nsimd) \
@ -877,220 +876,10 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
merge(qSiteRev, qSiteVec); \ merge(qSiteRev, qSiteVec); \
} }
// psi = chiralProjectPlus(Result_s[Ls/2-1]); ******************************************************************************/
// psi+= chiralProjectMinus(Result_s[Ls/2]);
// PJ5q+=localInnerProduct(psi,psi);
template<class vobj>
Lattice<vobj> spProj5p(const Lattice<vobj> & in)
{
GridBase *grid=in.Grid();
Gamma G5(Gamma::Algebra::Gamma5);
Lattice<vobj> ret(grid);
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] + G5*in_v[ss];
});
return ret;
}
template<class vobj>
Lattice<vobj> spProj5m(const Lattice<vobj> & in)
{
Gamma G5(Gamma::Algebra::Gamma5);
GridBase *grid=in.Grid();
Lattice<vobj> ret(grid);
auto ret_v = ret.View();
auto in_v = in.View();
thread_for(ss,grid->oSites(),{
ret_v[ss] = in_v[ss] - G5*in_v[ss];
});
return ret;
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(FermionField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
FermionField psi(GaugeGrid());
FermionField p_plus (GaugeGrid());
FermionField p_minus(GaugeGrid());
FermionField p(GaugeGrid());
ExtractSlice(p_plus , q_in, Ls/2 , 0);
ExtractSlice(p_minus, q_in, Ls/2-1 , 0);
p_plus = spProj5p(p_plus );
p_minus= spProj5m(p_minus);
p=p_plus+p_minus;
J5q = localInnerProduct(p,p);
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractJ5q(PropagatorField &q_in,ComplexField &J5q)
{
conformable(GaugeGrid(), J5q.Grid());
conformable(q_in.Grid(), FermionGrid());
// 4d field
int Ls = this->Ls;
PropagatorField psi(GaugeGrid());
PropagatorField p_plus (GaugeGrid());
PropagatorField p_minus(GaugeGrid());
PropagatorField p(GaugeGrid());
ExtractSlice(p_plus , q_in, Ls/2 , 0);
ExtractSlice(p_minus, q_in, Ls/2-1 , 0);
p_plus = spProj5p(p_plus );
p_minus= spProj5m(p_minus);
p=p_plus+p_minus;
J5q = localInnerProduct(p,p);
}
template <class Impl>
void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2,
PropagatorField &q_out,
Current curr_type,
unsigned int mu)
{
conformable(q_in_1.Grid(), FermionGrid());
conformable(q_in_1.Grid(), q_in_2.Grid());
conformable(_FourDimGrid, q_out.Grid());
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = Zero();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
tmp1 = Cshift(q_in_1, mu + 1, 1);
tmp2 = Cshift(q_in_2, mu + 1, 1);
auto q_in_1_v = q_in_1.View();
auto q_in_2_v = q_in_2.View();
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_for(sU, Umu.Grid()->oSites(),{
unsigned int sF1 = sU * LLs;
unsigned int sF2 = (sU + 1) * LLs - 1;
for (unsigned int s = 0; s < LLs; ++s)
{
bool axial_sign = ((curr_type == Current::Axial) && \
(s < (LLs / 2)));
SitePropagator qSite2, qmuSite2;
// If vectorised in 5th dimension, reverse q2 vector to match up
// sites correctly.
if (Impl::LsVectorised)
{
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
}
else
{
qSite2 = q_in_2_v[sF2];
qmuSite2 = tmp2_v[sF2];
}
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
qSite2,
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
qmuSite2,
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
sF1++;
sF2--;
}
});
}
template <class Impl>
void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out,
Current curr_type,
unsigned int mu,
unsigned int tmin,
unsigned int tmax,
ComplexField &lattice_cmplx)
{
conformable(q_in.Grid(), FermionGrid());
conformable(q_in.Grid(), q_out.Grid());
PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid());
unsigned int tshift = (mu == Tp) ? 1 : 0;
unsigned int LLs = q_in.Grid()->_rdimensions[0];
unsigned int LLt = GridDefaultLatt()[Tp];
q_out = Zero();
LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp);
auto q_out_v = q_out.View();
auto tmp2_v = tmp2.View();
auto coords_v= coords.View();
auto Umu_v = Umu.View();
for (unsigned int s = 0; s < LLs; ++s)
{
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
bool tadpole_sign = (curr_type == Current::Tadpole);
bool switch_sgn = tadpole_sign || axial_sign;
//forward direction: Need q(x + mu, s)*A(x)
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
tmp = Cshift(tmp2, mu, 1); //q(x+mu,s)
tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x)
thread_for(sU, Umu.Grid()->oSites(),{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vPredicate t_mask;
t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0)
{
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, switch_sgn);
}
});
//backward direction: Need q(x - mu, s)*A(x-mu)
ExtractSlice(tmp2, q_in, s, 0); //q(x,s)
tmp = lattice_cmplx*tmp2; //q(x,s)*A(x)
tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s)
thread_for(sU, Umu.Grid()->oSites(),
{
vPredicate t_mask;
t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 ));
Integer timeSlices = Reduce(t_mask());
if (timeSlices > 0) {
unsigned int sF = sU * LLs + s;
Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU],
q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign);
}
});
}
}
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -459,6 +459,7 @@ template <class Impl>
void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1, void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
PropagatorField &q_in_2, PropagatorField &q_in_2,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu) unsigned int mu)
{ {
@ -466,6 +467,7 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_1.Grid());
conformable(_grid, q_in_2.Grid()); conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid()); conformable(_grid, q_out.Grid());
#if 0
PropagatorField tmp1(_grid), tmp2(_grid); PropagatorField tmp1(_grid), tmp2(_grid);
q_out = Zero(); q_out = Zero();
@ -489,12 +491,15 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
q_out_v[sU], q_out_v[sU],
Umu_v, sU, mu); Umu_v, sU, mu);
}); });
#else
#endif
} }
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in, void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField &q_out, PropagatorField &q_out,
PropagatorField &src,
Current curr_type, Current curr_type,
unsigned int mu, unsigned int mu,
unsigned int tmin, unsigned int tmin,
@ -503,6 +508,7 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
{ {
conformable(_grid, q_in.Grid()); conformable(_grid, q_in.Grid());
conformable(_grid, q_out.Grid()); conformable(_grid, q_out.Grid());
#if 0
// Lattice<iSinglet<Simd>> ph(_grid), coor(_grid); // Lattice<iSinglet<Simd>> ph(_grid), coor(_grid);
Complex i(0.0,1.0); Complex i(0.0,1.0);
@ -556,6 +562,8 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
Umu_v, sU, mu, t_mask); Umu_v, sU, mu, t_mask);
} }
}); });
#else
#endif
} }
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -444,19 +444,19 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
#ifndef GRID_NVCC #ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
#endif #endif
} else if( interior ) { } else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC #ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
#endif #endif
} else if( exterior ) { } else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC #ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
#endif #endif
} }
assert(0 && " Kernel optimisation case not covered "); assert(0 && " Kernel optimisation case not covered ");
@ -493,131 +493,5 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
assert(0 && " Kernel optimisation case not covered "); assert(0 && " Kernel optimisation case not covered ");
} }
/*******************************************************************************
* Conserved current utilities for Wilson fermions, for contracting propagators
* to make a conserved current sink or inserting the conserved current
* sequentially. Common to both 4D and 5D.
******************************************************************************/
// N.B. Functions below assume a -1/2 factor within U.
#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr))
#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr))
/*******************************************************************************
* Name: ContractConservedCurrentSiteFwd
* Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_1 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: ContractConservedCurrentSiteBwd
* Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in_2 shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLink(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in +ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
* Name: SeqConservedCurrentSiteFwd
* Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu]
* Notes: - DoubledGaugeField U assumed to contain -1/2 factor.
* - Pass in q_in shifted in -ve mu direction.
******************************************************************************/
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vPredicate t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLink(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CayleyFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1,38 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class DomainWallEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class MobiusEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1,39 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class PartialFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
Copyright (C) 2017
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonCloverFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,74 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h>
NAMESPACE_BEGIN(Grid);
// Move these
#include "impl.h"
// G-parity requires more specialised implementation.
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
assert(0);
}
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int mu,
unsigned int sU,
bool switch_sign)
{
assert(0);
}
HAND_SPECIALISE_GPARITY(IMPLEMENTATION);
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1,37 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonTMFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CayleyFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1,38 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class DomainWallEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class MobiusEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1,39 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class PartialFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
Copyright (C) 2017
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonCloverFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,74 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h>
NAMESPACE_BEGIN(Grid);
// Move these
#include "impl.h"
// G-parity requires more specialised implementation.
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
assert(0);
}
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int mu,
unsigned int sU,
bool switch_sign)
{
assert(0);
}
HAND_SPECIALISE_GPARITY(IMPLEMENTATION);
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1,37 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonTMFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CayleyFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1,38 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/DomainWallEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class DomainWallEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc
Copyright (C) 2017
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: David Murphy <dmurphy@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/MobiusEOFAFermion.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h>
#include <Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class MobiusEOFAFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1,39 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class PartialFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc
Copyright (C) 2017
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonCloverFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,40 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,74 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h>
NAMESPACE_BEGIN(Grid);
// Move these
#include "impl.h"
// G-parity requires more specialised implementation.
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
assert(0);
}
template <>
void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeFieldView &U,
unsigned int mu,
unsigned int sU,
bool switch_sign)
{
assert(0);
}
HAND_SPECIALISE_GPARITY(IMPLEMENTATION);
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1,37 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/WilsonTMFermion.h>
#include <Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonTMFermion<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1,44 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h>
//#include <Grid/qcd/action/fermion/implementation/CayleyFermion5Dgpu.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CayleyFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1,38 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class ContinuedFractionFermion5D<IMPLEMENTATION>;
NAMESPACE_END(Grid);

Some files were not shown because too many files have changed in this diff Show More