1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 14:57:05 +01:00

Compare commits

..

127 Commits

Author SHA1 Message Date
a976fa6746 expose gauge group in GImpl and generic Nc fix 2021-10-05 14:19:47 +01:00
6c66b8d997 deflated guesser can optionally be used with less vectors than provided 2021-09-30 19:25:12 +01:00
9523ad3d73 vector version of Schur solver use vector guesser 2021-09-28 12:45:47 +01:00
73a95fa96f LinearFunction loops over vectors by default, can be overloaded 2021-09-28 12:44:26 +01:00
67e08aa952 New file not run yet 2021-09-23 23:39:55 +02:00
ed1f20f3a1 Merge pull request #367 from mmphys/bugfix/H5NS
Hdf5 namespace
2021-09-23 12:36:11 -04:00
cffc736bb3 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-22 06:03:06 -07:00
c0d56a1c04 Perlmutter tune up 2021-09-22 06:02:34 -07:00
3206f69478 SYCL happy 2021-09-21 18:01:35 -07:00
b2ccaad761 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-21 12:18:05 -07:00
8eb1232683 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-21 09:25:07 -07:00
c6ce3ad03b Some properties 2021-09-21 09:20:21 -07:00
b3b033d343 Clean 2021-09-21 09:18:54 -07:00
ca9816bfbb Typo 2021-09-21 04:12:04 +02:00
814d5abc7e Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-21 04:05:51 +02:00
a29122e2bf Rebench 2021-09-21 04:05:04 +02:00
e188c0512e Udpdate 2021-09-21 01:04:30 +02:00
1fb6aaf150 Device 2 Device with cudaMemcpy 2021-09-21 01:03:07 +02:00
894654f7ef Simplificatoin, always gather faces 2021-09-21 01:02:34 +02:00
109507888b Option to force use of MPI over Nvlink 2021-09-21 00:53:25 +02:00
68650b61fe Options controlling behaviour 2021-09-21 00:51:01 +02:00
7ee66bf453 Make sure H5NS has empty definition if HDF5 built without C++ namespace. Add comment in Hdf5IO.cc indicating likely source of error using H5NS, i.e. lack of --enable-cxx in hdf5 configure. 2021-09-19 19:45:20 +01:00
8bd70ad8b5 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-16 10:22:38 -07:00
af98525766 Merge pull request #359 from paboyle/feature/serialisation-update
Feature/serialisation update
2021-09-16 10:24:52 -04:00
1c2f218519 Merge pull request #360 from pjgeorg/ld-nvcc-openmp
nvcc: Add -fopenmp to LDFLAGS
2021-09-16 10:24:30 -04:00
c9aa1f507c Merge pull request #363 from felixerben/feature/testMesonField
Feature/test meson field
2021-09-16 10:23:58 -04:00
ea7126496d Merge pull request #361 from edbennett/fix-setdevice-message
make message about setdevice consistent with configure script
2021-09-16 10:23:37 -04:00
f660dc67e4 Merge pull request #366 from lehner/feature/gpt
Avx512 mixed prec
2021-09-15 20:27:13 -04:00
ede8faea74 Merge branch 'paboyle:develop' into feature/gpt 2021-09-16 02:23:15 +02:00
1b750761c2 Merge pull request #26 from waterret/feature/gpt
AVX512 drop mixed precision as well
2021-09-16 02:22:52 +02:00
145acf2919 Perf results 2021-09-16 01:06:28 +01:00
cc4a27b9e6 Scripts and performance 2021-09-16 00:15:35 +01:00
b4690e6091 Adding build basics for different systems 2021-09-16 00:00:38 +01:00
4b24800132 AVX512 drop mixed precision as well 2021-09-15 16:29:47 -04:00
9d2238148c Merge branch 'develop' of https://www.github.com/paboyle/Grid into develop 2021-09-15 19:25:57 +01:00
c15493218d Two extra routines to break out SchurRedBlack on many RHS into stages to allow efficient deflation & split grid
Split grid solver still to do.
2021-09-15 19:24:39 +01:00
001a556a34 Merge pull request #365 from lehner/feature/gpt
Sync
2021-09-15 13:34:02 -04:00
3d0f88e702 A64FX drop mixed precision as well 2021-09-15 18:38:32 +02:00
dd091d0960 consistent pointer offloading instead of views 2021-09-15 16:58:05 +02:00
e2abbf9520 Merge pull request #25 from paboyle/develop
Sync
2021-09-15 10:02:43 +02:00
c7baeb5bae Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-09-14 08:31:11 -07:00
402d80e197 Merge branch 'develop' of https://www.github.com/paboyle/Grid into develop 2021-09-14 16:16:06 +01:00
86e33c8ab2 Significant GPU perf speed up finished 2021-09-14 16:14:23 +01:00
5dae6a6dac Deprecate half prec comms 2021-09-14 15:06:59 +01:00
361bb8a101 Remove half prec comms 2021-09-14 15:06:29 +01:00
7efdb3cd2b Remove half prec comms 2021-09-14 15:06:06 +01:00
65ef4ec29f Move tables to device memory 2021-09-14 15:05:01 +01:00
d5835c0222 Switch to coalesced stencil face gather 2021-09-14 15:04:14 +01:00
a7b943b33e Remove half prec comms 2021-09-14 05:05:33 +01:00
7440cde92f No half prec comms; coalesced access on GPU 2021-09-14 05:04:56 +01:00
0fc662bb24 Dirac cuda 11.4 happy ; force host for functions accessing mult table
ET runs these on host BEFORE lodging result in AST for kernel
2021-09-14 05:00:44 +01:00
8195890640 Force MPI over NVLINK 2021-09-14 05:00:17 +01:00
4c88104a73 Fix compile warns 2021-09-11 23:08:05 +01:00
73b944c152 Drop half prec comms for now. 2021-09-11 23:07:18 +01:00
d1b0b7f5c6 Half prec comms dropping 2021-09-11 23:05:40 +01:00
381d8797d0 Drop half prec comms for now 2021-09-11 23:05:02 +01:00
b06526bc1e Comment update 2021-08-30 21:15:39 -04:00
3044419111 Some sample code 2021-08-30 20:32:11 -04:00
bcfa9cf068 Improvement of output 2021-08-28 08:08:15 -07:00
114920b8de Some example clean up 2021-08-25 12:24:17 +01:00
0d588b95f4 Bug fix to Example_Laplacian test 2021-08-23 23:14:26 +01:00
5b3c530aa7 Return value 2021-08-23 15:30:45 +01:00
c6a5499c8b Fail on non-apple 2021-08-22 18:40:55 +01:00
ec9c3fe77a Remove the file 2021-08-22 18:28:39 +01:00
6135ad530e Extra examples / solutions 2021-08-22 18:25:07 +01:00
40098424c7 Examples 2021-08-22 14:17:12 +01:00
7163b31a26 Examples 2021-08-20 01:15:23 +01:00
ffbdd91e0e Apple happiness 2021-08-20 01:15:00 +01:00
5d29e175d8 Typo fix 2021-08-10 18:25:43 +01:00
417dbfa257 Fix 2021-08-10 08:55:35 -07:00
1eda4d8e0b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-08-10 05:41:18 -07:00
50181f16e5 Level 0 IPC set up 2021-08-10 05:35:15 -07:00
75030637cc Improved comms benchmark, same as benchmark_comms_host_device 2021-08-10 05:16:30 -07:00
fe5aaf7677 Make comms benchmark same as Benchmark_comms_host_device 2021-08-09 04:06:30 -07:00
80ac2a73ca Check is wrong (HtoD / DtoH) 2021-08-05 18:33:20 -04:00
d75a66a3e6 test done 2021-07-06 11:42:36 +01:00
fcc4374d7b i/o done 2021-07-05 14:52:00 +01:00
67c3c16fe5 working test 2021-07-05 14:41:52 +01:00
25e9be50b5 created test file 2021-07-02 15:51:19 +01:00
323cf6c038 make message consistent with configure script 2021-06-23 17:00:43 +01:00
29a22ae603 Simpler SYCL setup 2021-06-22 17:57:20 +00:00
403bff1a47 Force reqd subgroup size fo SYCL 2021-06-22 17:56:10 +00:00
c50f27e68b Make FFT play nice with split grid 2021-06-20 11:34:38 +02:00
80afacec5b nvcc: Add -fopenmp to LDFLAGS 2021-06-17 13:05:13 +02:00
6cd9224dd7 SYCL comms buffer allocate 2021-06-16 17:10:55 +00:00
4bf8196ff1 Merge branch 'develop' of https://www.github.com/paboyle/Grid into develop 2021-06-15 21:45:36 +00:00
4c5440fb06 const happy for sycl 2021-06-15 21:45:07 +00:00
a269a3d919 Merge pull request #358 from mmphys/feature/serialisation-test
Add a ragged std::vector to the serialisation test
2021-06-09 10:16:25 +01:00
0c4f585496 Test nested std::vector<grid tensor> 2021-06-08 00:05:35 +01:00
33d2df46a0 Merge branch 'develop' into feature/serialisation-test
* develop:
  Update README.md
  removing Travis CI constantly failing due to overtime (no way we can compile Grid on free time anymore)
2021-06-07 23:25:38 +01:00
2df308f649 Add a ragged vector to the serialisation tests. NB: Already had nested (regular) std::vector<std::vector<...>> 2021-06-07 23:25:07 +01:00
92def28bd3 Update README.md 2021-06-06 04:52:05 -04:00
ca10bfa1c7 removing Travis CI constantly failing due to overtime (no way we can compile Grid on free time anymore) 2021-06-04 11:12:22 +01:00
298a6ec51e Merge pull request #357 from mmphys/bugfix/ragged
Bugfix/ragged Multi-dimensional ragged vectors
2021-06-04 10:34:46 +01:00
e5dbe488a6 Merge branch 'develop' into bugfix/ragged
* develop:
  Remove synch
2021-06-03 08:25:56 +01:00
0e27e3847d Remove synch 2021-06-03 04:24:19 +00:00
393727b93b Documentation update (briefly) covering serialisation changes. For review 2021-06-01 15:49:37 +01:00
2b1fcd78c3 Fixes post review with Peter: a) Correct bug in isRegularShape - detect 3d matrix where 1st slice is 2x2 and second slice is 2x1; b) Synchronisation of EigenResizeCounter done by checking we're the OMP primary thread; c) Move definition of EigenResizeCounter to new file, BaseIO.cc 2021-05-31 22:24:54 +01:00
0a4e0b49a0 BaseIO: Added "EigenResizeCounter" to keep track of any allocations/deallocations to Eigen tensors during readback. On read, if the tensor is resized, EigenResizeCounter += delta memory (in bytes) 2021-05-31 12:49:56 +01:00
76af169f05 Add global namespace to Writer<T> and Reader<T> inside GRID_SERIALIZABLE_CLASS_MEMBERS (so that "using Grid" not necessary).
Fix issue with output of Grid::iMatrix so that M<3>{{148,149,150,} {151,152,153,} {154155156}} becomes M<3>{{148,149,150} {151,152,153} {154,155,156}}
2021-05-31 08:43:02 +01:00
7b89232251 Extended HDF5 serialisation of std::vector<T> where T now also includes Grid scalar/vector/matrix
Changed VectorUtils element traits to is_flattenable, because: a) contract changed on what it does; and b) no other Grid dependencies on element. Needs review.
Initial tests work ... needs proper regression testing.
2021-05-30 20:27:53 +01:00
b5aeae526f Make Cshift fields static to avoid repeated reallocaate overhead 2021-05-28 16:33:08 +02:00
ef0ddd5d04 std::vector serialisation in hdf5 uses a different format if the vector is ragged. When reading back std::vector we need to check which format we're reading (since we don't know a priori) and this involves looking for attributes that may not exist. The c++ API: a) throws; and b) prints voluminous logging. Switched to non-throwing, non-logging, C version of the API after code review. 2021-05-24 18:43:55 +01:00
9b73dacf50 First row might still be ragged if multi dimensional. attrExists() doesn't throw, but easier to wrap in try ... catch than to explain in comment. 2021-05-22 04:34:32 +01:00
244b4aa07f Serialise std::vector of numeric types as multidimensional object if size is regular ... or individually if ragged 2021-05-21 20:08:56 +01:00
8cfc7342cd staggered hand unroll read coalesce 2021-05-05 14:17:18 -07:00
15ae317858 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-05-04 08:40:38 -07:00
834f536b5f Fastest option on SyCL is now std::complex 2021-05-04 08:40:18 -07:00
c332d9f08b Merge pull request #356 from felixerben/bugfix/stoutSmearing
Jamie's fix
2021-04-27 14:10:49 -04:00
cf2923d5dd Jamie's fix 2021-04-27 16:53:37 +01:00
0e4413ddde Merge pull request #355 from felixerben/bugfix/stoutSmearing
bugfix 3D stout smearing
2021-04-27 08:01:55 -04:00
009ccd581e bugfix 3D stout smearing 2021-04-26 10:36:33 +01:00
8cd4263974 Tests compile 2021-04-25 22:20:37 -04:00
d45c868656 Change interface 2021-04-25 10:53:34 -04:00
955a8113de Expose label only to reduce number of parameters 2021-04-25 10:36:38 -04:00
dbe210dd53 Open the ens_id 2021-04-25 10:25:59 -04:00
86e11743ca set twists 2021-04-20 10:19:11 -04:00
980e721f6e Update MetaData.h 2021-04-13 09:33:01 -04:00
e2a0142d87 Merge pull request #348 from AndrewYongZhenNing/develop
Conserved Tadpole Implementation for Shamir Action Only
2021-04-06 10:49:00 -04:00
895244ecc3 Merge with upstream; implemented conserved tadpole for Shamir action. 2021-04-06 13:46:33 +01:00
addeb621a7 Implemented tadpole operator for Shamir action. 2021-04-06 13:45:37 +01:00
a7fb25adf6 Make Cshift fields static to avoid repeated reallocaate overhead 2021-03-29 21:44:14 +02:00
e947992957 Improved force terms 2021-03-29 20:04:06 +02:00
bb89a82a07 Staggered coalseced read 2021-03-29 20:01:15 +02:00
2bb374daea hip-friendly 2021-03-19 11:33:23 +01:00
49ecbc81d4 Merge pull request #24 from ThomasWurm/feature/gpt
Put GlobalSum outside the slice loop in sliceSum
2021-03-08 16:01:47 +01:00
9e5fb52eb9 Put GlobalSum outside the slice loop 2021-03-08 13:53:34 +01:00
158 changed files with 5034 additions and 1422 deletions

1
.gitignore vendored
View File

@ -88,6 +88,7 @@ Thumbs.db
# build directory # # build directory #
################### ###################
build*/* build*/*
Documentation/_build
# IDE related files # # IDE related files #
##################### #####################

View File

@ -1,56 +0,0 @@
language: cpp
cache:
directories:
- clang
matrix:
include:
- os: osx
osx_image: xcode8.3
compiler: clang
before_install:
- export GRIDDIR=`pwd`
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]] && [ ! -e clang/bin ]; then wget $CLANG_LINK; tar -xf `basename $CLANG_LINK`; mkdir clang; mv clang+*/* clang/; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export PATH="${GRIDDIR}/clang/bin:${PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc openssl; fi
install:
- export CWD=`pwd`
- echo $CWD
- export CC=$CC$VERSION
- export CXX=$CXX$VERSION
- echo $PATH
- which autoconf
- autoconf --version
- which automake
- automake --version
- which $CC
- $CC --version
- which $CXX
- $CXX --version
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export EXTRACONF='--with-openssl=/usr/local/opt/openssl'; fi
script:
- ./bootstrap.sh
- mkdir build
- cd build
- mkdir lime
- cd lime
- mkdir build
- cd build
- wget http://usqcd-software.github.io/downloads/c-lime/lime-1.3.2.tar.gz
- tar xf lime-1.3.2.tar.gz
- cd lime-1.3.2
- ./configure --prefix=$CWD/build/lime/install
- make -j4
- make install
- cd $CWD/build
- ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check

View File

@ -442,6 +442,8 @@ public:
for(int p=0; p<geom.npoint; p++) for(int p=0; p<geom.npoint; p++)
points[p] = geom.points_dagger[p]; points[p] = geom.points_dagger[p];
auto points_p = &points[0];
RealD* dag_factor_p = &dag_factor[0]; RealD* dag_factor_p = &dag_factor[0];
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
@ -453,7 +455,7 @@ public:
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<geom_v.npoint;p++){ for(int p=0;p<geom_v.npoint;p++){
int point = points[p]; int point = points_p[p];
SE=Stencil_v.GetEntry(ptype,point,ss); SE=Stencil_v.GetEntry(ptype,point,ss);
@ -708,6 +710,8 @@ public:
for(int p=0; p<npoint; p++) for(int p=0; p<npoint; p++)
points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p; points[p] = (dag && !hermitian) ? geom.points_dagger[p] : p;
auto points_p = &points[0];
Vector<Aview> AcceleratorViewContainer; Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead)); for(int p=0;p<npoint;p++) AcceleratorViewContainer.push_back(a[p].View(AcceleratorRead));
Aview *Aview_p = & AcceleratorViewContainer[0]; Aview *Aview_p = & AcceleratorViewContainer[0];
@ -728,7 +732,7 @@ public:
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<npoint;p++){ for(int p=0;p<npoint;p++){
int point = points[p]; int point = points_p[p];
SE=st_v.GetEntry(ptype,point,ss); SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) { if(SE->_is_local) {
@ -754,7 +758,7 @@ public:
StencilEntry *SE; StencilEntry *SE;
for(int p=0;p<npoint;p++){ for(int p=0;p<npoint;p++){
int point = points[p]; int point = points_p[p];
SE=st_v.GetEntry(ptype,point,ss); SE=st_v.GetEntry(ptype,point,ss);
if(SE->_is_local) { if(SE->_is_local) {

View File

@ -136,7 +136,7 @@ public:
flops=0; flops=0;
usec =0; usec =0;
Coordinate layout(Nd,1); Coordinate layout(Nd,1);
sgrid = new GridCartesian(dimensions,layout,processors); sgrid = new GridCartesian(dimensions,layout,processors,*grid);
}; };
~FFT ( void) { ~FFT ( void) {
@ -182,7 +182,7 @@ public:
pencil_gd[dim] = G*processors[dim]; pencil_gd[dim] = G*processors[dim];
// Pencil global vol LxLxGxLxL per node // Pencil global vol LxLxGxLxL per node
GridCartesian pencil_g(pencil_gd,layout,processors); GridCartesian pencil_g(pencil_gd,layout,processors,*vgrid);
// Construct pencils // Construct pencils
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;

View File

@ -530,6 +530,16 @@ public:
template<class Field> class LinearFunction { template<class Field> class LinearFunction {
public: public:
virtual void operator() (const Field &in, Field &out) = 0; virtual void operator() (const Field &in, Field &out) = 0;
virtual void operator() (const std::vector<Field> &in, std::vector<Field> &out)
{
assert(in.size() == out.size());
for (unsigned int i = 0; i < in.size(); ++i)
{
(*this)(in[i], out[i]);
}
}
}; };
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {

View File

@ -54,15 +54,23 @@ class DeflatedGuesser: public LinearFunction<Field> {
private: private:
const std::vector<Field> &evec; const std::vector<Field> &evec;
const std::vector<RealD> &eval; const std::vector<RealD> &eval;
const unsigned int N;
public: public:
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval)
: DeflatedGuesser(_evec, _eval, _evec.size())
{}
DeflatedGuesser(const std::vector<Field> & _evec, const std::vector<RealD> & _eval, const unsigned int _N)
: evec(_evec), eval(_eval), N(_N)
{
assert(evec.size()==eval.size());
assert(N <= evec.size());
}
virtual void operator()(const Field &src,Field &guess) { virtual void operator()(const Field &src,Field &guess) {
guess = Zero(); guess = Zero();
assert(evec.size()==eval.size());
auto N = evec.size();
for (int i=0;i<N;i++) { for (int i=0;i<N;i++) {
const Field& tmp = evec[i]; const Field& tmp = evec[i];
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);

View File

@ -132,6 +132,31 @@ namespace Grid {
(*this)(_Matrix,in,out,guess); (*this)(_Matrix,in,out,guess);
} }
void RedBlackSource(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &src_o)
{
GridBase *grid = _Matrix.RedBlackGrid();
Field tmp(grid);
int nblock = in.size();
for(int b=0;b<nblock;b++){
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
}
}
// James can write his own deflated guesser
// with optimised code for the inner products
// RedBlackSolveSplitGrid();
// RedBlackSolve(_Matrix,src_o,sol_o);
void RedBlackSolution(Matrix &_Matrix, const std::vector<Field> &in, const std::vector<Field> &sol_o, std::vector<Field> &out)
{
GridBase *grid = _Matrix.RedBlackGrid();
Field tmp(grid);
int nblock = in.size();
for(int b=0;b<nblock;b++) {
pickCheckerboard(Even,tmp,in[b]);
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
}
}
template<class Guesser> template<class Guesser>
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess) void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
{ {
@ -150,24 +175,29 @@ namespace Grid {
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Prepare RedBlack source // Prepare RedBlack source
//////////////////////////////////////////////// ////////////////////////////////////////////////
for(int b=0;b<nblock;b++){ RedBlackSource(_Matrix,in,src_o);
RedBlackSource(_Matrix,in[b],tmp,src_o[b]); // for(int b=0;b<nblock;b++){
} // RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
// }
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Make the guesses // Make the guesses
//////////////////////////////////////////////// ////////////////////////////////////////////////
if ( subGuess ) guess_save.resize(nblock,grid); if ( subGuess ) guess_save.resize(nblock,grid);
for(int b=0;b<nblock;b++){
if(useSolnAsInitGuess) { if(useSolnAsInitGuess) {
for(int b=0;b<nblock;b++){
pickCheckerboard(Odd, sol_o[b], out[b]); pickCheckerboard(Odd, sol_o[b], out[b]);
} else {
guess(src_o[b],sol_o[b]);
} }
} else {
guess(src_o, sol_o);
}
if ( subGuess ) { if ( subGuess ) {
guess_save[b] = sol_o[b]; for(int b=0;b<nblock;b++){
} guess_save[b] = sol_o[b];
}
} }
////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////
// Call the block solver // Call the block solver

View File

@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
bool Stencil_force_mpi = true;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout // Info that is setup once and indept of cartesian layout
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////

View File

@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
extern bool Stencil_force_mpi ;
class CartesianCommunicator : public SharedMemory { class CartesianCommunicator : public SharedMemory {
public: public:

View File

@ -370,7 +370,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
double off_node_bytes=0.0; double off_node_bytes=0.0;
int tag; int tag;
if ( gfrom ==MPI_UNDEFINED) { if ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+from*32; tag= dir+from*32;
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq); ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,tag,communicator_halo[commdir],&rrq);
assert(ierr==0); assert(ierr==0);
@ -378,12 +378,18 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
off_node_bytes+=bytes; off_node_bytes+=bytes;
} }
if ( gdest == MPI_UNDEFINED ) { if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
tag= dir+_processor*32; tag= dir+_processor*32;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq); ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
assert(ierr==0); assert(ierr==0);
list.push_back(xrq); list.push_back(xrq);
off_node_bytes+=bytes; off_node_bytes+=bytes;
} else {
// TODO : make a OMP loop on CPU, call threaded bcopy
void *shm = (void *) this->ShmBufferTranslate(dest,recv);
assert(shm!=NULL);
acceleratorCopyDeviceToDeviceAsynch(xmit,shm,bytes);
acceleratorCopySynchronise(); // MPI prob slower
} }
if ( CommunicatorPolicy == CommunicatorPolicySequential ) { if ( CommunicatorPolicy == CommunicatorPolicySequential ) {

View File

@ -35,6 +35,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
#endif #endif
#ifdef GRID_HIP #ifdef GRID_HIP
#include <hip/hip_runtime_api.h> #include <hip/hip_runtime_api.h>
#endif
#ifdef GRID_SYCl
#endif #endif
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
@ -70,6 +73,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
WorldNodes = WorldSize/WorldShmSize; WorldNodes = WorldSize/WorldShmSize;
assert( (WorldNodes * WorldShmSize) == WorldSize ); assert( (WorldNodes * WorldShmSize) == WorldSize );
// FIXME: Check all WorldShmSize are the same ? // FIXME: Check all WorldShmSize are the same ?
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -446,7 +450,47 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended // Hugetlbfs mapping intended
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
#if defined(GRID_CUDA) ||defined(GRID_HIP) #if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL)
//if defined(GRID_SYCL)
#if 0
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the pointer array for shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE);
}
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
SharedMemoryZero(ShmCommBuf,bytes);
assert(WorldShmSize == 1);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = ShmCommBuf;
}
_ShmAllocBytes=bytes;
_ShmAlloc=1;
}
#endif
#if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
void * ShmCommBuf ; void * ShmCommBuf ;
@ -470,18 +514,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// Each MPI rank should allocate our own buffer // Each MPI rank should allocate our own buffer
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes); ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) { if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl; std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
// if ( WorldRank == 0 ){ if ( WorldRank == 0 ){
if ( 1 ){
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
} }
SharedMemoryZero(ShmCommBuf,bytes); SharedMemoryZero(ShmCommBuf,bytes);
std::cout<< "Setting up IPC"<<std::endl;
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node // Loop over ranks/gpu's on our node
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -491,6 +533,29 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////// //////////////////////////////////////////////////
// If it is me, pass around the IPC access key // If it is me, pass around the IPC access key
////////////////////////////////////////////////// //////////////////////////////////////////////////
void * thisBuf = ShmCommBuf;
if(!Stencil_force_mpi) {
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
typedef struct { int fd; pid_t pid ; } clone_mem_t;
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
ze_ipc_mem_handle_t ihandle;
clone_mem_t handle;
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&ihandle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
memcpy((void *)&handle.fd,(void *)&ihandle,sizeof(int));
handle.pid = getpid();
}
#endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
cudaIpcMemHandle_t handle; cudaIpcMemHandle_t handle;
if ( r==WorldShmRank ) { if ( r==WorldShmRank ) {
@ -511,6 +576,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
} }
} }
#endif #endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Share this IPC handle across the Shm Comm // Share this IPC handle across the Shm Comm
////////////////////////////////////////////////// //////////////////////////////////////////////////
@ -526,7 +592,35 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// If I am not the source, overwrite thisBuf with remote buffer // If I am not the source, overwrite thisBuf with remote buffer
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void * thisBuf = ShmCommBuf;
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
std::cout<<"mapping seeking remote pid/fd "
<<handle.pid<<"/"
<<handle.fd<<std::endl;
int pidfd = syscall(SYS_pidfd_open,handle.pid,0);
std::cout<<"Using IpcHandle pidfd "<<pidfd<<"\n";
// int myfd = syscall(SYS_pidfd_getfd,pidfd,handle.fd,0);
int myfd = syscall(438,pidfd,handle.fd,0);
std::cout<<"Using IpcHandle myfd "<<myfd<<"\n";
memcpy((void *)&ihandle,(void *)&myfd,sizeof(int));
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,ihandle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cout << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<std::endl;
std::cout << "SharedMemoryMPI.cc zeMemOpenIpcHandle pointer is "<<std::hex<<thisBuf<<std::dec<<std::endl;
}
assert(thisBuf!=nullptr);
}
#endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
if ( r!=WorldShmRank ) { if ( r!=WorldShmRank ) {
auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
@ -548,6 +642,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Save a copy of the device buffers // Save a copy of the device buffers
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
}
WorldShmCommBufs[r] = thisBuf; WorldShmCommBufs[r] = thisBuf;
#else #else
WorldShmCommBufs[r] = ShmCommBuf; WorldShmCommBufs[r] = ShmCommBuf;
@ -557,6 +652,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
_ShmAllocBytes=bytes; _ShmAllocBytes=bytes;
_ShmAlloc=1; _ShmAlloc=1;
} }
#endif
#else #else
#ifdef GRID_MPI3_SHMMMAP #ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
@ -727,16 +824,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes) void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
{ {
#ifdef GRID_CUDA #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
cudaMemset(dest,0,bytes); acceleratorMemSet(dest,0,bytes);
#else #else
bzero(dest,bytes); bzero(dest,bytes);
#endif #endif
} }
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes) void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
{ {
#ifdef GRID_CUDA #if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
cudaMemcpy(dest,src,bytes,cudaMemcpyDefault); acceleratorCopyToDevice(src,dest,bytes);
#else #else
bcopy(src,dest,bytes); bcopy(src,dest,bytes);
#endif #endif
@ -800,7 +897,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
} }
#endif #endif
SharedMemoryTest(); //SharedMemoryTest();
} }
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// On node barrier // On node barrier

View File

@ -122,8 +122,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
assert(shift<fd); assert(shift<fd);
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
cshiftVector<vobj> send_buf(buffer_size); static cshiftVector<vobj> send_buf; send_buf.resize(buffer_size);
cshiftVector<vobj> recv_buf(buffer_size); static cshiftVector<vobj> recv_buf; recv_buf.resize(buffer_size);
int cb= (cbmask==0x2)? Odd : Even; int cb= (cbmask==0x2)? Odd : Even;
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
@ -198,8 +198,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
// int words = sizeof(vobj)/sizeof(vector_type); // int words = sizeof(vobj)/sizeof(vector_type);
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd); static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd); static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
scalar_object * recv_buf_extract_mpi; scalar_object * recv_buf_extract_mpi;
scalar_object * send_buf_extract_mpi; scalar_object * send_buf_extract_mpi;
@ -294,8 +294,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
assert(shift<fd); assert(shift<fd);
int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
cshiftVector<vobj> send_buf_v(buffer_size); static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
cshiftVector<vobj> recv_buf_v(buffer_size); static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
vobj *send_buf; vobj *send_buf;
vobj *recv_buf; vobj *recv_buf;
{ {
@ -381,8 +381,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
// int words = sizeof(vobj)/sizeof(vector_type); // int words = sizeof(vobj)/sizeof(vector_type);
std::vector<cshiftVector<scalar_object> > send_buf_extract(Nsimd); static std::vector<cshiftVector<scalar_object> > send_buf_extract; send_buf_extract.resize(Nsimd);
std::vector<cshiftVector<scalar_object> > recv_buf_extract(Nsimd); static std::vector<cshiftVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
scalar_object * recv_buf_extract_mpi; scalar_object * recv_buf_extract_mpi;
scalar_object * send_buf_extract_mpi; scalar_object * send_buf_extract_mpi;
{ {

View File

@ -225,7 +225,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
autoView( x_v , x, AcceleratorRead); autoView( x_v , x, AcceleratorRead);
autoView( y_v , y, AcceleratorRead); autoView( y_v , y, AcceleratorRead);
accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ accelerator_for(ss,x_v.size(),vobj::Nsimd(),{
auto tmp = a*x_v(ss)+y_v(ss); auto tmp = a*coalescedRead(x_v[ss])+coalescedRead(y_v[ss]);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);
}); });
} }

View File

@ -125,7 +125,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
for(int k=k0; k<k1; ++k){ for(int k=k0; k<k1; ++k){
auto tmp = coalescedRead(Bp[ss*nrot+j]); auto tmp = coalescedRead(Bp[ss*nrot+j]);
coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_v[k][sss])); coalescedWrite(Bp[ss*nrot+j],tmp+ Qt_p[jj*Nm+k] * coalescedRead(basis_vp[k][sss]));
} }
}); });
@ -134,7 +134,7 @@ void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm)
int jj =j0+j; int jj =j0+j;
int ss =sj/nrot; int ss =sj/nrot;
int sss=ss+s; int sss=ss+s;
coalescedWrite(basis_v[jj][sss],coalescedRead(Bp[ss*nrot+j])); coalescedWrite(basis_vp[jj][sss],coalescedRead(Bp[ss*nrot+j]));
}); });
} }
#endif #endif

View File

@ -361,6 +361,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
// But easily avoided by using double precision fields // But easily avoided by using double precision fields
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_object::scalar_type scalar_type;
GridBase *grid = Data.Grid(); GridBase *grid = Data.Grid();
assert(grid!=NULL); assert(grid!=NULL);
@ -419,20 +420,19 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
} }
// sum over nodes. // sum over nodes.
sobj gsum;
for(int t=0;t<fd;t++){ for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane int pt = t/ld; // processor plane
int lt = t%ld; int lt = t%ld;
if ( pt == grid->_processor_coor[orthogdim] ) { if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt]; result[t]=lsSum[lt];
} else { } else {
gsum=Zero(); result[t]=Zero();
} }
grid->GlobalSum(gsum);
result[t]=gsum;
} }
scalar_type * ptr = (scalar_type *) &result[0];
int words = fd*sizeof(sobj)/sizeof(scalar_type);
grid->GlobalSumVector(ptr, words);
} }
template<class vobj> template<class vobj>

View File

@ -364,16 +364,22 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
autoView( coarseData_ , coarseData, AcceleratorWrite); autoView( coarseData_ , coarseData, AcceleratorWrite);
autoView( fineData_ , fineData, AcceleratorRead); autoView( fineData_ , fineData, AcceleratorRead);
auto coarseData_p = &coarseData_[0];
auto fineData_p = &fineData_[0];
Coordinate fine_rdimensions = fine->_rdimensions; Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions; Coordinate coarse_rdimensions = coarse->_rdimensions;
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{ accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block // One thread per sub block
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
coarseData_[sc]=Zero();
vobj cd = zz;
for(int sb=0;sb<blockVol;sb++){ for(int sb=0;sb<blockVol;sb++){
int sf; int sf;
@ -383,9 +389,11 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d]; for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions); Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
coarseData_[sc]=coarseData_[sc]+fineData_[sf]; cd=cd+fineData_p[sf];
} }
coarseData_p[sc] = cd;
}); });
return; return;
} }

View File

@ -128,7 +128,7 @@ inline void MachineCharacteristics(FieldMetaData &header)
std::time_t t = std::time(nullptr); std::time_t t = std::time(nullptr);
std::tm tm_ = *std::localtime(&t); std::tm tm_ = *std::localtime(&t);
std::ostringstream oss; std::ostringstream oss;
// oss << std::put_time(&tm_, "%c %Z"); oss << std::put_time(&tm_, "%c %Z");
header.creation_date = oss.str(); header.creation_date = oss.str();
header.archive_date = header.creation_date; header.archive_date = header.creation_date;

View File

@ -205,11 +205,20 @@ public:
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
} }
// Preferred interface
template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file,
std::string ens_label = std::string("DWF"))
{
writeConfiguration(Umu,file,0,1,ens_label);
}
template<class GaugeStats=PeriodicGaugeStatistics> template<class GaugeStats=PeriodicGaugeStatistics>
static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu, static inline void writeConfiguration(Lattice<vLorentzColourMatrixD > &Umu,
std::string file, std::string file,
int two_row, int two_row,
int bits32) int bits32,
std::string ens_label = std::string("DWF"))
{ {
typedef vLorentzColourMatrixD vobj; typedef vLorentzColourMatrixD vobj;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
@ -219,8 +228,8 @@ public:
// Following should become arguments // Following should become arguments
/////////////////////////////////////////// ///////////////////////////////////////////
header.sequence_number = 1; header.sequence_number = 1;
header.ensemble_id = "UKQCD"; header.ensemble_id = std::string("UKQCD");
header.ensemble_label = "DWF"; header.ensemble_label = ens_label;
typedef LorentzColourMatrixD fobj3D; typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D; typedef LorentzColour2x3D fobj2D;
@ -232,7 +241,7 @@ public:
GaugeStats Stats; Stats(Umu,header); GaugeStats Stats; Stats(Umu,header);
MachineCharacteristics(header); MachineCharacteristics(header);
uint64_t offset; uint64_t offset;
// Sod it -- always write 3x3 double // Sod it -- always write 3x3 double
header.floating_point = std::string("IEEE64BIG"); header.floating_point = std::string("IEEE64BIG");

View File

@ -115,9 +115,9 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD; typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL; //typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH; //typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF; //typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
@ -158,41 +158,41 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL; //typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH; //typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF; //typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR; typedef DomainWallEOFAFermion<WilsonImplR> DomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF; typedef DomainWallEOFAFermion<WilsonImplF> DomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD; typedef DomainWallEOFAFermion<WilsonImplD> DomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL; //typedef DomainWallEOFAFermion<WilsonImplRL> DomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH; //typedef DomainWallEOFAFermion<WilsonImplFH> DomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF; //typedef DomainWallEOFAFermion<WilsonImplDF> DomainWallEOFAFermionDF;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL; //typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH; //typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF; //typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR; typedef MobiusEOFAFermion<WilsonImplR> MobiusEOFAFermionR;
typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF; typedef MobiusEOFAFermion<WilsonImplF> MobiusEOFAFermionF;
typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD; typedef MobiusEOFAFermion<WilsonImplD> MobiusEOFAFermionD;
typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL; //typedef MobiusEOFAFermion<WilsonImplRL> MobiusEOFAFermionRL;
typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH; //typedef MobiusEOFAFermion<WilsonImplFH> MobiusEOFAFermionFH;
typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF; //typedef MobiusEOFAFermion<WilsonImplDF> MobiusEOFAFermionDF;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL; //typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH; //typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF; //typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised // Ls vectorised
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
@ -235,49 +235,49 @@ typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF; typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD; typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL; //typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH; //typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF; //typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL; //typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH; //typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF; //typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR; typedef DomainWallEOFAFermion<GparityWilsonImplR> GparityDomainWallEOFAFermionR;
typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF; typedef DomainWallEOFAFermion<GparityWilsonImplF> GparityDomainWallEOFAFermionF;
typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD; typedef DomainWallEOFAFermion<GparityWilsonImplD> GparityDomainWallEOFAFermionD;
typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL; //typedef DomainWallEOFAFermion<GparityWilsonImplRL> GparityDomainWallEOFAFermionRL;
typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH; //typedef DomainWallEOFAFermion<GparityWilsonImplFH> GparityDomainWallEOFAFermionFH;
typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF; //typedef DomainWallEOFAFermion<GparityWilsonImplDF> GparityDomainWallEOFAFermionDF;
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL; //typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH; //typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF; //typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL; //typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH; //typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF; //typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR; typedef MobiusEOFAFermion<GparityWilsonImplR> GparityMobiusEOFAFermionR;
typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF; typedef MobiusEOFAFermion<GparityWilsonImplF> GparityMobiusEOFAFermionF;
typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD; typedef MobiusEOFAFermion<GparityWilsonImplD> GparityMobiusEOFAFermionD;
typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL; //typedef MobiusEOFAFermion<GparityWilsonImplRL> GparityMobiusEOFAFermionRL;
typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH; //typedef MobiusEOFAFermion<GparityWilsonImplFH> GparityMobiusEOFAFermionFH;
typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF; //typedef MobiusEOFAFermion<GparityWilsonImplDF> GparityMobiusEOFAFermionDF;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
@ -291,12 +291,6 @@ typedef ImprovedStaggeredFermion5D<StaggeredImplR> ImprovedStaggeredFermion5DR;
typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D<StaggeredImplF> ImprovedStaggeredFermion5DF;
typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD; typedef ImprovedStaggeredFermion5D<StaggeredImplD> ImprovedStaggeredFermion5DD;
#ifndef GRID_CUDA
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplR> ImprovedStaggeredFermionVec5dR;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplF> ImprovedStaggeredFermionVec5dF;
typedef ImprovedStaggeredFermion5D<StaggeredVec5dImplD> ImprovedStaggeredFermionVec5dD;
#endif
NAMESPACE_END(Grid); NAMESPACE_END(Grid);
//////////////////// ////////////////////

View File

@ -183,7 +183,8 @@ NAMESPACE_CHECK(ImplStaggered);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index. 5d vec // Single flavour one component spinors with colour index. 5d vec
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h> // Deprecate Vec5d
NAMESPACE_CHECK(ImplStaggered5dVec); //#include <Grid/qcd/action/fermion/StaggeredVec5dImpl.h>
//NAMESPACE_CHECK(ImplStaggered5dVec);

View File

@ -327,8 +327,8 @@ typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> Gparit
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec //typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float //typedef GparityWilsonImpl<vComplexF, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double //typedef GparityWilsonImpl<vComplexD, FundamentalRepresentation,CoeffRealHalfComms> GparityWilsonImplDF; // Double
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -72,19 +72,23 @@ public:
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
static accelerator_inline void multLink(SiteSpinor &phi, template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteSpinor &chi, const _Spinor &chi,
int mu) int mu)
{ {
mult(&phi(), &U(mu), &chi()); auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
} }
static accelerator_inline void multLinkAdd(SiteSpinor &phi, template<class _Spinor>
static accelerator_inline void multLinkAdd(_Spinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteSpinor &chi, const _Spinor &chi,
int mu) int mu)
{ {
mac(&phi(), &U(mu), &chi()); auto UU = coalescedRead(U(mu));
mac(&phi(), &UU, &chi());
} }
template <class ref> template <class ref>

View File

@ -68,11 +68,12 @@ public:
/*****************************************************/ /*****************************************************/
/* Compress includes precision change if mpi data is not same */ /* Compress includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
template<class _SiteHalfSpinor, class _SiteSpinor> accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { typedef decltype(coalescedRead(buf)) sobj;
_SiteHalfSpinor tmp; sobj sp;
projector::Proj(tmp,in,mu,dag); auto sin = coalescedRead(in);
vstream(buf[o],tmp); projector::Proj(sp,sin,mu,dag);
coalescedWrite(buf,sp);
} }
/*****************************************************/ /*****************************************************/
@ -82,13 +83,18 @@ public:
const SiteHalfSpinor * __restrict__ vp0, const SiteHalfSpinor * __restrict__ vp0,
const SiteHalfSpinor * __restrict__ vp1, const SiteHalfSpinor * __restrict__ vp1,
Integer type,Integer o) const { Integer type,Integer o) const {
#ifdef GRID_SIMT
exchangeSIMT(mp[2*o],mp[2*o+1],vp0[o],vp1[o],type);
#else
SiteHalfSpinor tmp1; SiteHalfSpinor tmp1;
SiteHalfSpinor tmp2; SiteHalfSpinor tmp2;
exchange(tmp1,tmp2,vp0[o],vp1[o],type); exchange(tmp1,tmp2,vp0[o],vp1[o],type);
vstream(mp[2*o ],tmp1); vstream(mp[2*o ],tmp1);
vstream(mp[2*o+1],tmp2); vstream(mp[2*o+1],tmp2);
#endif
} }
/*****************************************************/ /*****************************************************/
/* Have a decompression step if mpi data is not same */ /* Have a decompression step if mpi data is not same */
/*****************************************************/ /*****************************************************/
@ -105,6 +111,28 @@ public:
const SiteSpinor * __restrict__ in, const SiteSpinor * __restrict__ in,
Integer j,Integer k, Integer m,Integer type) const Integer j,Integer k, Integer m,Integer type) const
{ {
#ifdef GRID_SIMT
typedef SiteSpinor vobj;
typedef SiteHalfSpinor hvobj;
typedef decltype(coalescedRead(*in)) sobj;
typedef decltype(coalescedRead(*out0)) hsobj;
unsigned int Nsimd = vobj::Nsimd();
unsigned int mask = Nsimd >> (type + 1);
int lane = acceleratorSIMTlane(Nsimd);
int j0 = lane &(~mask); // inner coor zero
int j1 = lane |(mask) ; // inner coor one
const vobj *vp0 = &in[k];
const vobj *vp1 = &in[m];
const vobj *vp = (lane&mask) ? vp1:vp0;
auto sa = coalescedRead(*vp,j0);
auto sb = coalescedRead(*vp,j1);
hsobj psa, psb;
projector::Proj(psa,sa,mu,dag);
projector::Proj(psb,sb,mu,dag);
coalescedWrite(out0[j],psa);
coalescedWrite(out1[j],psb);
#else
SiteHalfSpinor temp1, temp2; SiteHalfSpinor temp1, temp2;
SiteHalfSpinor temp3, temp4; SiteHalfSpinor temp3, temp4;
projector::Proj(temp1,in[k],mu,dag); projector::Proj(temp1,in[k],mu,dag);
@ -112,6 +140,7 @@ public:
exchange(temp3,temp4,temp1,temp2,type); exchange(temp3,temp4,temp1,temp2,type);
vstream(out0[j],temp3); vstream(out0[j],temp3);
vstream(out1[j],temp4); vstream(out1[j],temp4);
#endif
} }
/*****************************************************/ /*****************************************************/
@ -121,6 +150,7 @@ public:
}; };
#if 0
template<class _HCspinor,class _Hspinor,class _Spinor, class projector> template<class _HCspinor,class _Hspinor,class _Spinor, class projector>
class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector,
typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type > typename std::enable_if<!std::is_same<_HCspinor,_Hspinor>::value>::type >
@ -149,13 +179,23 @@ public:
/*****************************************************/ /*****************************************************/
/* Compress includes precision change if mpi data is not same */ /* Compress includes precision change if mpi data is not same */
/*****************************************************/ /*****************************************************/
template<class _SiteHalfSpinor, class _SiteSpinor> accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) const { SiteHalfSpinor hsp;
_SiteHalfSpinor hsp;
SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf;
projector::Proj(hsp,in,mu,dag); projector::Proj(hsp,in,mu,dag);
precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw);
} }
accelerator_inline void Compress(SiteHalfSpinor &buf,const SiteSpinor &in) const {
#ifdef GRID_SIMT
typedef decltype(coalescedRead(buf)) sobj;
sobj sp;
auto sin = coalescedRead(in);
projector::Proj(sp,sin,mu,dag);
coalescedWrite(buf,sp);
#else
projector::Proj(buf,in,mu,dag);
#endif
}
/*****************************************************/ /*****************************************************/
/* Exchange includes precision change if mpi data is not same */ /* Exchange includes precision change if mpi data is not same */
@ -203,6 +243,7 @@ public:
accelerator_inline bool DecompressionStep(void) const { return true; } accelerator_inline bool DecompressionStep(void) const { return true; }
}; };
#endif
#define DECLARE_PROJ(Projector,Compressor,spProj) \ #define DECLARE_PROJ(Projector,Compressor,spProj) \
class Projector { \ class Projector { \
@ -253,33 +294,8 @@ public:
typedef typename Base::View_type View_type; typedef typename Base::View_type View_type;
typedef typename Base::StencilVector StencilVector; typedef typename Base::StencilVector StencilVector;
double timer0; void ZeroCountersi(void) { }
double timer1; void Reporti(int calls) { }
double timer2;
double timer3;
double timer4;
double timer5;
double timer6;
uint64_t callsi;
void ZeroCountersi(void)
{
timer0=0;
timer1=0;
timer2=0;
timer3=0;
timer4=0;
timer5=0;
timer6=0;
callsi=0;
}
void Reporti(int calls)
{
if ( timer0 ) std::cout << GridLogMessage << " timer0 (HaloGatherOpt) " <<timer0/calls <<std::endl;
if ( timer1 ) std::cout << GridLogMessage << " timer1 (Communicate) " <<timer1/calls <<std::endl;
if ( timer2 ) std::cout << GridLogMessage << " timer2 (CommsMerge ) " <<timer2/calls <<std::endl;
if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " <<timer3/calls <<std::endl;
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
}
std::vector<int> surface_list; std::vector<int> surface_list;
@ -321,26 +337,18 @@ public:
{ {
std::vector<std::vector<CommsRequest_t> > reqs; std::vector<std::vector<CommsRequest_t> > reqs;
this->HaloExchangeOptGather(source,compress); this->HaloExchangeOptGather(source,compress);
double t1=usecond();
// Asynchronous MPI calls multidirectional, Isend etc... // Asynchronous MPI calls multidirectional, Isend etc...
// Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways.
this->Communicate(); this->Communicate();
double t2=usecond(); timer1 += t2-t1;
this->CommsMerge(compress); this->CommsMerge(compress);
double t3=usecond(); timer2 += t3-t2;
this->CommsMergeSHM(compress); this->CommsMergeSHM(compress);
double t4=usecond(); timer3 += t4-t3;
} }
template <class compressor> template <class compressor>
void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress) void HaloExchangeOptGather(const Lattice<vobj> &source,compressor &compress)
{ {
this->Prepare(); this->Prepare();
double t0=usecond();
this->HaloGatherOpt(source,compress); this->HaloGatherOpt(source,compress);
double t1=usecond();
timer0 += t1-t0;
callsi++;
} }
template <class compressor> template <class compressor>
@ -352,12 +360,9 @@ public:
typedef typename compressor::SiteHalfSpinor SiteHalfSpinor; typedef typename compressor::SiteHalfSpinor SiteHalfSpinor;
typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor; typedef typename compressor::SiteHalfCommSpinor SiteHalfCommSpinor;
this->mpi3synctime_g-=usecond();
this->_grid->StencilBarrier(); this->_grid->StencilBarrier();
this->mpi3synctime_g+=usecond();
assert(source.Grid()==this->_grid); assert(source.Grid()==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0; this->u_comm_offset=0;
@ -393,7 +398,6 @@ public:
} }
this->face_table_computed=1; this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size); assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
accelerator_barrier(); accelerator_barrier();
} }

View File

@ -184,18 +184,22 @@ public:
mat = TraceIndex<SpinIndex>(P); mat = TraceIndex<SpinIndex>(P);
} }
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){ inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
{
for (int mu = 0; mu < Nd; mu++) for (int mu = 0; mu < Nd; mu++)
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu); mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu)
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ {
#undef USE_OLD_INSERT_FORCE
int Ls=Btilde.Grid()->_fdimensions[0]; int Ls=Btilde.Grid()->_fdimensions[0];
autoView( mat_v , mat, AcceleratorWrite);
#ifdef USE_OLD_INSERT_FORCE
GaugeLinkField tmp(mat.Grid()); GaugeLinkField tmp(mat.Grid());
tmp = Zero(); tmp = Zero();
{ {
const int Nsimd = SiteSpinor::Nsimd();
autoView( tmp_v , tmp, AcceleratorWrite); autoView( tmp_v , tmp, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead); autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead); autoView( Atilde_v , Atilde, AcceleratorRead);
@ -208,6 +212,29 @@ public:
}); });
} }
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
#else
{
const int Nsimd = SiteSpinor::Nsimd();
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
auto op = outerProduct(bb,aa);
sum = sum + op;
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
#endif
} }
}; };
@ -216,17 +243,17 @@ typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec //typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec //typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float //typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double //typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float

View File

@ -880,17 +880,29 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
} }
std::vector<RealD> G_s(Ls,1.0); std::vector<RealD> G_s(Ls,1.0);
RealD sign = 1; // sign flip for vector/tadpole
if ( curr_type == Current::Axial ) { if ( curr_type == Current::Axial ) {
for(int s=0;s<Ls/2;s++){ for(int s=0;s<Ls/2;s++){
G_s[s] = -1.0; G_s[s] = -1.0;
} }
} }
else if ( curr_type == Current::Tadpole ) {
auto b=this->_b;
auto c=this->_c;
if ( b == 1 && c == 0 ) {
sign = -1;
}
else {
std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
assert(b==1 && c==0);
}
}
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sp = (s+1)%Ls; int sp = (s+1)%Ls;
int sr = Ls-1-s; // int sr = Ls-1-s;
int srp= (sr+1)%Ls; // int srp= (sr+1)%Ls;
// Mobius parameters // Mobius parameters
auto b=this->bs[s]; auto b=this->bs[s];
@ -907,7 +919,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = Cshift(tmp,mu,1); tmp = Cshift(tmp,mu,1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu); Impl::multLinkField(Utmp,this->Umu,tmp,mu);
tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop tmp = sign*G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop
tmp = where((lcoor>=tmin),tmp,zz); // Mask the time tmp = where((lcoor>=tmin),tmp,zz); // Mask the time
L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated

View File

@ -680,7 +680,8 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilView &st,
gauge2 =(uint64_t)&UU[sU]( Z ); \ gauge2 =(uint64_t)&UU[sU]( Z ); \
gauge3 =(uint64_t)&UU[sU]( T ); gauge3 =(uint64_t)&UU[sU]( T );
#undef STAG_VEC5D
#ifdef STAG_VEC5D
// This is the single precision 5th direction vectorised kernel // This is the single precision 5th direction vectorised kernel
#include <Grid/simd/Intel512single.h> #include <Grid/simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st, template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilView &st,
@ -790,7 +791,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilView
#endif #endif
} }
#endif
#define PERMUTE_DIR3 __asm__ ( \ #define PERMUTE_DIR3 __asm__ ( \

View File

@ -32,25 +32,50 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#define LOAD_CHI(b) \ #ifdef GRID_SIMT
#define LOAD_CHI(ptype,b) \
const SiteSpinor & ref (b[offset]); \
Chi_0=coalescedReadPermute<ptype>(ref()()(0),perm,lane); \
Chi_1=coalescedReadPermute<ptype>(ref()()(1),perm,lane); \
Chi_2=coalescedReadPermute<ptype>(ref()()(2),perm,lane);
#define LOAD_CHI_COMMS(b) \
const SiteSpinor & ref (b[offset]); \ const SiteSpinor & ref (b[offset]); \
Chi_0=ref()()(0);\ Chi_0=coalescedRead(ref()()(0),lane); \
Chi_1=ref()()(1);\ Chi_1=coalescedRead(ref()()(1),lane); \
Chi_2=ref()()(2); Chi_2=coalescedRead(ref()()(2),lane);
#define PERMUTE_DIR(dir) ;
#else
#define LOAD_CHI(ptype,b) LOAD_CHI_COMMS(b)
#define LOAD_CHI_COMMS(b) \
const SiteSpinor & ref (b[offset]); \
Chi_0=ref()()(0); \
Chi_1=ref()()(1); \
Chi_2=ref()()(2);
#define PERMUTE_DIR(dir) \
permute##dir(Chi_0,Chi_0); \
permute##dir(Chi_1,Chi_1); \
permute##dir(Chi_2,Chi_2);
#endif
// To splat or not to splat depends on the implementation // To splat or not to splat depends on the implementation
#define MULT(A,UChi) \ #define MULT(A,UChi) \
auto & ref(U[sU](A)); \ auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \ U_00=coalescedRead(ref()(0,0),lane); \
Impl::loadLinkElement(U_10,ref()(1,0)); \ U_10=coalescedRead(ref()(1,0),lane); \
Impl::loadLinkElement(U_20,ref()(2,0)); \ U_20=coalescedRead(ref()(2,0),lane); \
Impl::loadLinkElement(U_01,ref()(0,1)); \ U_01=coalescedRead(ref()(0,1),lane); \
Impl::loadLinkElement(U_11,ref()(1,1)); \ U_11=coalescedRead(ref()(1,1),lane); \
Impl::loadLinkElement(U_21,ref()(2,1)); \ U_21=coalescedRead(ref()(2,1),lane); \
Impl::loadLinkElement(U_02,ref()(0,2)); \ U_02=coalescedRead(ref()(0,2),lane); \
Impl::loadLinkElement(U_12,ref()(1,2)); \ U_12=coalescedRead(ref()(1,2),lane); \
Impl::loadLinkElement(U_22,ref()(2,2)); \ U_22=coalescedRead(ref()(2,2),lane); \
UChi ## _0 = U_00*Chi_0; \ UChi ## _0 = U_00*Chi_0; \
UChi ## _1 = U_10*Chi_0;\ UChi ## _1 = U_10*Chi_0;\
UChi ## _2 = U_20*Chi_0;\ UChi ## _2 = U_20*Chi_0;\
@ -63,15 +88,15 @@ NAMESPACE_BEGIN(Grid);
#define MULT_ADD(U,A,UChi) \ #define MULT_ADD(U,A,UChi) \
auto & ref(U[sU](A)); \ auto & ref(U[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \ U_00=coalescedRead(ref()(0,0),lane); \
Impl::loadLinkElement(U_10,ref()(1,0)); \ U_10=coalescedRead(ref()(1,0),lane); \
Impl::loadLinkElement(U_20,ref()(2,0)); \ U_20=coalescedRead(ref()(2,0),lane); \
Impl::loadLinkElement(U_01,ref()(0,1)); \ U_01=coalescedRead(ref()(0,1),lane); \
Impl::loadLinkElement(U_11,ref()(1,1)); \ U_11=coalescedRead(ref()(1,1),lane); \
Impl::loadLinkElement(U_21,ref()(2,1)); \ U_21=coalescedRead(ref()(2,1),lane); \
Impl::loadLinkElement(U_02,ref()(0,2)); \ U_02=coalescedRead(ref()(0,2),lane); \
Impl::loadLinkElement(U_12,ref()(1,2)); \ U_12=coalescedRead(ref()(1,2),lane); \
Impl::loadLinkElement(U_22,ref()(2,2)); \ U_22=coalescedRead(ref()(2,2),lane); \
UChi ## _0 += U_00*Chi_0; \ UChi ## _0 += U_00*Chi_0; \
UChi ## _1 += U_10*Chi_0;\ UChi ## _1 += U_10*Chi_0;\
UChi ## _2 += U_20*Chi_0;\ UChi ## _2 += U_20*Chi_0;\
@ -83,24 +108,18 @@ NAMESPACE_BEGIN(Grid);
UChi ## _2 += U_22*Chi_2; UChi ## _2 += U_22*Chi_2;
#define PERMUTE_DIR(dir) \
permute##dir(Chi_0,Chi_0); \
permute##dir(Chi_1,Chi_1); \
permute##dir(Chi_2,Chi_2);
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \ #define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
SE=st.GetEntry(ptype,Dir+skew,sF); \ SE=st.GetEntry(ptype,Dir+skew,sF); \
offset = SE->_offset; \ offset = SE->_offset; \
local = SE->_is_local; \ local = SE->_is_local; \
perm = SE->_permute; \ perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHI(in); \ LOAD_CHI(Perm,in); \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(Perm); \ PERMUTE_DIR(Perm); \
} \ } \
} else { \ } else { \
LOAD_CHI(buf); \ LOAD_CHI_COMMS(buf); \
} }
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \ #define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
@ -116,19 +135,18 @@ NAMESPACE_BEGIN(Grid);
} }
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \ #define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
SE=st.GetEntry(ptype,Dir+skew,sF); \ SE=st.GetEntry(ptype,Dir+skew,sF); \
offset = SE->_offset; \ offset = SE->_offset; \
local = SE->_is_local; \ local = SE->_is_local; \
perm = SE->_permute; \ perm = SE->_permute; \
if ( local ) { \ if ( local ) { \
LOAD_CHI(in); \ LOAD_CHI(Perm,in); \
if ( perm) { \ if ( perm) { \
PERMUTE_DIR(Perm); \ PERMUTE_DIR(Perm); \
} \ } \
} else if ( st.same_node[Dir] ) { \ } else if ( st.same_node[Dir] ) { \
LOAD_CHI(buf); \ LOAD_CHI_COMMS(buf); \
} \ } \
if (local || st.same_node[Dir] ) { \ if (local || st.same_node[Dir] ) { \
MULT_ADD(U,Dir,even); \ MULT_ADD(U,Dir,even); \
@ -140,10 +158,32 @@ NAMESPACE_BEGIN(Grid);
local = SE->_is_local; \ local = SE->_is_local; \
if ((!local) && (!st.same_node[Dir]) ) { \ if ((!local) && (!st.same_node[Dir]) ) { \
nmu++; \ nmu++; \
{ LOAD_CHI(buf); } \ { LOAD_CHI_COMMS(buf); } \
{ MULT_ADD(U,Dir,even); } \ { MULT_ADD(U,Dir,even); } \
} }
#define HAND_DECLARATIONS(Simd) \
Simd even_0; \
Simd even_1; \
Simd even_2; \
Simd odd_0; \
Simd odd_1; \
Simd odd_2; \
\
Simd Chi_0; \
Simd Chi_1; \
Simd Chi_2; \
\
Simd U_00; \
Simd U_10; \
Simd U_20; \
Simd U_01; \
Simd U_11; \
Simd U_21; \
Simd U_02; \
Simd U_12; \
Simd U_22;
template <class Impl> template <class Impl>
template <int Naik> accelerator_inline template <int Naik> accelerator_inline
@ -155,28 +195,14 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
Simd even_0; // 12 regs on knc
Simd even_1;
Simd even_2;
Simd odd_0; // 12 regs on knc
Simd odd_1;
Simd odd_2;
Simd Chi_0; // two spinor; 6 regs const int Nsimd = SiteHalfSpinor::Nsimd();
Simd Chi_1; const int lane=acceleratorSIMTlane(Nsimd);
Simd Chi_2; typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
HAND_DECLARATIONS(Simt);
Simd U_00; // two rows of U matrix
Simd U_10;
Simd U_20;
Simd U_01;
Simd U_11;
Simd U_21; // 2 reg left.
Simd U_02;
Simd U_12;
Simd U_22;
SiteSpinor result; typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
calcSiteSpinor result;
int offset,local,perm, ptype; int offset,local,perm, ptype;
StencilEntry *SE; StencilEntry *SE;
@ -215,7 +241,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
result()()(1) = even_1 + odd_1; result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2; result()()(2) = even_2 + odd_2;
} }
vstream(out[sF],result); coalescedWrite(out[sF],result);
} }
} }
@ -230,28 +256,13 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
Simd even_0; // 12 regs on knc const int Nsimd = SiteHalfSpinor::Nsimd();
Simd even_1; const int lane=acceleratorSIMTlane(Nsimd);
Simd even_2; typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
Simd odd_0; // 12 regs on knc HAND_DECLARATIONS(Simt);
Simd odd_1;
Simd odd_2;
Simd Chi_0; // two spinor; 6 regs typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
Simd Chi_1; calcSiteSpinor result;
Simd Chi_2;
Simd U_00; // two rows of U matrix
Simd U_10;
Simd U_20;
Simd U_01;
Simd U_11;
Simd U_21; // 2 reg left.
Simd U_02;
Simd U_12;
Simd U_22;
SiteSpinor result;
int offset, ptype, local, perm; int offset, ptype, local, perm;
StencilEntry *SE; StencilEntry *SE;
@ -261,8 +272,8 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
// int sF=s+LLs*sU; // int sF=s+LLs*sU;
{ {
even_0 = Zero(); even_1 = Zero(); even_2 = Zero(); zeroit(even_0); zeroit(even_1); zeroit(even_2);
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero(); zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
skew = 0; skew = 0;
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even); HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
@ -294,7 +305,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
result()()(1) = even_1 + odd_1; result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2; result()()(2) = even_2 + odd_2;
} }
vstream(out[sF],result); coalescedWrite(out[sF],result);
} }
} }
@ -309,28 +320,13 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
Simd even_0; // 12 regs on knc const int Nsimd = SiteHalfSpinor::Nsimd();
Simd even_1; const int lane=acceleratorSIMTlane(Nsimd);
Simd even_2; typedef decltype( coalescedRead( in[0]()()(0) )) Simt;
Simd odd_0; // 12 regs on knc HAND_DECLARATIONS(Simt);
Simd odd_1;
Simd odd_2;
Simd Chi_0; // two spinor; 6 regs typedef decltype( coalescedRead( in[0] )) calcSiteSpinor;
Simd Chi_1; calcSiteSpinor result;
Simd Chi_2;
Simd U_00; // two rows of U matrix
Simd U_10;
Simd U_20;
Simd U_01;
Simd U_11;
Simd U_21; // 2 reg left.
Simd U_02;
Simd U_12;
Simd U_22;
SiteSpinor result;
int offset, ptype, local; int offset, ptype, local;
StencilEntry *SE; StencilEntry *SE;
@ -340,8 +336,8 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
// int sF=s+LLs*sU; // int sF=s+LLs*sU;
{ {
even_0 = Zero(); even_1 = Zero(); even_2 = Zero(); zeroit(even_0); zeroit(even_1); zeroit(even_2);
odd_0 = Zero(); odd_1 = Zero(); odd_2 = Zero(); zeroit(odd_0); zeroit(odd_1); zeroit(odd_2);
int nmu=0; int nmu=0;
skew = 0; skew = 0;
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even); HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
@ -374,7 +370,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
result()()(1) = even_1 + odd_1; result()()(1) = even_1 + odd_1;
result()()(2) = even_2 + odd_2; result()()(2) = even_2 + odd_2;
} }
out[sF] = out[sF] + result; coalescedWrite(out[sF] , out(sF)+ result);
} }
} }
} }
@ -397,6 +393,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
const FermionFieldView &in, FermionFieldView &out, int dag); \ const FermionFieldView &in, FermionFieldView &out, int dag); \
*/ */
#undef LOAD_CHI #undef LOAD_CHI
#undef HAND_DECLARATIONS
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid);
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \ if (SE->_is_local ) { \
if (SE->_permute) { \ int perm= SE->_permute; \
chi_p = &chi; \ chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in[SE->_offset]; \
} \
} else { \ } else { \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
} \ } \
multLink(Uchi, U[sU], *chi_p, Dir); acceleratorSynchronise(); \
multLink(Uchi, U[sU], chi, Dir);
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \ if (SE->_is_local ) { \
if (SE->_permute) { \ int perm= SE->_permute; \
chi_p = &chi; \ chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in[SE->_offset]; \
} \
} else if ( st.same_node[Dir] ) { \ } else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
} \ } \
if (SE->_is_local || st.same_node[Dir] ) { \ if (SE->_is_local || st.same_node[Dir] ) { \
multLink(Uchi, U[sU], *chi_p, Dir); \ multLink(Uchi, U[sU], chi, Dir); \
} }
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
nmu++; \ nmu++; \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
multLink(Uchi, U[sU], *chi_p, Dir); \ multLink(Uchi, U[sU], chi, Dir); \
} }
template <class Impl> template <class Impl>
@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out, int dag) const FermionFieldView &in, FermionFieldView &out, int dag)
{ {
const SiteSpinor *chi_p; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor chi; calcSpinor chi;
SiteSpinor Uchi; calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int skew; int skew;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// //
@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
if ( dag ) { if ( dag ) {
Uchi = - Uchi; Uchi = - Uchi;
} }
vstream(out[sF], Uchi); coalescedWrite(out[sF], Uchi,lane);
} }
}; };
@ -130,13 +125,16 @@ template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) { const FermionFieldView &in, FermionFieldView &out,int dag)
const SiteSpinor *chi_p; {
SiteSpinor chi; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor Uchi; calcSpinor chi;
calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int skew ; int skew ;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// int sF=LLs*sU+s; // int sF=LLs*sU+s;
@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
if ( dag ) { if ( dag ) {
Uchi = - Uchi; Uchi = - Uchi;
} }
vstream(out[sF], Uchi); coalescedWrite(out[sF], Uchi,lane);
} }
}; };
@ -178,14 +176,17 @@ template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) { const FermionFieldView &in, FermionFieldView &out,int dag)
const SiteSpinor *chi_p; {
// SiteSpinor chi; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor Uchi; calcSpinor chi;
calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int nmu=0; int nmu=0;
int skew ; int skew ;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// int sF=LLs*sU+s; // int sF=LLs*sU+s;
@ -211,11 +212,12 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
} }
if ( nmu ) { if ( nmu ) {
if ( dag ) { auto _out = coalescedRead(out[sF],lane);
out[sF] = out[sF] - Uchi; if ( dag ) {
coalescedWrite(out[sF], _out-Uchi,lane);
} else { } else {
out[sF] = out[sF] + Uchi; coalescedWrite(out[sF], _out+Uchi,lane);
} }
} }
} }
@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
GridBase *FGrid=in.Grid(); GridBase *FGrid=in.Grid();
GridBase *UGrid=U.Grid(); GridBase *UGrid=U.Grid();
typedef StaggeredKernels<Impl> ThisKernel; typedef StaggeredKernels<Impl> ThisKernel;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
autoView( UUU_v , UUU, AcceleratorRead); autoView( UUU_v , UUU, AcceleratorRead);
autoView( U_v , U, AcceleratorRead); autoView( U_v , U, AcceleratorRead);
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);
@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
GridBase *FGrid=in.Grid(); GridBase *FGrid=in.Grid();
GridBase *UGrid=U.Grid(); GridBase *UGrid=U.Grid();
typedef StaggeredKernels<Impl> ThisKernel; typedef StaggeredKernels<Impl> ThisKernel;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
autoView( UUU_v , U, AcceleratorRead); autoView( UUU_v , U, AcceleratorRead);
autoView( U_v , U, AcceleratorRead); autoView( U_v , U, AcceleratorRead);
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);

View File

@ -73,17 +73,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -102,17 +102,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -131,17 +131,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
@ -165,17 +165,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -194,17 +194,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -223,17 +223,17 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") //#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
@ -280,17 +280,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -309,17 +309,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -338,17 +338,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
@ -371,17 +371,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -400,17 +400,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -429,17 +429,17 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>
#pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2") // #pragma GCC optimize ("-O3", "-fno-schedule-insns", "-fno-schedule-insns2")
template<> void // template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, // WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h> // #include <qcd/action/fermion/implementation/WilsonKernelsAsmBodyA64FX.h>

View File

@ -74,15 +74,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -97,15 +97,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
@ -121,15 +121,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single // XYZT vectorised, dag Kernel, single
@ -148,15 +148,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -171,15 +171,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -194,15 +194,15 @@ WilsonKernels<ZWilsonImplF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM #undef MAYBEPERM
#undef MULT_2SPIN #undef MULT_2SPIN
@ -228,14 +228,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSite(StencilView &st, DoubledGaugeF
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -249,14 +249,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteInt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -273,15 +273,15 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteExt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
//
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single // Ls vectorised, dag Kernel, single
@ -299,14 +299,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDag(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -320,14 +320,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagInt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -341,14 +341,14 @@ WilsonKernels<ZDomainWallVec5dImplF>::AsmDhopSiteDagExt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplFH>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D #endif // VEC 5D
@ -392,14 +392,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -413,14 +413,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -434,14 +434,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// XYZT vectorised, dag Kernel, single // XYZT vectorised, dag Kernel, single
@ -459,14 +459,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldVi
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -480,14 +480,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -501,14 +501,14 @@ WilsonKernels<ZWilsonImplD>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFiel
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<WilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZWilsonImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef MAYBEPERM #undef MAYBEPERM
#undef MULT_2SPIN #undef MULT_2SPIN
@ -533,14 +533,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSite(StencilView &st, DoubledGaugeF
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -554,14 +554,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteInt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -577,14 +577,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteExt(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
// Ls vectorised, dag Kernel, single // Ls vectorised, dag Kernel, single
@ -602,14 +602,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDag(StencilView &st, DoubledGau
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#define INTERIOR #define INTERIOR
@ -623,14 +623,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagInt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#undef INTERIOR_AND_EXTERIOR #undef INTERIOR_AND_EXTERIOR
#undef INTERIOR #undef INTERIOR
@ -645,14 +645,14 @@ WilsonKernels<ZDomainWallVec5dImplD>::AsmDhopSiteDagExt(StencilView &st, Doubled
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<DomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
template<> void //template<> void
WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, //WilsonKernels<ZDomainWallVec5dImplDF>::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) // int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h> //#include <qcd/action/fermion/implementation/WilsonKernelsAsmBody.h>
#endif // VEC 5D #endif // VEC 5D

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION GparityWilsonImplDF

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonKernelsInstantiationGparity.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION GparityWilsonImplFH

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION WilsonImplDF

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonCloverFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermionInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
../WilsonTMFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION WilsonImplFH

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1,51 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015, 2020
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid);

View File

@ -1 +0,0 @@
#define IMPLEMENTATION ZWilsonImplDF

View File

@ -1 +0,0 @@
../CayleyFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../ContinuedFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../DomainWallEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../MobiusEOFAFermionInstantiation.cc.master

View File

@ -1 +0,0 @@
../PartialFractionFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
../WilsonFermion5DInstantiation.cc.master

View File

@ -1 +0,0 @@
#define IMPLEMENTATION ZWilsonImplFH

View File

@ -9,8 +9,6 @@ STAG5_IMPL_LIST=""
WILSON_IMPL_LIST=" \ WILSON_IMPL_LIST=" \
WilsonImplF \ WilsonImplF \
WilsonImplD \ WilsonImplD \
WilsonImplFH \
WilsonImplDF \
WilsonAdjImplF \ WilsonAdjImplF \
WilsonAdjImplD \ WilsonAdjImplD \
WilsonTwoIndexSymmetricImplF \ WilsonTwoIndexSymmetricImplF \
@ -18,26 +16,17 @@ WILSON_IMPL_LIST=" \
WilsonTwoIndexAntiSymmetricImplF \ WilsonTwoIndexAntiSymmetricImplF \
WilsonTwoIndexAntiSymmetricImplD \ WilsonTwoIndexAntiSymmetricImplD \
GparityWilsonImplF \ GparityWilsonImplF \
GparityWilsonImplD \ GparityWilsonImplD "
GparityWilsonImplFH \
GparityWilsonImplDF"
DWF_IMPL_LIST=" \ DWF_IMPL_LIST=" \
WilsonImplF \ WilsonImplF \
WilsonImplD \ WilsonImplD \
WilsonImplFH \
WilsonImplDF \
ZWilsonImplF \ ZWilsonImplF \
ZWilsonImplD \ ZWilsonImplD "
ZWilsonImplFH \
ZWilsonImplDF "
GDWF_IMPL_LIST=" \ GDWF_IMPL_LIST=" \
GparityWilsonImplF \ GparityWilsonImplF \
GparityWilsonImplD \ GparityWilsonImplD "
GparityWilsonImplFH \
GparityWilsonImplDF"
IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST" IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"

View File

@ -78,6 +78,8 @@ public:
typedef Lattice<SiteLink> LinkField; typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field; typedef Lattice<SiteField> Field;
typedef SU<Nrepresentation> Group;
// Guido: we can probably separate the types from the HMC functions // Guido: we can probably separate the types from the HMC functions
// this will create 2 kind of implementations // this will create 2 kind of implementations
// probably confusing the users // probably confusing the users
@ -118,7 +120,7 @@ public:
LinkField Pmu(P.Grid()); LinkField Pmu(P.Grid());
Pmu = Zero(); Pmu = Zero();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ; RealD scale = ::sqrt(HMC_MOMENTUM_DENOMINATOR) ;
Pmu = Pmu*scale; Pmu = Pmu*scale;
PokeIndex<LorentzIndex>(P, Pmu, mu); PokeIndex<LorentzIndex>(P, Pmu, mu);
@ -159,15 +161,15 @@ public:
} }
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::HotConfiguration(pRNG, U); Group::HotConfiguration(pRNG, U);
} }
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::TepidConfiguration(pRNG, U); Group::TepidConfiguration(pRNG, U);
} }
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::ColdConfiguration(pRNG, U); Group::ColdConfiguration(pRNG, U);
} }
}; };

View File

@ -85,21 +85,18 @@ public:
std::cout << GridLogDebug << "Stout smearing started\n"; std::cout << GridLogDebug << "Stout smearing started\n";
// Smear the configurations // C contains the staples multiplied by some rho
u_smr = U ; // set the smeared field to the current gauge field
SmearBase->smear(C, U); SmearBase->smear(C, U);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
if( mu == OrthogDim ) if( mu == OrthogDim ) continue ;
tmp = 1.0; // Don't smear in the orthogonal direction // u_smr = exp(iQ_mu)*U_mu apart from Orthogdim
else { Umu = peekLorentz(U, mu);
tmp = peekLorentz(C, mu); tmp = peekLorentz(C, mu);
Umu = peekLorentz(U, mu); iq_mu = Ta( tmp * adj(Umu));
iq_mu = Ta( exponentiate_iQ(tmp, iq_mu);
tmp * pokeLorentz(u_smr, tmp * Umu, mu);
adj(Umu)); // iq_mu = Ta(Omega_mu) to match the signs with the paper
exponentiate_iQ(tmp, iq_mu);
}
pokeLorentz(u_smr, tmp * Umu, mu); // u_smr = exp(iQ_mu)*U_mu
} }
std::cout << GridLogDebug << "Stout smearing completed\n"; std::cout << GridLogDebug << "Stout smearing completed\n";
}; };

View File

@ -40,7 +40,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
// Dirac algebra adjoint operator (not in to overload other adj) // Dirac algebra adjoint operator (not in to overload other adj)
accelerator_inline Gamma adj(const Gamma &g) inline Gamma adj(const Gamma &g)
{ {
return Gamma (Gamma::adj[g.g]); return Gamma (Gamma::adj[g.g]);
} }
@ -48,7 +48,7 @@ accelerator_inline Gamma adj(const Gamma &g)
// Dirac algebra mutliplication operator // Dirac algebra mutliplication operator
accelerator_inline Gamma operator*(const Gamma &g1, const Gamma &g2) inline Gamma operator*(const Gamma &g1, const Gamma &g2)
{ {
return Gamma (Gamma::mul[g1.g][g2.g]); return Gamma (Gamma::mul[g1.g][g2.g]);
} }

View File

@ -2,14 +2,11 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc Source file: ./lib/serialisation/BaseIO.h
Copyright (C) 2015, 2020 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Michael Marshall <michael.marshall@ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,27 +22,14 @@ You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h>
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h>
#ifndef AVX512 #include <Grid/GridCore.h>
#ifndef QPX
#ifndef A64FX
#ifndef A64FXFIXEDSIZE
#include <Grid/qcd/action/fermion/implementation/WilsonKernelsAsmImplementation.h>
#endif
#endif
#endif
#endif
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid)
#include "impl.h" std::uint64_t EigenIO::EigenResizeCounter(0);
template class WilsonKernels<IMPLEMENTATION>;
NAMESPACE_END(Grid); NAMESPACE_END(Grid)

View File

@ -9,6 +9,7 @@
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -30,6 +31,7 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H #ifndef GRID_SERIALISATION_ABSTRACT_READER_H
#define GRID_SERIALISATION_ABSTRACT_READER_H #define GRID_SERIALISATION_ABSTRACT_READER_H
#include <atomic>
#include <type_traits> #include <type_traits>
#include <Grid/tensors/Tensors.h> #include <Grid/tensors/Tensors.h>
#include <Grid/serialisation/VectorUtils.h> #include <Grid/serialisation/VectorUtils.h>
@ -110,6 +112,10 @@ namespace Grid {
template <typename ET> template <typename ET>
inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); } getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
// Counter for resized EigenTensors (poor man's substitute for allocator)
// Defined in BinaryIO.cc
extern std::uint64_t EigenResizeCounter;
} }
// Abstract writer/reader classes //////////////////////////////////////////// // Abstract writer/reader classes ////////////////////////////////////////////
@ -497,8 +503,14 @@ namespace Grid {
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims ) Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
{ {
#ifdef GRID_OMP
// The memory counter is the reason this must be done from the primary thread
assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread");
#endif
EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
//t.reshape( dims ); //t.reshape( dims );
t.resize( dims ); t.resize( dims );
EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
} }
template <typename T> template <typename T>

View File

@ -1,8 +1,39 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
using namespace Grid; using namespace Grid;
#ifndef H5_NO_NAMESPACE #ifndef H5_NO_NAMESPACE
using namespace H5NS; using namespace H5NS; // Compile error here? Try adding --enable-cxx to hdf5 configure
#endif #endif
// Writer implementation /////////////////////////////////////////////////////// // Writer implementation ///////////////////////////////////////////////////////

View File

@ -1,3 +1,34 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ed.ac.uk>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SERIALISATION_HDF5_H #ifndef GRID_SERIALISATION_HDF5_H
#define GRID_SERIALISATION_HDF5_H #define GRID_SERIALISATION_HDF5_H
@ -9,10 +40,6 @@
#include <Grid/tensors/Tensors.h> #include <Grid/tensors/Tensors.h>
#include "Hdf5Type.h" #include "Hdf5Type.h"
#ifndef H5_NO_NAMESPACE
#define H5NS H5
#endif
// default thresold above which datasets are used instead of attributes // default thresold above which datasets are used instead of attributes
#ifndef HDF5_DEF_DATASET_THRES #ifndef HDF5_DEF_DATASET_THRES
#define HDF5_DEF_DATASET_THRES 6u #define HDF5_DEF_DATASET_THRES 6u
@ -34,11 +61,13 @@ namespace Grid
template <typename U> template <typename U>
void writeDefault(const std::string &s, const U &x); void writeDefault(const std::string &s, const U &x);
template <typename U> template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type void writeRagged(const std::string &s, const std::vector<U> &x);
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
writeDefault(const std::string &s, const std::vector<U> &x); writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
writeDefault(const std::string &s, const std::vector<U> &x); writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
template <typename U> template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements); void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
H5NS::Group & getGroup(void); H5NS::Group & getGroup(void);
@ -64,11 +93,13 @@ namespace Grid
template <typename U> template <typename U>
void readDefault(const std::string &s, U &output); void readDefault(const std::string &s, U &output);
template <typename U> template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type void readRagged(const std::string &s, std::vector<U> &x);
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
readDefault(const std::string &s, std::vector<U> &x); readDefault(const std::string &s, std::vector<U> &x);
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
readDefault(const std::string &s, std::vector<U> &x); readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
template <typename U> template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim); void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
H5NS::Group & getGroup(void); H5NS::Group & getGroup(void);
@ -176,24 +207,30 @@ namespace Grid
} }
template <typename U> template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x) Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
{ {
// alias to element type if (isRegularShape(x))
typedef typename element<std::vector<U>>::type Element; {
// alias to element type
// flatten the vector and getting dimensions using Scalar = typename is_flattenable<std::vector<U>>::type;
Flatten<std::vector<U>> flat(x);
std::vector<size_t> dim; // flatten the vector and getting dimensions
const auto &flatx = flat.getFlatVector(); Flatten<std::vector<U>> flat(x);
for (auto &d: flat.getDim()) std::vector<size_t> dim;
dim.push_back(d); const auto &flatx = flat.getFlatVector();
writeMultiDim<Element>(s, dim, &flatx[0], flatx.size()); for (auto &d: flat.getDim())
dim.push_back(d);
writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
}
else
{
writeRagged(s, x);
}
} }
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
{ {
push(s); push(s);
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size", writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
@ -229,7 +266,7 @@ namespace Grid
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim) void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{ {
// alias to element type // alias to element type
typedef typename element<std::vector<U>>::type Element; using Scalar = typename is_flattenable<std::vector<U>>::type;
// read the dimensions // read the dimensions
H5NS::DataSpace dataSpace; H5NS::DataSpace dataSpace;
@ -260,37 +297,44 @@ namespace Grid
H5NS::DataSet dataSet; H5NS::DataSet dataSet;
dataSet = group_.openDataSet(s); dataSet = group_.openDataSet(s);
dataSet.read(buf.data(), Hdf5Type<Element>::type()); dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
} }
else else
{ {
H5NS::Attribute attribute; H5NS::Attribute attribute;
attribute = group_.openAttribute(s); attribute = group_.openAttribute(s);
attribute.read(Hdf5Type<Element>::type(), buf.data()); attribute.read(Hdf5Type<Scalar>::type(), buf.data());
} }
} }
template <typename U> template <typename U>
typename std::enable_if<element<std::vector<U>>::is_number, void>::type typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x) Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
{ {
// alias to element type if (H5Lexists (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
typedef typename element<std::vector<U>>::type Element; && H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
{
readRagged(s, x);
}
else
{
// alias to element type
using Scalar = typename is_flattenable<std::vector<U>>::type;
std::vector<size_t> dim; std::vector<size_t> dim;
std::vector<Element> buf; std::vector<Scalar> buf;
readMultiDim( s, buf, dim ); readMultiDim( s, buf, dim );
// reconstruct the multidimensional vector // reconstruct the multidimensional vector
Reconstruct<std::vector<U>> r(buf, dim); Reconstruct<std::vector<U>> r(buf, dim);
x = r.getVector(); x = r.getVector();
}
} }
template <typename U> template <typename U>
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
{ {
uint64_t size; uint64_t size;

View File

@ -5,7 +5,9 @@
#include <complex> #include <complex>
#include <memory> #include <memory>
#ifndef H5_NO_NAMESPACE #ifdef H5_NO_NAMESPACE
#define H5NS
#else
#define H5NS H5 #define H5NS H5
#endif #endif

View File

@ -118,13 +118,13 @@ static inline std::string SerialisableClassName(void) {return std::string(#cname
static constexpr bool isEnum = false; \ static constexpr bool isEnum = false; \
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
template <typename T>\ template <typename T>\
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \ static inline void write(::Grid::Writer<T> &WR,const std::string &s, const cname &obj){ \
push(WR,s);\ push(WR,s);\
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
pop(WR);\ pop(WR);\
}\ }\
template <typename T>\ template <typename T>\
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){ \ static inline void read(::Grid::Reader<T> &RD,const std::string &s, cname &obj){ \
if (!push(RD,s))\ if (!push(RD,s))\
{\ {\
std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \ std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \

View File

@ -9,7 +9,8 @@
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
@ -236,21 +237,36 @@ namespace Grid {
} }
} }
// Vector element trait ////////////////////////////////////////////////////// // is_flattenable<T>::value is true if T is a std::vector<> which can be flattened //////////////////////
template <typename T> template <typename T, typename V = void>
struct element struct is_flattenable : std::false_type
{ {
typedef T type; using type = T;
static constexpr bool is_number = false; using grid_type = T;
static constexpr int vecRank = 0;
static constexpr bool isGridTensor = false;
static constexpr bool children_flattenable = std::is_arithmetic<T>::value or is_complex<T>::value;
}; };
template <typename T> template <typename T>
struct element<std::vector<T>> struct is_flattenable<T, typename std::enable_if<isGridTensor<T>::value>::type> : std::false_type
{ {
typedef typename element<T>::type type; using type = typename GridTypeMapper<T>::scalar_type;
static constexpr bool is_number = std::is_arithmetic<T>::value using grid_type = T;
or is_complex<T>::value static constexpr int vecRank = 0;
or element<T>::is_number; static constexpr bool isGridTensor = true;
static constexpr bool children_flattenable = true;
};
template <typename T>
struct is_flattenable<std::vector<T>, typename std::enable_if<is_flattenable<T>::children_flattenable>::type>
: std::true_type
{
using type = typename is_flattenable<T>::type;
using grid_type = typename is_flattenable<T>::grid_type;
static constexpr bool isGridTensor = is_flattenable<T>::isGridTensor;
static constexpr int vecRank = is_flattenable<T>::vecRank + 1;
static constexpr bool children_flattenable = true;
}; };
// Vector flattening utility class //////////////////////////////////////////// // Vector flattening utility class ////////////////////////////////////////////
@ -259,23 +275,30 @@ namespace Grid {
class Flatten class Flatten
{ {
public: public:
typedef typename element<V>::type Element; using Scalar = typename is_flattenable<V>::type;
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
public: public:
explicit Flatten(const V &vector); explicit Flatten(const V &vector);
const V & getVector(void); const V & getVector(void) const { return vector_; }
const std::vector<Element> & getFlatVector(void); const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
const std::vector<size_t> & getDim(void); const std::vector<size_t> & getDim(void) const { return dim_; }
private: private:
void accumulate(const Element &e); template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
template <typename W> accumulate(const W &e);
void accumulate(const W &v); template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
void accumulateDim(const Element &e); accumulate(const W &e);
template <typename W> template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
void accumulateDim(const W &v); accumulate(const W &v);
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
accumulateDim(const W &e) {} // Innermost is a scalar - do nothing
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
accumulateDim(const W &e);
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
accumulateDim(const W &v);
private: private:
const V &vector_; const V &vector_;
std::vector<Element> flatVector_; std::vector<Scalar> flatVector_;
std::vector<size_t> dim_; std::vector<size_t> dim_;
}; };
// Class to reconstruct a multidimensional std::vector // Class to reconstruct a multidimensional std::vector
@ -283,38 +306,57 @@ namespace Grid {
class Reconstruct class Reconstruct
{ {
public: public:
typedef typename element<V>::type Element; using Scalar = typename is_flattenable<V>::type;
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
public: public:
Reconstruct(const std::vector<Element> &flatVector, Reconstruct(const std::vector<Scalar> &flatVector,
const std::vector<size_t> &dim); const std::vector<size_t> &dim);
const V & getVector(void); const V & getVector(void) const { return vector_; }
const std::vector<Element> & getFlatVector(void); const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
const std::vector<size_t> & getDim(void); const std::vector<size_t> & getDim(void) const { return dim_; }
private: private:
void fill(std::vector<Element> &v); template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
template <typename W> fill(W &v);
void fill(W &v); template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
void resize(std::vector<Element> &v, const unsigned int dim); fill(W &v);
template <typename W> template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
void resize(W &v, const unsigned int dim); fill(W &v);
template <typename W> typename std::enable_if< is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
resize(W &v, const unsigned int dim);
template <typename W> typename std::enable_if< is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
resize(W &v, const unsigned int dim);
template <typename W> typename std::enable_if<!is_flattenable<W>::isGridTensor>::type
checkInnermost(const W &e) {} // Innermost is a scalar - do nothing
template <typename W> typename std::enable_if< is_flattenable<W>::isGridTensor>::type
checkInnermost(const W &e);
private: private:
V vector_; V vector_;
const std::vector<Element> &flatVector_; const std::vector<Scalar> &flatVector_;
std::vector<size_t> dim_; std::vector<size_t> dim_;
size_t ind_{0}; size_t ind_{0};
unsigned int dimInd_{0}; unsigned int dimInd_{0};
}; };
// Flatten class template implementation // Flatten class template implementation
template <typename V> template <typename V>
void Flatten<V>::accumulate(const Element &e) template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
Flatten<V>::accumulate(const W &e)
{ {
flatVector_.push_back(e); flatVector_.push_back(e);
} }
template <typename V> template <typename V>
template <typename W> template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
void Flatten<V>::accumulate(const W &v) Flatten<V>::accumulate(const W &e)
{
for (const Scalar &x: e) {
flatVector_.push_back(x);
}
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
Flatten<V>::accumulate(const W &v)
{ {
for (auto &e: v) for (auto &e: v)
{ {
@ -323,11 +365,17 @@ namespace Grid {
} }
template <typename V> template <typename V>
void Flatten<V>::accumulateDim(const Element &e) {}; template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
Flatten<V>::accumulateDim(const W &e)
{
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
for (int rank=0; rank < Traits::Rank; ++rank)
dim_.push_back(Traits::Dimension(rank));
}
template <typename V> template <typename V>
template <typename W> template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
void Flatten<V>::accumulateDim(const W &v) Flatten<V>::accumulateDim(const W &v)
{ {
dim_.push_back(v.size()); dim_.push_back(v.size());
accumulateDim(v[0]); accumulateDim(v[0]);
@ -337,42 +385,36 @@ namespace Grid {
Flatten<V>::Flatten(const V &vector) Flatten<V>::Flatten(const V &vector)
: vector_(vector) : vector_(vector)
{ {
accumulate(vector_);
accumulateDim(vector_); accumulateDim(vector_);
} std::size_t TotalSize{ dim_[0] };
for (int i = 1; i < dim_.size(); ++i) {
template <typename V> TotalSize *= dim_[i];
const V & Flatten<V>::getVector(void) }
{ flatVector_.reserve(TotalSize);
return vector_; accumulate(vector_);
}
template <typename V>
const std::vector<typename Flatten<V>::Element> &
Flatten<V>::getFlatVector(void)
{
return flatVector_;
}
template <typename V>
const std::vector<size_t> & Flatten<V>::getDim(void)
{
return dim_;
} }
// Reconstruct class template implementation // Reconstruct class template implementation
template <typename V> template <typename V>
void Reconstruct<V>::fill(std::vector<Element> &v) template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::fill(W &v)
{
v = flatVector_[ind_++];
}
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::fill(W &v)
{ {
for (auto &e: v) for (auto &e: v)
{ {
e = flatVector_[ind_++]; e = flatVector_[ind_++];
} }
} }
template <typename V> template <typename V>
template <typename W> template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
void Reconstruct<V>::fill(W &v) Reconstruct<V>::fill(W &v)
{ {
for (auto &e: v) for (auto &e: v)
{ {
@ -381,14 +423,15 @@ namespace Grid {
} }
template <typename V> template <typename V>
void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim) template <typename W> typename std::enable_if<is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
Reconstruct<V>::resize(W &v, const unsigned int dim)
{ {
v.resize(dim_[dim]); v.resize(dim_[dim]);
} }
template <typename V> template <typename V>
template <typename W> template <typename W> typename std::enable_if<is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
void Reconstruct<V>::resize(W &v, const unsigned int dim) Reconstruct<V>::resize(W &v, const unsigned int dim)
{ {
v.resize(dim_[dim]); v.resize(dim_[dim]);
for (auto &e: v) for (auto &e: v)
@ -398,34 +441,31 @@ namespace Grid {
} }
template <typename V> template <typename V>
Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector, template <typename W> typename std::enable_if<is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::checkInnermost(const W &)
{
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
const int gridRank{Traits::Rank};
const int dimRank{static_cast<int>(dim_.size())};
assert(dimRank >= gridRank && "Tensor rank too low for Grid tensor");
for (int i=0; i<gridRank; ++i) {
assert(dim_[dimRank - gridRank + i] == Traits::Dimension(i) && "Tensor dimension doesn't match Grid tensor");
}
dim_.resize(dimRank - gridRank);
}
template <typename V>
Reconstruct<V>::Reconstruct(const std::vector<Scalar> &flatVector,
const std::vector<size_t> &dim) const std::vector<size_t> &dim)
: flatVector_(flatVector) : flatVector_(flatVector)
, dim_(dim) , dim_(dim)
{ {
checkInnermost(vector_);
assert(dim_.size() == is_flattenable<V>::vecRank && "Tensor rank doesn't match nested std::vector rank");
resize(vector_, 0); resize(vector_, 0);
fill(vector_); fill(vector_);
} }
template <typename V>
const V & Reconstruct<V>::getVector(void)
{
return vector_;
}
template <typename V>
const std::vector<typename Reconstruct<V>::Element> &
Reconstruct<V>::getFlatVector(void)
{
return flatVector_;
}
template <typename V>
const std::vector<size_t> & Reconstruct<V>::getDim(void)
{
return dim_;
}
// Vector IO utilities /////////////////////////////////////////////////////// // Vector IO utilities ///////////////////////////////////////////////////////
// helper function to read space-separated values // helper function to read space-separated values
template <typename T> template <typename T>
@ -459,6 +499,64 @@ namespace Grid {
return os; return os;
} }
// In general, scalar types are considered "flattenable" (regularly shaped)
template <typename T>
bool isRegularShapeHelper(const std::vector<T> &, std::vector<std::size_t> &, int, bool)
{
return true;
}
template <typename T>
bool isRegularShapeHelper(const std::vector<std::vector<T>> &v, std::vector<std::size_t> &Dims, int Depth, bool bFirst)
{
if( bFirst)
{
assert( Dims.size() == Depth && "Bug: Delete this message after testing" );
Dims.push_back(v[0].size());
if (!Dims[Depth])
return false;
}
else
{
assert( Dims.size() >= Depth + 1 && "Bug: Delete this message after testing" );
}
for (std::size_t i = 0; i < v.size(); ++i)
{
if (v[i].size() != Dims[Depth] || !isRegularShapeHelper(v[i], Dims, Depth + 1, bFirst && i==0))
{
return false;
}
}
return true;
}
template <typename T>
bool isRegularShape(const T &t) { return true; }
template <typename T>
bool isRegularShape(const std::vector<T> &v) { return !v.empty(); }
// Return non-zero if all dimensions of this std::vector<std::vector<T>> are regularly shaped
template <typename T>
bool isRegularShape(const std::vector<std::vector<T>> &v)
{
if (v.empty() || v[0].empty())
return false;
// Make sure all of my rows are the same size
std::vector<std::size_t> Dims;
Dims.reserve(is_flattenable<T>::vecRank);
Dims.push_back(v.size());
Dims.push_back(v[0].size());
for (std::size_t i = 0; i < Dims[0]; ++i)
{
if (v[i].size() != Dims[1] || !isRegularShapeHelper(v[i], Dims, 2, i==0))
{
return false;
}
}
return true;
}
} }
// helper function to read space-separated values // helper function to read space-separated values

Some files were not shown because too many files have changed in this diff Show More