1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

OMP dslash working

This commit is contained in:
Peter Boyle 2015-05-13 10:59:22 +01:00
parent e6e72d23df
commit 0097b81778
4 changed files with 64 additions and 53 deletions

77
TODO
View File

@ -8,60 +8,39 @@
* const audit * const audit
Insert/Extract
* Replace vset with a call to merge.; * Replace vset with a call to merge.;
* care in Gmerge,Gextract over vset . * care in Gmerge,Gextract over vset .
* extract / merge extra implementation removal * extract / merge extra implementation removal
* Optimise the extract/merge SIMD routines; Azusa??
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there - I have collated into single location at least.
- Need to use _mm_*insert/extract routines.
* Thread scaling tests Xeon, XeonPhi * Thread scaling tests Xeon, XeonPhi
================================================================
*** New Functionality
================================================================
* Implement where within expression template scheme.
* - BinaryWriter, TextWriter etc...
- use protocol buffers? replace xmlReader/Writer etc..
- Binary use htonll, htonl
* Expression template engine: -- DONE
-- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
** Make the Tensor types and Complex etc... play more nicely. ** Make the Tensor types and Complex etc... play more nicely.
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
want to introduce a syntax that does not require this. want to introduce a syntax that does not require this.
- Reductions that contract indices on a site should always demote the tensor structure. - Reductions that contract indices on a site should always demote the tensor structure.
norm2(), innerProduct. norm2(), innerProduct.
- Result of Sum(), SliceSum // spatial sums - Result of Sum(), SliceSum // spatial sums
trace, traceIndex etc.. do not. trace, traceIndex etc.. do not.
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar, - problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
and then Sum and SliceSum do not remove the Scalars. This would be fixed if we and then Sum and SliceSum do not remove the Scalars. This would be fixed if we
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
pure scalar. pure scalar.
* Optimise the extract/merge SIMD routines; Azusa?? *** Expression template engine: -- DONE
- I have collated into single location at least.
- Need to use _mm_*insert/extract routines.
* Flavour matrices?
* Pauli, SU subgroup, etc..
* su3 exponentiation & log etc.. [Jamie's code?]
* TaProj
* FFTnD ?
* Parallel io improvements
- optional parallel MPI2 IO
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
* rb4d support for 5th dimension in Mobius.
[ -- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
* Implement where within expression template scheme.
* Check for missing functionality - partially audited against QDP++ layout * Check for missing functionality - partially audited against QDP++ layout
// Unary functions // Unary functions
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
// exp, log, sqrt, fabs // exp, log, sqrt, fabs
@ -69,7 +48,21 @@
// adjColor, adjSpin, // adjColor, adjSpin,
// copyMask. // copyMask.
// localMaxAbs // localMaxAbs
// Fourier transform equivalent. // Fourier transform equivalent.]
================================================================
*** New Functionality
================================================================
* - BinaryWriter, TextWriter etc...
- use protocol buffers? replace xmlReader/Writer etc..
- Binary use htonll, htonl
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
* Parallel io improvements
- optional parallel MPI2 IO
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
Actions -- coherent framework for implementing actions and their forces. Actions -- coherent framework for implementing actions and their forces.
@ -80,9 +73,6 @@ Actions -- coherent framework for implementing actions and their forces.
- Mobius - Mobius
- z-Mobius - z-Mobius
* Gauge
- Wilson, symanzik, iwasaki
Algorithms (lots of reuse/port from BFM) Algorithms (lots of reuse/port from BFM)
* LinearOperator * LinearOperator
* LinearSolver * LinearSolver
@ -97,8 +87,21 @@ Algorithms (lots of reuse/port from BFM)
* HDCG * HDCG
* HMC, * HMC,
* Heatbath * Heatbath
* Integrators, leapfrog, omelyan, force gradient etc...
* etc.. * etc..
* Gauge
- Wilson, symanzik, iwasaki
* rb4d support for 5th dimension in Mobius.
* Flavour matrices?
* Pauli, SU subgroup, etc..
* su3 exponentiation & log etc.. [Jamie's code?]
* TaProj
* FFTnD ?
====================================================================================================== ======================================================================================================
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
====================================================================================================== ======================================================================================================

View File

@ -80,7 +80,7 @@ int main (int argc, char ** argv)
int ncall=1000; int ncall=1000;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
Dw.multiply(src,result); Dw.M(src,result);
} }
double t1=usecond(); double t1=usecond();
double flops=1320*volume*ncall; double flops=1320*volume*ncall;

View File

@ -85,7 +85,17 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
} }
} }
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out) void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
return;
}
void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
{
Dhop(in,out);
return;
}
void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
{ {
Dhop(in,out); Dhop(in,out);
return; return;
@ -96,18 +106,18 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
WilsonCompressor compressor; WilsonCompressor compressor;
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor); Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
vHalfSpinColourVector tmp;
vHalfSpinColourVector chi;
vSpinColourVector result;
vHalfSpinColourVector Uchi;
int offset,local,perm, ptype;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int sss=0;sss<grid->oSites();sss++){ for(int sss=0;sss<grid->oSites();sss++){
vHalfSpinColourVector tmp;
vHalfSpinColourVector chi;
vSpinColourVector result;
vHalfSpinColourVector Uchi;
int offset,local,perm, ptype;
// int ss = Stencil._LebesgueReorder[sss];
int ss = sss; int ss = sss;
int ssu= ss; int ssu= ss;
// int ss = Stencil._LebesgueReorder[sss];
// Xp // Xp
offset = Stencil._offsets [Xp][ss]; offset = Stencil._offsets [Xp][ss];

View File

@ -3,16 +3,12 @@
#include <Grid.h> #include <Grid.h>
#include <algorithms/LinearOperator.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
template<class vtype> class LinearOperatorBase {
public:
void multiply(const Lattice<vtype> &in, Lattice<vtype> &out){ assert(0);}
};
class WilsonMatrix : public LinearOperatorBase<LatticeFermion> class WilsonMatrix : public LinearOperatorBase<LatticeFermion>
{ {
//NB r=1; //NB r=1;
@ -40,7 +36,9 @@ namespace Grid {
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu); void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
// override multiply // override multiply
void multiply(const LatticeFermion &in, LatticeFermion &out); virtual void M (const LatticeFermion &in, LatticeFermion &out);
virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
// non-hermitian hopping term; half cb or both // non-hermitian hopping term; half cb or both
void Dhop(const LatticeFermion &in, LatticeFermion &out); void Dhop(const LatticeFermion &in, LatticeFermion &out);