mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
OMP dslash working
This commit is contained in:
parent
e6e72d23df
commit
0097b81778
77
TODO
77
TODO
@ -8,60 +8,39 @@
|
|||||||
|
|
||||||
* const audit
|
* const audit
|
||||||
|
|
||||||
|
Insert/Extract
|
||||||
* Replace vset with a call to merge.;
|
* Replace vset with a call to merge.;
|
||||||
* care in Gmerge,Gextract over vset .
|
* care in Gmerge,Gextract over vset .
|
||||||
* extract / merge extra implementation removal
|
* extract / merge extra implementation removal
|
||||||
|
* Optimise the extract/merge SIMD routines; Azusa??
|
||||||
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
|
- I have collated into single location at least.
|
||||||
|
- Need to use _mm_*insert/extract routines.
|
||||||
|
|
||||||
* Thread scaling tests Xeon, XeonPhi
|
* Thread scaling tests Xeon, XeonPhi
|
||||||
|
|
||||||
================================================================
|
|
||||||
*** New Functionality
|
|
||||||
================================================================
|
|
||||||
|
|
||||||
* Implement where within expression template scheme.
|
|
||||||
|
|
||||||
* - BinaryWriter, TextWriter etc...
|
|
||||||
- use protocol buffers? replace xmlReader/Writer etc..
|
|
||||||
- Binary use htonll, htonl
|
|
||||||
|
|
||||||
* Expression template engine: -- DONE
|
|
||||||
-- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
|
|
||||||
|
|
||||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
|
||||||
|
|
||||||
** Make the Tensor types and Complex etc... play more nicely.
|
** Make the Tensor types and Complex etc... play more nicely.
|
||||||
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||||
|
|
||||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I
|
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I
|
||||||
want to introduce a syntax that does not require this.
|
want to introduce a syntax that does not require this.
|
||||||
|
|
||||||
- Reductions that contract indices on a site should always demote the tensor structure.
|
- Reductions that contract indices on a site should always demote the tensor structure.
|
||||||
norm2(), innerProduct.
|
norm2(), innerProduct.
|
||||||
|
|
||||||
- Result of Sum(), SliceSum // spatial sums
|
- Result of Sum(), SliceSum // spatial sums
|
||||||
trace, traceIndex etc.. do not.
|
trace, traceIndex etc.. do not.
|
||||||
|
|
||||||
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
|
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
|
||||||
and then Sum and SliceSum do not remove the Scalars. This would be fixed if we
|
and then Sum and SliceSum do not remove the Scalars. This would be fixed if we
|
||||||
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
||||||
pure scalar.
|
pure scalar.
|
||||||
|
|
||||||
* Optimise the extract/merge SIMD routines; Azusa??
|
*** Expression template engine: -- DONE
|
||||||
- I have collated into single location at least.
|
|
||||||
- Need to use _mm_*insert/extract routines.
|
|
||||||
|
|
||||||
* Flavour matrices?
|
|
||||||
* Pauli, SU subgroup, etc..
|
|
||||||
* su3 exponentiation & log etc.. [Jamie's code?]
|
|
||||||
* TaProj
|
|
||||||
* FFTnD ?
|
|
||||||
|
|
||||||
* Parallel io improvements
|
|
||||||
- optional parallel MPI2 IO
|
|
||||||
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
|
||||||
|
|
||||||
* rb4d support for 5th dimension in Mobius.
|
|
||||||
|
|
||||||
|
[ -- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
|
||||||
|
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
|
||||||
|
* Implement where within expression template scheme.
|
||||||
* Check for missing functionality - partially audited against QDP++ layout
|
* Check for missing functionality - partially audited against QDP++ layout
|
||||||
|
|
||||||
// Unary functions
|
// Unary functions
|
||||||
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
|
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
|
||||||
// exp, log, sqrt, fabs
|
// exp, log, sqrt, fabs
|
||||||
@ -69,7 +48,21 @@
|
|||||||
// adjColor, adjSpin,
|
// adjColor, adjSpin,
|
||||||
// copyMask.
|
// copyMask.
|
||||||
// localMaxAbs
|
// localMaxAbs
|
||||||
// Fourier transform equivalent.
|
// Fourier transform equivalent.]
|
||||||
|
|
||||||
|
================================================================
|
||||||
|
*** New Functionality
|
||||||
|
================================================================
|
||||||
|
|
||||||
|
* - BinaryWriter, TextWriter etc...
|
||||||
|
- use protocol buffers? replace xmlReader/Writer etc..
|
||||||
|
- Binary use htonll, htonl
|
||||||
|
|
||||||
|
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||||
|
|
||||||
|
* Parallel io improvements
|
||||||
|
- optional parallel MPI2 IO
|
||||||
|
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
||||||
|
|
||||||
Actions -- coherent framework for implementing actions and their forces.
|
Actions -- coherent framework for implementing actions and their forces.
|
||||||
|
|
||||||
@ -80,9 +73,6 @@ Actions -- coherent framework for implementing actions and their forces.
|
|||||||
- Mobius
|
- Mobius
|
||||||
- z-Mobius
|
- z-Mobius
|
||||||
|
|
||||||
* Gauge
|
|
||||||
- Wilson, symanzik, iwasaki
|
|
||||||
|
|
||||||
Algorithms (lots of reuse/port from BFM)
|
Algorithms (lots of reuse/port from BFM)
|
||||||
* LinearOperator
|
* LinearOperator
|
||||||
* LinearSolver
|
* LinearSolver
|
||||||
@ -97,8 +87,21 @@ Algorithms (lots of reuse/port from BFM)
|
|||||||
* HDCG
|
* HDCG
|
||||||
* HMC,
|
* HMC,
|
||||||
* Heatbath
|
* Heatbath
|
||||||
|
* Integrators, leapfrog, omelyan, force gradient etc...
|
||||||
* etc..
|
* etc..
|
||||||
|
|
||||||
|
* Gauge
|
||||||
|
- Wilson, symanzik, iwasaki
|
||||||
|
|
||||||
|
* rb4d support for 5th dimension in Mobius.
|
||||||
|
|
||||||
|
* Flavour matrices?
|
||||||
|
* Pauli, SU subgroup, etc..
|
||||||
|
* su3 exponentiation & log etc.. [Jamie's code?]
|
||||||
|
* TaProj
|
||||||
|
* FFTnD ?
|
||||||
|
|
||||||
|
|
||||||
======================================================================================================
|
======================================================================================================
|
||||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
||||||
======================================================================================================
|
======================================================================================================
|
||||||
|
@ -80,7 +80,7 @@ int main (int argc, char ** argv)
|
|||||||
int ncall=1000;
|
int ncall=1000;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.multiply(src,result);
|
Dw.M(src,result);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
double flops=1320*volume*ncall;
|
double flops=1320*volume*ncall;
|
||||||
|
@ -85,7 +85,17 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
|
void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
|
||||||
|
{
|
||||||
|
Dhop(in,out);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
|
||||||
|
{
|
||||||
|
Dhop(in,out);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
|
||||||
{
|
{
|
||||||
Dhop(in,out);
|
Dhop(in,out);
|
||||||
return;
|
return;
|
||||||
@ -96,18 +106,18 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
|||||||
WilsonCompressor compressor;
|
WilsonCompressor compressor;
|
||||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||||
|
|
||||||
vHalfSpinColourVector tmp;
|
|
||||||
vHalfSpinColourVector chi;
|
|
||||||
vSpinColourVector result;
|
|
||||||
vHalfSpinColourVector Uchi;
|
|
||||||
int offset,local,perm, ptype;
|
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int sss=0;sss<grid->oSites();sss++){
|
for(int sss=0;sss<grid->oSites();sss++){
|
||||||
|
|
||||||
|
vHalfSpinColourVector tmp;
|
||||||
|
vHalfSpinColourVector chi;
|
||||||
|
vSpinColourVector result;
|
||||||
|
vHalfSpinColourVector Uchi;
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
|
||||||
|
// int ss = Stencil._LebesgueReorder[sss];
|
||||||
int ss = sss;
|
int ss = sss;
|
||||||
int ssu= ss;
|
int ssu= ss;
|
||||||
// int ss = Stencil._LebesgueReorder[sss];
|
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
offset = Stencil._offsets [Xp][ss];
|
offset = Stencil._offsets [Xp][ss];
|
||||||
|
@ -3,16 +3,12 @@
|
|||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
#include <algorithms/LinearOperator.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
|
|
||||||
template<class vtype> class LinearOperatorBase {
|
|
||||||
public:
|
|
||||||
void multiply(const Lattice<vtype> &in, Lattice<vtype> &out){ assert(0);}
|
|
||||||
};
|
|
||||||
|
|
||||||
class WilsonMatrix : public LinearOperatorBase<LatticeFermion>
|
class WilsonMatrix : public LinearOperatorBase<LatticeFermion>
|
||||||
{
|
{
|
||||||
//NB r=1;
|
//NB r=1;
|
||||||
@ -40,7 +36,9 @@ namespace Grid {
|
|||||||
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
|
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
|
||||||
|
|
||||||
// override multiply
|
// override multiply
|
||||||
void multiply(const LatticeFermion &in, LatticeFermion &out);
|
virtual void M (const LatticeFermion &in, LatticeFermion &out);
|
||||||
|
virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
|
||||||
|
virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
|
||||||
|
|
||||||
// non-hermitian hopping term; half cb or both
|
// non-hermitian hopping term; half cb or both
|
||||||
void Dhop(const LatticeFermion &in, LatticeFermion &out);
|
void Dhop(const LatticeFermion &in, LatticeFermion &out);
|
||||||
|
Loading…
Reference in New Issue
Block a user