mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
OMP dslash working
This commit is contained in:
parent
457cc0d5a3
commit
7f3ae64a31
77
TODO
77
TODO
@ -8,60 +8,39 @@
|
||||
|
||||
* const audit
|
||||
|
||||
Insert/Extract
|
||||
* Replace vset with a call to merge.
|
||||
* care in Gmerge,Gextract over vset .
|
||||
* extract / merge extra implementation removal
|
||||
|
||||
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
|
||||
* Optimise the extract/merge SIMD routines; Azusa??
|
||||
- I have collated into single location at least.
|
||||
- Need to use _mm_*insert/extract routines.
|
||||
|
||||
* Thread scaling tests Xeon, XeonPhi
|
||||
|
||||
================================================================
|
||||
*** New Functionality
|
||||
================================================================
|
||||
|
||||
* Implement where within expression template scheme.
|
||||
|
||||
* - BinaryWriter, TextWriter etc...
|
||||
- use protocol buffers? replace xmlReader/Writer etc..
|
||||
- Binary use htonll, htonl
|
||||
|
||||
* Expression template engine: -- DONE
|
||||
-- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
|
||||
|
||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||
|
||||
** Make the Tensor types and Complex etc... play more nicely.
|
||||
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
|
||||
|
||||
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I
|
||||
want to introduce a syntax that does not require this.
|
||||
|
||||
- Reductions that contract indices on a site should always demote the tensor structure.
|
||||
norm2(), innerProduct.
|
||||
|
||||
- Result of Sum(), SliceSum // spatial sums
|
||||
trace, traceIndex etc.. do not.
|
||||
|
||||
- problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
|
||||
and then Sum and SliceSum do not remove the Scalars. This would be fixed if we
|
||||
template specialize the scalar scalar scalar sum and SliceSum, on the basis of being
|
||||
pure scalar.
|
||||
|
||||
* Optimise the extract/merge SIMD routines; Azusa??
|
||||
- I have collated into single location at least.
|
||||
- Need to use _mm_*insert/extract routines.
|
||||
|
||||
* Flavour matrices?
|
||||
* Pauli, SU subgroup, etc..
|
||||
* su3 exponentiation & log etc.. [Jamie's code?]
|
||||
* TaProj
|
||||
* FFTnD ?
|
||||
|
||||
* Parallel io improvements
|
||||
- optional parallel MPI2 IO
|
||||
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
||||
|
||||
* rb4d support for 5th dimension in Mobius.
|
||||
*** Expression template engine: -- DONE
|
||||
|
||||
[ -- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
|
||||
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
|
||||
* Implement where within expression template scheme.
|
||||
* Check for missing functionality - partially audited against QDP++ layout
|
||||
|
||||
// Unary functions
|
||||
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
|
||||
// exp, log, sqrt, fabs
|
||||
@ -69,7 +48,21 @@
|
||||
// adjColor, adjSpin,
|
||||
// copyMask.
|
||||
// localMaxAbs
|
||||
// Fourier transform equivalent.
|
||||
// Fourier transform equivalent.]
|
||||
|
||||
================================================================
|
||||
*** New Functionality
|
||||
================================================================
|
||||
|
||||
* - BinaryWriter, TextWriter etc...
|
||||
- use protocol buffers? replace xmlReader/Writer etc..
|
||||
- Binary use htonll, htonl
|
||||
|
||||
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
|
||||
|
||||
* Parallel io improvements
|
||||
- optional parallel MPI2 IO
|
||||
- move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
|
||||
|
||||
Actions -- coherent framework for implementing actions and their forces.
|
||||
|
||||
@ -80,9 +73,6 @@ Actions -- coherent framework for implementing actions and their forces.
|
||||
- Mobius
|
||||
- z-Mobius
|
||||
|
||||
* Gauge
|
||||
- Wilson, symanzik, iwasaki
|
||||
|
||||
Algorithms (lots of reuse/port from BFM)
|
||||
* LinearOperator
|
||||
* LinearSolver
|
||||
@ -97,8 +87,21 @@ Algorithms (lots of reuse/port from BFM)
|
||||
* HDCG
|
||||
* HMC,
|
||||
* Heatbath
|
||||
* Integrators, leapfrog, omelyan, force gradient etc...
|
||||
* etc..
|
||||
|
||||
* Gauge
|
||||
- Wilson, symanzik, iwasaki
|
||||
|
||||
* rb4d support for 5th dimension in Mobius.
|
||||
|
||||
* Flavour matrices?
|
||||
* Pauli, SU subgroup, etc..
|
||||
* su3 exponentiation & log etc.. [Jamie's code?]
|
||||
* TaProj
|
||||
* FFTnD ?
|
||||
|
||||
|
||||
======================================================================================================
|
||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
|
||||
======================================================================================================
|
||||
|
@ -80,7 +80,7 @@ int main (int argc, char ** argv)
|
||||
int ncall=1000;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
Dw.multiply(src,result);
|
||||
Dw.M(src,result);
|
||||
}
|
||||
double t1=usecond();
|
||||
double flops=1320*volume*ncall;
|
||||
|
@ -85,7 +85,17 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
|
||||
}
|
||||
}
|
||||
|
||||
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
|
||||
void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out);
|
||||
return;
|
||||
}
|
||||
void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out);
|
||||
return;
|
||||
}
|
||||
void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
|
||||
{
|
||||
Dhop(in,out);
|
||||
return;
|
||||
@ -96,18 +106,18 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
|
||||
WilsonCompressor compressor;
|
||||
Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<grid->oSites();sss++){
|
||||
|
||||
vHalfSpinColourVector tmp;
|
||||
vHalfSpinColourVector chi;
|
||||
vSpinColourVector result;
|
||||
vHalfSpinColourVector Uchi;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<grid->oSites();sss++){
|
||||
|
||||
// int ss = Stencil._LebesgueReorder[sss];
|
||||
int ss = sss;
|
||||
int ssu= ss;
|
||||
// int ss = Stencil._LebesgueReorder[sss];
|
||||
|
||||
// Xp
|
||||
offset = Stencil._offsets [Xp][ss];
|
||||
|
@ -3,16 +3,12 @@
|
||||
|
||||
#include <Grid.h>
|
||||
|
||||
#include <algorithms/LinearOperator.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template<class vtype> class LinearOperatorBase {
|
||||
public:
|
||||
void multiply(const Lattice<vtype> &in, Lattice<vtype> &out){ assert(0);}
|
||||
};
|
||||
|
||||
class WilsonMatrix : public LinearOperatorBase<LatticeFermion>
|
||||
{
|
||||
//NB r=1;
|
||||
@ -40,7 +36,9 @@ namespace Grid {
|
||||
void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
|
||||
|
||||
// override multiply
|
||||
void multiply(const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void M (const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
|
||||
virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
|
||||
|
||||
// non-hermitian hopping term; half cb or both
|
||||
void Dhop(const LatticeFermion &in, LatticeFermion &out);
|
||||
|
Loading…
Reference in New Issue
Block a user