From afda4598869ffada09b8342e47c50e847e2e7b3d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 16 May 2015 04:33:10 +0100 Subject: [PATCH] strong inline --- lib/lattice/Grid_lattice_arith.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/lattice/Grid_lattice_arith.h b/lib/lattice/Grid_lattice_arith.h index c9599582..c0bbb2b6 100644 --- a/lib/lattice/Grid_lattice_arith.h +++ b/lib/lattice/Grid_lattice_arith.h @@ -7,7 +7,7 @@ namespace Grid { ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// - template + template strong_inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); PARALLEL_FOR_LOOP @@ -15,10 +15,11 @@ PARALLEL_FOR_LOOP obj1 tmp; mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]); vstream(ret._odata[ss],tmp); + // mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); } } - template + template strong_inline void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); PARALLEL_FOR_LOOP @@ -29,7 +30,7 @@ PARALLEL_FOR_LOOP } } - template + template strong_inline void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); PARALLEL_FOR_LOOP @@ -39,7 +40,7 @@ PARALLEL_FOR_LOOP vstream(ret._odata[ss],tmp); } } - template + template strong_inline void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); PARALLEL_FOR_LOOP @@ -53,7 +54,7 @@ PARALLEL_FOR_LOOP ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// - template + template strong_inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); PARALLEL_FOR_LOOP @@ -64,7 +65,7 @@ PARALLEL_FOR_LOOP } } - template + template strong_inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); PARALLEL_FOR_LOOP @@ -75,7 +76,7 @@ PARALLEL_FOR_LOOP } } - template + template strong_inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); PARALLEL_FOR_LOOP @@ -85,7 +86,7 @@ PARALLEL_FOR_LOOP vstream(ret._odata[ss],tmp); } } - template + template strong_inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); PARALLEL_FOR_LOOP @@ -99,7 +100,7 @@ PARALLEL_FOR_LOOP ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// - template + template strong_inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); PARALLEL_FOR_LOOP @@ -110,7 +111,7 @@ PARALLEL_FOR_LOOP } } - template + template strong_inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); PARALLEL_FOR_LOOP @@ -121,7 +122,7 @@ PARALLEL_FOR_LOOP } } - template + template strong_inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); PARALLEL_FOR_LOOP @@ -131,7 +132,7 @@ PARALLEL_FOR_LOOP vstream(ret._odata[ss],tmp); } } - template + template strong_inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); PARALLEL_FOR_LOOP @@ -142,8 +143,8 @@ PARALLEL_FOR_LOOP } } - template - inline void axpy(Lattice &ret,sobj a,const Lattice &lhs,const Lattice &rhs){ + template strong_inline + void axpy(Lattice &ret,sobj a,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){