diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 9af347e8..0a5bd458 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -1058,19 +1058,10 @@ public: //Convert a lattice of one precision to another. The input workspace contains the mapping data. template void precisionChange(Lattice &out, const Lattice &in, const precisionChangeWorkspace &workspace){ + static_assert( std::is_same::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + out.Checkerboard() = in.Checkerboard(); - - typedef typename VobjOut::scalar_object SobjOut; - typedef typename VobjIn::scalar_object SobjIn; - - typedef typename SobjIn::scalar_type SfundIn; //"fundamental" complex/real data types - typedef typename SobjOut::scalar_type SfundOut; - constexpr int Nsimd_out = VobjOut::Nsimd(); - constexpr int Nfund_in = sizeof(SobjIn)/sizeof(SfundIn); - constexpr int Nfund_out = sizeof(SobjOut)/sizeof(SfundOut); //these should be the same! - - static_assert(Nfund_in == Nfund_out, "Expect input and output object types to contain the same number of fundamental data but with different precision!"); std::pair const* fmap_device = workspace.getMap(); @@ -1083,18 +1074,7 @@ void precisionChange(Lattice &out, const Lattice &in, const pre for(int out_lane=0; out_lane < Nsimd_out; out_lane++){ int in_oidx = fmap_osite[out_lane].first; int in_lane = fmap_osite[out_lane].second; - - //Room for optimization here by combining the precision change with the read/write to avoid the intermediate scalar objects - SobjIn sobj_in = extractLane(in_lane, in_v[in_oidx]); - SobjOut sobj_out; - SfundIn tmp_in; - SfundOut tmp_out; - for(int i=0;i &extracted, int offset) } + +////////////////////////////////////////////////////////////////////////////////// +//Copy a single lane of a SIMD tensor type from one object to another +//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type) +/////////////////////////////////////////////////////////////////////////////////// +template +accelerator_inline +void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in) +{ + static_assert( std::is_same::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same + + typedef typename vobjOut::vector_type ovector_type; + typedef typename vobjIn::vector_type ivector_type; + constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type); + constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type); + static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" ); + + typedef typename vobjOut::scalar_type oscalar_type; + typedef typename vobjIn::scalar_type iscalar_type; + typedef typename ExtractTypeMap::extract_type oextract_type; + typedef typename ExtractTypeMap::extract_type iextract_type; + + typedef oextract_type * opointer; + typedef iextract_type * ipointer; + + constexpr int oNsimd=ovector_type::Nsimd(); + constexpr int iNsimd=ivector_type::Nsimd(); + + iscalar_type itmp; + oscalar_type otmp; + + opointer __restrict__ op = (opointer)&vecOut; + ipointer __restrict__ ip = (ipointer)&vecIn; + for(int w=0;w