mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 15:57:05 +01:00
@ -97,6 +97,20 @@ accelerator_inline void convertType(ComplexF & out, const std::complex<float> &
|
||||
out = in;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
accelerator_inline EnableIf<isGridFundamental<T>> convertType(T & out, const T & in) {
|
||||
out = in;
|
||||
}
|
||||
|
||||
// This would allow for conversions between GridFundamental types, but is not strictly needed as yet
|
||||
/*template<typename T1, typename T2>
|
||||
accelerator_inline typename std::enable_if<isGridFundamental<T1>::value && isGridFundamental<T2>::value>::type
|
||||
// Or to make this very broad, conversions between anything that's not a GridTensor could be allowed
|
||||
//accelerator_inline typename std::enable_if<!isGridTensor<T1>::value && !isGridTensor<T2>::value>::type
|
||||
convertType(T1 & out, const T2 & in) {
|
||||
out = in;
|
||||
}*/
|
||||
|
||||
#ifdef GRID_SIMT
|
||||
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
|
||||
((ComplexF*)&out)[acceleratorSIMTlane(vComplexF::Nsimd())] = in;
|
||||
@ -117,23 +131,18 @@ accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) {
|
||||
Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in);
|
||||
template<typename T1,typename T2,int N>
|
||||
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in);
|
||||
|
||||
template<typename T1,typename T2, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
|
||||
accelerator_inline void convertType(T1 & out, const iScalar<T2> & in) {
|
||||
convertType(out,in._internal);
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(iScalar<T1> & out, const iScalar<T2> & in) {
|
||||
convertType(out._internal,in._internal);
|
||||
}
|
||||
|
||||
template<typename T1, typename std::enable_if<!isGridScalar<T1>::value, T1>::type* = nullptr>
|
||||
accelerator_inline void convertType(T1 & out, const iScalar<T1> & in) {
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline NotEnableIf<isGridScalar<T1>> convertType(T1 & out, const iScalar<T2> & in) {
|
||||
convertType(out,in._internal);
|
||||
}
|
||||
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(iScalar<T1> & out, const T2 & in) {
|
||||
accelerator_inline NotEnableIf<isGridScalar<T2>> convertType(iScalar<T1> & out, const T2 & in) {
|
||||
convertType(out._internal,in);
|
||||
}
|
||||
|
||||
@ -150,11 +159,6 @@ accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & i
|
||||
convertType(out._internal[i],in._internal[i]);
|
||||
}
|
||||
|
||||
// Same-type conversion for Grid fundamental types: copies `in` into `out`.
// The defaulted pointer template parameter is only well-formed when
// isGridFundamental<T>::value is true, which disables the overload otherwise.
// NOTE(review): an overload with the same parameter list (T&, const T&),
// constrained via an EnableIf return type, appears earlier in this view;
// keeping both would likely make calls ambiguous -- confirm which one is
// meant to remain.
template<typename T, typename std::enable_if<isGridFundamental<T>::value, T>::type* = nullptr>
accelerator_inline void convertType(T & out, const T & in)
{
  out = in;
}
|
||||
|
||||
template<typename T1,typename T2>
|
||||
accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
|
||||
autoView( out_v , out,AcceleratorWrite);
|
||||
|
@ -43,7 +43,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
||||
conformable(iftrue,predicate);
|
||||
conformable(iftrue,ret);
|
||||
|
||||
GridBase *grid=iftrue._grid;
|
||||
GridBase *grid=iftrue.Grid();
|
||||
|
||||
typedef typename vobj::scalar_object scalar_object;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
@ -52,22 +52,23 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
std::vector<Integer> mask(Nsimd);
|
||||
std::vector<scalar_object> truevals (Nsimd);
|
||||
std::vector<scalar_object> falsevals(Nsimd);
|
||||
|
||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
||||
|
||||
extract(iftrue._odata[ss] ,truevals);
|
||||
extract(iffalse._odata[ss] ,falsevals);
|
||||
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
|
||||
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
if (mask[s]) falsevals[s]=truevals[s];
|
||||
autoView(iftrue_v,iftrue,CpuRead);
|
||||
autoView(iffalse_v,iffalse,CpuRead);
|
||||
autoView(predicate_v,predicate,CpuRead);
|
||||
autoView(ret_v,ret,CpuWrite);
|
||||
Integer NN= grid->oSites();
|
||||
thread_for(ss,NN,{
|
||||
Integer mask;
|
||||
scalar_object trueval;
|
||||
scalar_object falseval;
|
||||
for(int l=0;l<Nsimd;l++){
|
||||
trueval =extractLane(l,iftrue_v[ss]);
|
||||
falseval=extractLane(l,iffalse_v[ss]);
|
||||
mask =extractLane(l,predicate_v[ss]);
|
||||
if (mask) falseval=trueval;
|
||||
insertLane(l,ret_v[ss],falseval);
|
||||
}
|
||||
|
||||
merge(ret._odata[ss],falsevals);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template<class vobj,class iobj>
|
||||
@ -76,9 +77,9 @@ inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &ift
|
||||
conformable(iftrue,iffalse);
|
||||
conformable(iftrue,predicate);
|
||||
|
||||
Lattice<vobj> ret(iftrue._grid);
|
||||
Lattice<vobj> ret(iftrue.Grid());
|
||||
|
||||
where(ret,predicate,iftrue,iffalse);
|
||||
whereWolf(ret,predicate,iftrue,iffalse);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
Reference in New Issue
Block a user