mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Possible fallback plan for Fionn's compiler bug in nvcc
This commit is contained in:
parent
f786ff8d69
commit
ce1fc1f48a
@ -43,7 +43,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
|||||||
conformable(iftrue,predicate);
|
conformable(iftrue,predicate);
|
||||||
conformable(iftrue,ret);
|
conformable(iftrue,ret);
|
||||||
|
|
||||||
GridBase *grid=iftrue._grid;
|
GridBase *grid=iftrue.Grid();
|
||||||
|
|
||||||
typedef typename vobj::scalar_object scalar_object;
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
@ -52,22 +52,24 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
|||||||
|
|
||||||
const int Nsimd = grid->Nsimd();
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
std::vector<Integer> mask(Nsimd);
|
Integer mask;
|
||||||
std::vector<scalar_object> truevals (Nsimd);
|
scalar_object trueval;
|
||||||
std::vector<scalar_object> falsevals(Nsimd);
|
scalar_object falseval;
|
||||||
|
|
||||||
parallel_for(int ss=0;ss<iftrue._grid->oSites(); ss++){
|
autoView(iftrue_v,iftrue,CpuRead);
|
||||||
|
autoView(iffalse_v,iffalse,CpuRead);
|
||||||
extract(iftrue._odata[ss] ,truevals);
|
autoView(predicate_v,predicate,CpuRead);
|
||||||
extract(iffalse._odata[ss] ,falsevals);
|
autoView(ret_v,ret,CpuWrite);
|
||||||
extract<vInteger,Integer>(TensorRemove(predicate._odata[ss]),mask);
|
Integer NN= grid->oSites();
|
||||||
|
thread_for(ss,NN,{
|
||||||
for(int s=0;s<Nsimd;s++){
|
for(int l=0;l<Nsimd;l++){
|
||||||
if (mask[s]) falsevals[s]=truevals[s];
|
trueval =extractLane(l,iftrue_v[ss]);
|
||||||
|
falseval=extractLane(l,iffalse_v[ss]);
|
||||||
|
mask =extractLane(l,predicate_v[ss]);
|
||||||
|
if (mask) falseval=trueval;
|
||||||
|
insertLane(l,ret_v[ss],falseval);
|
||||||
}
|
}
|
||||||
|
});
|
||||||
merge(ret._odata[ss],falsevals);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj,class iobj>
|
template<class vobj,class iobj>
|
||||||
@ -76,9 +78,9 @@ inline Lattice<vobj> whereWolf(const Lattice<iobj> &predicate,Lattice<vobj> &ift
|
|||||||
conformable(iftrue,iffalse);
|
conformable(iftrue,iffalse);
|
||||||
conformable(iftrue,predicate);
|
conformable(iftrue,predicate);
|
||||||
|
|
||||||
Lattice<vobj> ret(iftrue._grid);
|
Lattice<vobj> ret(iftrue.Grid());
|
||||||
|
|
||||||
where(ret,predicate,iftrue,iffalse);
|
whereWolf(ret,predicate,iftrue,iffalse);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user