mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Compile fix, multishift mixed prec support
This commit is contained in:
parent
5b128a6f9f
commit
e4c117aabf
@ -92,9 +92,7 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
|
||||
ssum = ssum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object ssobj;
|
||||
ssobj ret = ssum;
|
||||
return ret;
|
||||
return ssum;
|
||||
}
|
||||
/*
|
||||
Threaded max, don't use for now
|
||||
|
@ -1080,6 +1080,25 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
});
|
||||
}
|
||||
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChangeFast(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
||||
{
|
||||
typedef typename VobjOut::scalar_object SobjOut;
|
||||
typedef typename VobjIn::scalar_object SobjIn;
|
||||
conformable(out.Grid(),in.Grid());
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
int nsimd = out.Grid()->Nsimd();
|
||||
autoView( out_v , out, AcceleratorWrite);
|
||||
autoView( in_v , in, AcceleratorRead);
|
||||
accelerator_for(idx,out.Grid()->oSites(),nsimd,{
|
||||
auto itmp = coalescedRead(in_v[idx]);
|
||||
auto otmp = coalescedRead(out_v[idx]);
|
||||
#ifdef GRID_SIMT
|
||||
otmp=itmp;
|
||||
#endif
|
||||
coalescedWrite(out_v[idx],otmp);
|
||||
});
|
||||
}
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
|
||||
|
Loading…
x
Reference in New Issue
Block a user