mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
Pack the stencil smaller for 128 bit access
This commit is contained in:
parent
1fd08c21ac
commit
da17a015c7
@ -94,13 +94,20 @@ void Gather_plane_exchange_table(std::vector<std::pair<int,int> >& table,const L
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct StencilEntry {
|
struct StencilEntry {
|
||||||
uint64_t _offset;
|
#ifdef GRID_NVCC
|
||||||
uint64_t _byte_offset;
|
uint64_t _byte_offset; // 8 bytes
|
||||||
uint16_t _is_local;
|
uint32_t _offset; // 4 bytes
|
||||||
uint16_t _permute;
|
#else
|
||||||
uint16_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline
|
uint64_t _byte_offset; // 8 bytes
|
||||||
uint16_t _pad;
|
uint64_t _offset; // 8 bytes (are all 8 ever required?)
|
||||||
|
#endif
|
||||||
|
uint8_t _is_local; // 1 bytes
|
||||||
|
uint8_t _permute; // 1 bytes
|
||||||
|
uint8_t _around_the_world; // 1 bytes
|
||||||
|
uint8_t _pad; // 1 bytes
|
||||||
};
|
};
|
||||||
|
// Could pack to 8 + 4 + 4 = 128 bit and use
|
||||||
|
|
||||||
template<class vobj,class cobj>
|
template<class vobj,class cobj>
|
||||||
class CartesianStencilView {
|
class CartesianStencilView {
|
||||||
public:
|
public:
|
||||||
|
Loading…
Reference in New Issue
Block a user