1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 07:17:06 +01:00

AddSub based alternate SU3 routine

This commit is contained in:
paboyle
2016-03-28 17:55:22 -06:00
parent b6c3bc574b
commit c77b7ee897
9 changed files with 116 additions and 1086 deletions

View File

@ -28,18 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ASM_AV512_ADDSUB_H
#define GRID_ASM_AV512_ADDSUB_H
////////////////////////////////////////////////////////////
// Knights Landing specials
////////////////////////////////////////////////////////////
// VMOVRDUPd(OFF,A,DEST): expands to the asm text
//   "vpshufd $0x44,<OFF>*64(<A>),<DEST>;\n"
// i.e. a shuffle-load from byte offset OFF*64 off base register A into DEST.
// Immediate 0x44 picks 32-bit elements 1,0,1,0 within every 128-bit lane, so
// the low 64-bit element of each lane is duplicated (presumably the real part
// of a double-precision complex value -- confirm against the data layout).
#define VMOVRDUPd(OFF,A,DEST) "vpshufd $0x44," #OFF "*64(" #A ")," #DEST ";\n"
// VMOVIDUPd(OFF,A,DEST): expands to the asm text
//   "vpshufd $0xee,<OFF>*64(<A>),<DEST>;\n"
// i.e. a shuffle-load from byte offset OFF*64 off base register A into DEST.
// Immediate 0xee picks 32-bit elements 3,2,3,2 within every 128-bit lane, so
// the high 64-bit element of each lane is duplicated (presumably the imaginary
// part of a double-precision complex value -- confirm against the data layout).
#define VMOVIDUPd(OFF,A,DEST) "vpshufd $0xee," #OFF "*64(" #A ")," #DEST ";\n"
// VMOVRDUPf(O,P,DEST): expands to the asm text
//   "vmovsldup <O>*64(<P>), <DEST>;\n"
// O is the 64-byte block index, P the base-address register, DEST the target
// vector register. vmovsldup replicates the even-indexed single-precision
// elements into the adjacent odd slots.
// The stringized names must match the parameter list: '#' applied to a token
// that is not a macro parameter is a constraint violation and fails to compile.
#define VMOVRDUPf(O,P,DEST) "vmovsldup " #O "*64(" #P "), " #DEST ";\n"
// VMOVIDUPf(O,P,DEST): expands to the asm text
//   "vmovshdup <O>*64(<P>), <DEST>;\n"
// O is the 64-byte block index, P the base-address register, DEST the target
// vector register. vmovshdup replicates the odd-indexed single-precision
// elements into the adjacent even slots.
// The stringized names must match the parameter list: '#' applied to a token
// that is not a macro parameter is a constraint violation and fails to compile.
#define VMOVIDUPf(O,P,DEST) "vmovshdup " #O "*64(" #P "), " #DEST ";\n"
// VMADDSUBf(Aii,Bri,accum): expands to the asm text
//   "vfmaddsub231ps <Aii>,<Bri>,<accum>;\n"
// a single-precision fused multiply with alternating subtract/add into accum.
// Going by the parameter names, Aii presumably holds duplicated imaginary
// parts and Bri the (real,imag) pairs -- confirm against the call sites.
// The stringized names must match the parameter list: '#' applied to a token
// that is not a macro parameter is a constraint violation and fails to compile.
#define VMADDSUBf(Aii,Bri,accum) "vfmaddsub231ps " #Aii "," #Bri "," #accum ";\n"
// VMADDSUBd(Aii,Bri,accum): expands to the asm text
//   "vfmaddsub231pd <Aii>,<Bri>,<accum>;\n"
// a double-precision fused multiply with alternating subtract/add into accum.
// Going by the parameter names, Aii presumably holds duplicated imaginary
// parts and Bri the (real,imag) pairs -- confirm against the call sites.
// The stringized names must match the parameter list: '#' applied to a token
// that is not a macro parameter is a constraint violation and fails to compile.
#define VMADDSUBd(Aii,Bri,accum) "vfmaddsub231pd " #Aii "," #Bri "," #accum ";\n"
////////////////////////////////////////////////////////////////
// Building blocks for SU3 x 2spinor
@ -48,7 +36,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// 6 Chi shuffles ir,ri
// 6muls, 30 fmaddsubs
////////////////////////////////////////////////////////////////
#define MULT_ADDSUB_2SPIN_PF(ptr) \
#define MULT_ADDSUB_2SPIN(ptr) \
LOAD64(%r8,ptr) \
__asm__ ( \
VMOVIDUPf(0,%r8,Z0 ) \