mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-27 22:25:56 +01:00
Formatting emacs compliant
This commit is contained in:
parent
b815f5f764
commit
bd15c38ae8
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GRID_ASM_AV512_H
|
#ifndef GRID_ASM_AV512_H
|
||||||
#define GRID_ASM_AV512_H
|
#define GRID_ASM_AV512_H
|
||||||
|
|
||||||
@ -44,21 +44,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp)
|
#define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp)
|
||||||
#define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp)
|
#define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp)
|
||||||
|
|
||||||
#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMf(O,P,tmp) \
|
VSHUFMEMf(O,P,tmp) \
|
||||||
VMULMEMf(O,P,B,Biirr) \
|
VMULMEMf(O,P,B,Biirr) \
|
||||||
VMULMEMf(O,P,C,Ciirr) \
|
VMULMEMf(O,P,C,Ciirr) \
|
||||||
VMULf(tmp,B,Briir) \
|
VMULf(tmp,B,Briir) \
|
||||||
VMULf(tmp,C,Criir)
|
VMULf(tmp,C,Criir)
|
||||||
|
|
||||||
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMd(O,P,tmp) \
|
VSHUFMEMd(O,P,tmp) \
|
||||||
VMULMEMd(O,P,B,Biirr) \
|
VMULMEMd(O,P,B,Biirr) \
|
||||||
VMULMEMd(O,P,C,Ciirr) \
|
VMULMEMd(O,P,C,Ciirr) \
|
||||||
VMULd(tmp,B,Briir) \
|
VMULd(tmp,B,Briir) \
|
||||||
VMULd(tmp,C,Criir)
|
VMULd(tmp,C,Criir)
|
||||||
|
|
||||||
#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMf(O,P,tmp) \
|
VSHUFMEMf(O,P,tmp) \
|
||||||
VMADDMEMf(O,P,B,Biirr) \
|
VMADDMEMf(O,P,B,Biirr) \
|
||||||
VMADDMEMf(O,P,C,Ciirr) \
|
VMADDMEMf(O,P,C,Ciirr) \
|
||||||
@ -73,16 +73,16 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
VMADDd(tmp,C,Criir)
|
VMADDd(tmp,C,Criir)
|
||||||
|
|
||||||
// Merges accumulation for complex dot chain; less efficient under avx512
|
// Merges accumulation for complex dot chain; less efficient under avx512
|
||||||
#define ZEND1f(Criir,Ciirr, tmp) "vshufps $0xb1," #Criir "," #Criir "," #tmp ";\n"\
|
#define ZEND1f(Criir,Ciirr, tmp) "vshufps $0xb1," #Criir "," #Criir "," #tmp ";\n" \
|
||||||
"vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n"
|
"vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n"
|
||||||
|
|
||||||
#define ZEND2f(Criir,Ciirr, tmp) "vshufps $0xb1," #Ciirr "," #Ciirr "," #tmp ";\n"\
|
#define ZEND2f(Criir,Ciirr, tmp) "vshufps $0xb1," #Ciirr "," #Ciirr "," #tmp ";\n" \
|
||||||
"vsubps " #tmp "," #Ciirr "," #Criir"{%k7}" ";\n"
|
"vsubps " #tmp "," #Ciirr "," #Criir"{%k7}" ";\n"
|
||||||
|
|
||||||
#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\
|
#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\
|
||||||
"vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n"
|
"vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n"
|
||||||
|
|
||||||
#define ZEND2d(Criir,Ciirr, tmp) "vshufpd $0x55," #Ciirr "," #Ciirr "," #tmp ";\n"\
|
#define ZEND2d(Criir,Ciirr, tmp) "vshufpd $0x55," #Ciirr "," #Ciirr "," #tmp ";\n" \
|
||||||
"vsubpd " #tmp "," #Ciirr "," #Criir"{%k7};\n" // ri+ir ; ri+ir,rr-ii
|
"vsubpd " #tmp "," #Ciirr "," #Criir"{%k7};\n" // ri+ir ; ri+ir,rr-ii
|
||||||
|
|
||||||
#define VMOVRDUPd(OFF,A,DEST) "vpshufd $0x44," #OFF "*64(" #A ")," #DEST ";\n" // 32 bit level: 1,0,3,2
|
#define VMOVRDUPd(OFF,A,DEST) "vpshufd $0x44," #OFF "*64(" #A ")," #DEST ";\n" // 32 bit level: 1,0,3,2
|
||||||
@ -123,7 +123,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VMADDSUBIDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMADDSUBIDUPd(O,P,B,accum) "vfmaddsub231pd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
#define VMULRDUPd(O,P,B,accum) "vmulpd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMULRDUPd(O,P,B,accum) "vmulpd (" #O"*16+0)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
#define VMULIDUPd(O,P,B,accum) "vmulpd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
#define VMULIDUPd(O,P,B,accum) "vmulpd (" #O"*16+8)("#P "){1to8},"#B "," #accum ";\n"
|
||||||
/*
|
/*
|
||||||
* TimesI is used only in the XP recon
|
* TimesI is used only in the XP recon
|
||||||
* Could zero the regs and use RECON_ACCUM
|
* Could zero the regs and use RECON_ACCUM
|
||||||
*/
|
*/
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GRID_ASM_INTEL_COMMON_512_H
|
#ifndef GRID_ASM_INTEL_COMMON_512_H
|
||||||
#define GRID_ASM_INTEL_COMMON_512_H
|
#define GRID_ASM_INTEL_COMMON_512_H
|
||||||
|
|
||||||
@ -37,9 +37,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
// Opcodes common
|
// Opcodes common
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
#define MASK_REGS \
|
#define MASK_REGS \
|
||||||
__asm__ ("mov $0xAAAA, %%eax \n"\
|
__asm__ ("mov $0xAAAA, %%eax \n" \
|
||||||
"kmovw %%eax, %%k6 \n"\
|
"kmovw %%eax, %%k6 \n" \
|
||||||
"mov $0x5555, %%eax \n"\
|
"mov $0x5555, %%eax \n" \
|
||||||
"kmovw %%eax, %%k7 \n" : : : "%eax");
|
"kmovw %%eax, %%k7 \n" : : : "%eax");
|
||||||
|
|
||||||
//#define label(B) __asm__ ( __func__ _LINE__ #B ":\n" );
|
//#define label(B) __asm__ ( __func__ _LINE__ #B ":\n" );
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
// No include guard: this file may be included multiple times, since it #undefs (clears) the macros
|
// No include guard: this file may be included multiple times, since it #undefs (clears) the macros
|
||||||
#undef VZERO
|
#undef VZERO
|
||||||
#undef VMOV
|
#undef VMOV
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GRID_ASM_AV512_H
|
#ifndef GRID_ASM_AV512_H
|
||||||
#define GRID_ASM_AV512_H
|
#define GRID_ASM_AV512_H
|
||||||
|
|
||||||
@ -44,21 +44,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp)
|
#define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp)
|
||||||
#define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp)
|
#define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp)
|
||||||
|
|
||||||
#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMf(O,P,tmp) \
|
VSHUFMEMf(O,P,tmp) \
|
||||||
VMULMEMf(O,P,B,Biirr) \
|
VMULMEMf(O,P,B,Biirr) \
|
||||||
VMULMEMf(O,P,C,Ciirr) \
|
VMULMEMf(O,P,C,Ciirr) \
|
||||||
VMULf(tmp,B,Briir) \
|
VMULf(tmp,B,Briir) \
|
||||||
VMULf(tmp,C,Criir)
|
VMULf(tmp,C,Criir)
|
||||||
|
|
||||||
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMd(O,P,tmp) \
|
VSHUFMEMd(O,P,tmp) \
|
||||||
VMULMEMd(O,P,B,Biirr) \
|
VMULMEMd(O,P,B,Biirr) \
|
||||||
VMULMEMd(O,P,C,Ciirr) \
|
VMULMEMd(O,P,C,Ciirr) \
|
||||||
VMULd(tmp,B,Briir) \
|
VMULd(tmp,B,Briir) \
|
||||||
VMULd(tmp,C,Criir)
|
VMULd(tmp,C,Criir)
|
||||||
|
|
||||||
#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
#define ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) \
|
||||||
VSHUFMEMf(O,P,tmp) \
|
VSHUFMEMf(O,P,tmp) \
|
||||||
VMADDMEMf(O,P,B,Biirr) \
|
VMADDMEMf(O,P,B,Biirr) \
|
||||||
VMADDMEMf(O,P,C,Ciirr) \
|
VMADDMEMf(O,P,C,Ciirr) \
|
||||||
@ -106,7 +106,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
|
#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
|
||||||
#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
|
#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
|
||||||
|
|
||||||
// Acc = Acc - i A
|
// Acc = Acc - i A
|
||||||
#define VACCTIMESMINUSI0d(A,ACC,tmp)
|
#define VACCTIMESMINUSI0d(A,ACC,tmp)
|
||||||
#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
|
#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n"
|
||||||
#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
|
#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
// No include guard: this file may be included multiple times, since it #undefs (clears) the macros
|
// No include guard: this file may be included multiple times, since it #undefs (clears) the macros
|
||||||
#undef VZERO
|
#undef VZERO
|
||||||
#undef VMOV
|
#undef VMOV
|
||||||
|
@ -23,8 +23,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GRID_ASM_INTEL_512_QCD_H
|
#ifndef GRID_ASM_INTEL_512_QCD_H
|
||||||
#define GRID_ASM_INTEL_512_QCD_H
|
#define GRID_ASM_INTEL_512_QCD_H
|
||||||
|
|
||||||
@ -143,7 +143,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
VLOAD(10,%r8,Chimu_31) \
|
VLOAD(10,%r8,Chimu_31) \
|
||||||
VLOAD(11,%r8,Chimu_32)
|
VLOAD(11,%r8,Chimu_32)
|
||||||
|
|
||||||
#define SHUF_CHIMU23i\
|
#define SHUF_CHIMU23i \
|
||||||
VSHUFMEM(6,%r8,Chimu_20) \
|
VSHUFMEM(6,%r8,Chimu_20) \
|
||||||
VSHUFMEM(7,%r8,Chimu_21) \
|
VSHUFMEM(7,%r8,Chimu_21) \
|
||||||
VSHUFMEM(8,%r8,Chimu_22) \
|
VSHUFMEM(8,%r8,Chimu_22) \
|
||||||
@ -250,21 +250,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
// hspin(0)=fspin(0)-timesI(fspin(3))
|
// hspin(0)=fspin(0)-timesI(fspin(3))
|
||||||
// hspin(1)=fspin(1)-timesI(fspin(2))
|
// hspin(1)=fspin(1)-timesI(fspin(2))
|
||||||
#define XM_PROJMEM(PTR) \
|
#define XM_PROJMEM(PTR) \
|
||||||
LOAD64(%r8,PTR)\
|
LOAD64(%r8,PTR) \
|
||||||
__asm__ ( \
|
__asm__ ( \
|
||||||
LOAD_CHIi \
|
LOAD_CHIi \
|
||||||
SHUF_CHIMU23i \
|
SHUF_CHIMU23i \
|
||||||
VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_30)\
|
VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_30) \
|
||||||
VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_31)\
|
VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_31) \
|
||||||
VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_32)\
|
VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_32) \
|
||||||
VACCTIMESMINUSI1(Chi_10,Chi_10,Chimu_20)\
|
VACCTIMESMINUSI1(Chi_10,Chi_10,Chimu_20) \
|
||||||
VACCTIMESMINUSI1(Chi_11,Chi_11,Chimu_21)\
|
VACCTIMESMINUSI1(Chi_11,Chi_11,Chimu_21) \
|
||||||
VACCTIMESMINUSI1(Chi_12,Chi_12,Chimu_22)\
|
VACCTIMESMINUSI1(Chi_12,Chi_12,Chimu_22) \
|
||||||
VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_30)\
|
VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_30) \
|
||||||
VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_31)\
|
VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_31) \
|
||||||
VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_32)\
|
VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_32) \
|
||||||
VACCTIMESMINUSI2(Chi_10,Chi_10,Chimu_20)\
|
VACCTIMESMINUSI2(Chi_10,Chi_10,Chimu_20) \
|
||||||
VACCTIMESMINUSI2(Chi_11,Chi_11,Chimu_21)\
|
VACCTIMESMINUSI2(Chi_11,Chi_11,Chimu_21) \
|
||||||
VACCTIMESMINUSI2(Chi_12,Chi_12,Chimu_22) );
|
VACCTIMESMINUSI2(Chi_12,Chi_12,Chimu_22) );
|
||||||
|
|
||||||
#define YM_PROJMEM(ptr) \
|
#define YM_PROJMEM(ptr) \
|
||||||
@ -283,17 +283,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
__asm__ ( \
|
__asm__ ( \
|
||||||
LOAD_CHIi \
|
LOAD_CHIi \
|
||||||
SHUF_CHIMU23i \
|
SHUF_CHIMU23i \
|
||||||
VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_20)\
|
VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_20) \
|
||||||
VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_21)\
|
VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_21) \
|
||||||
VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_22)\
|
VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_22) \
|
||||||
VACCTIMESI1(Chi_10,Chi_10,Chimu_30)\
|
VACCTIMESI1(Chi_10,Chi_10,Chimu_30) \
|
||||||
VACCTIMESI1(Chi_11,Chi_11,Chimu_31)\
|
VACCTIMESI1(Chi_11,Chi_11,Chimu_31) \
|
||||||
VACCTIMESI1(Chi_12,Chi_12,Chimu_32)\
|
VACCTIMESI1(Chi_12,Chi_12,Chimu_32) \
|
||||||
VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_20)\
|
VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_20) \
|
||||||
VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_21)\
|
VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_21) \
|
||||||
VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_22)\
|
VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_22) \
|
||||||
VACCTIMESI2(Chi_10,Chi_10,Chimu_30)\
|
VACCTIMESI2(Chi_10,Chi_10,Chimu_30) \
|
||||||
VACCTIMESI2(Chi_11,Chi_11,Chimu_31)\
|
VACCTIMESI2(Chi_11,Chi_11,Chimu_31) \
|
||||||
VACCTIMESI2(Chi_12,Chi_12,Chimu_32) );
|
VACCTIMESI2(Chi_12,Chi_12,Chimu_32) );
|
||||||
|
|
||||||
#define TM_PROJMEM(ptr) \
|
#define TM_PROJMEM(ptr) \
|
||||||
@ -338,200 +338,200 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
VTIMESMINUSI2(UChi_01,psi_31,TMP) \
|
VTIMESMINUSI2(UChi_01,psi_31,TMP) \
|
||||||
VTIMESMINUSI2(UChi_02,psi_32,TMP) \
|
VTIMESMINUSI2(UChi_02,psi_32,TMP) \
|
||||||
);
|
);
|
||||||
// NB could save 6 ops using addsub => 12 cycles
|
// NB could save 6 ops using addsub => 12 cycles
|
||||||
#define XP_RECON_ACCUM __asm__ ( \
|
#define XP_RECON_ACCUM __asm__ ( \
|
||||||
VZERO(TMP)\
|
VZERO(TMP) \
|
||||||
VACCTIMESMINUSI0(UChi_00,psi_30,Z3)\
|
VACCTIMESMINUSI0(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI0(UChi_10,psi_20,Z0)\
|
VACCTIMESMINUSI0(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI0(UChi_01,psi_31,Z4)\
|
VACCTIMESMINUSI0(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI0(UChi_11,psi_21,Z1)\
|
VACCTIMESMINUSI0(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI0(UChi_02,psi_32,Z5)\
|
VACCTIMESMINUSI0(UChi_02,psi_32,Z5) \
|
||||||
VACCTIMESMINUSI0(UChi_12,psi_22,Z2)\
|
VACCTIMESMINUSI0(UChi_12,psi_22,Z2) \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VACCTIMESMINUSI1(UChi_00,psi_30,Z3)\
|
VACCTIMESMINUSI1(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI1(UChi_10,psi_20,Z0)\
|
VACCTIMESMINUSI1(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI1(UChi_01,psi_31,Z4)\
|
VACCTIMESMINUSI1(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI1(UChi_11,psi_21,Z1)\
|
VACCTIMESMINUSI1(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI1(UChi_02,psi_32,Z5)\
|
VACCTIMESMINUSI1(UChi_02,psi_32,Z5) \
|
||||||
VACCTIMESMINUSI1(UChi_12,psi_22,Z2)\
|
VACCTIMESMINUSI1(UChi_12,psi_22,Z2) \
|
||||||
VACCTIMESMINUSI2(UChi_10,psi_20,Z0)\
|
VACCTIMESMINUSI2(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI2(UChi_11,psi_21,Z1)\
|
VACCTIMESMINUSI2(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI2(UChi_12,psi_22,Z2)\
|
VACCTIMESMINUSI2(UChi_12,psi_22,Z2) \
|
||||||
VACCTIMESMINUSI2(UChi_00,psi_30,Z3)\
|
VACCTIMESMINUSI2(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI2(UChi_01,psi_31,Z4)\
|
VACCTIMESMINUSI2(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI2(UChi_02,psi_32,Z5)\
|
VACCTIMESMINUSI2(UChi_02,psi_32,Z5) \
|
||||||
);
|
);
|
||||||
|
|
||||||
#define XM_RECON __asm__ ( \
|
#define XM_RECON __asm__ ( \
|
||||||
VZERO(TMP)\
|
VZERO(TMP) \
|
||||||
VTIMESI0(UChi_00,psi_30,TMP)\
|
VTIMESI0(UChi_00,psi_30,TMP) \
|
||||||
VTIMESI0(UChi_10,psi_20,TMP)\
|
VTIMESI0(UChi_10,psi_20,TMP) \
|
||||||
VTIMESI0(UChi_01,psi_31,TMP)\
|
VTIMESI0(UChi_01,psi_31,TMP) \
|
||||||
VTIMESI0(UChi_11,psi_21,TMP)\
|
VTIMESI0(UChi_11,psi_21,TMP) \
|
||||||
VTIMESI0(UChi_02,psi_32,TMP)\
|
VTIMESI0(UChi_02,psi_32,TMP) \
|
||||||
VTIMESI0(UChi_12,psi_22,TMP)\
|
VTIMESI0(UChi_12,psi_22,TMP) \
|
||||||
VMOV(UChi_00,psi_00)\
|
VMOV(UChi_00,psi_00) \
|
||||||
VMOV(UChi_10,psi_10)\
|
VMOV(UChi_10,psi_10) \
|
||||||
VMOV(UChi_01,psi_01)\
|
VMOV(UChi_01,psi_01) \
|
||||||
VMOV(UChi_11,psi_11)\
|
VMOV(UChi_11,psi_11) \
|
||||||
VMOV(UChi_02,psi_02)\
|
VMOV(UChi_02,psi_02) \
|
||||||
VMOV(UChi_12,psi_12)\
|
VMOV(UChi_12,psi_12) \
|
||||||
VTIMESI1(UChi_00,psi_30,TMP)\
|
VTIMESI1(UChi_00,psi_30,TMP) \
|
||||||
VTIMESI1(UChi_10,psi_20,TMP)\
|
VTIMESI1(UChi_10,psi_20,TMP) \
|
||||||
VTIMESI1(UChi_01,psi_31,TMP)\
|
VTIMESI1(UChi_01,psi_31,TMP) \
|
||||||
VTIMESI1(UChi_11,psi_21,TMP)\
|
VTIMESI1(UChi_11,psi_21,TMP) \
|
||||||
VTIMESI1(UChi_02,psi_32,TMP)\
|
VTIMESI1(UChi_02,psi_32,TMP) \
|
||||||
VTIMESI1(UChi_12,psi_22,TMP)\
|
VTIMESI1(UChi_12,psi_22,TMP) \
|
||||||
VTIMESI2(UChi_10,psi_20,TMP)\
|
VTIMESI2(UChi_10,psi_20,TMP) \
|
||||||
VTIMESI2(UChi_11,psi_21,TMP)\
|
VTIMESI2(UChi_11,psi_21,TMP) \
|
||||||
VTIMESI2(UChi_12,psi_22,TMP)\
|
VTIMESI2(UChi_12,psi_22,TMP) \
|
||||||
VTIMESI2(UChi_00,psi_30,TMP)\
|
VTIMESI2(UChi_00,psi_30,TMP) \
|
||||||
VTIMESI2(UChi_01,psi_31,TMP)\
|
VTIMESI2(UChi_01,psi_31,TMP) \
|
||||||
VTIMESI2(UChi_02,psi_32,TMP)\
|
VTIMESI2(UChi_02,psi_32,TMP) \
|
||||||
);
|
);
|
||||||
|
|
||||||
#define XM_RECON_ACCUM __asm__ ( \
|
#define XM_RECON_ACCUM __asm__ ( \
|
||||||
VACCTIMESI0(UChi_10,psi_20,Z0)\
|
VACCTIMESI0(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESI0(UChi_00,psi_30,Z3)\
|
VACCTIMESI0(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESI0(UChi_11,psi_21,Z1)\
|
VACCTIMESI0(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESI0(UChi_01,psi_31,Z4)\
|
VACCTIMESI0(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESI0(UChi_12,psi_22,Z2)\
|
VACCTIMESI0(UChi_12,psi_22,Z2) \
|
||||||
VACCTIMESI0(UChi_02,psi_32,Z5)\
|
VACCTIMESI0(UChi_02,psi_32,Z5) \
|
||||||
\
|
\
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
\
|
\
|
||||||
VACCTIMESI1(UChi_10,psi_20,Z0)\
|
VACCTIMESI1(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESI1(UChi_00,psi_30,Z3)\
|
VACCTIMESI1(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESI1(UChi_11,psi_21,Z1)\
|
VACCTIMESI1(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESI1(UChi_01,psi_31,Z4)\
|
VACCTIMESI1(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESI1(UChi_12,psi_22,Z2)\
|
VACCTIMESI1(UChi_12,psi_22,Z2) \
|
||||||
VACCTIMESI1(UChi_02,psi_32,Z5)\
|
VACCTIMESI1(UChi_02,psi_32,Z5) \
|
||||||
VACCTIMESI2(UChi_10,psi_20,Z0)\
|
VACCTIMESI2(UChi_10,psi_20,Z0) \
|
||||||
VACCTIMESI2(UChi_11,psi_21,Z1)\
|
VACCTIMESI2(UChi_11,psi_21,Z1) \
|
||||||
VACCTIMESI2(UChi_12,psi_22,Z2)\
|
VACCTIMESI2(UChi_12,psi_22,Z2) \
|
||||||
VACCTIMESI2(UChi_00,psi_30,Z3)\
|
VACCTIMESI2(UChi_00,psi_30,Z3) \
|
||||||
VACCTIMESI2(UChi_01,psi_31,Z4)\
|
VACCTIMESI2(UChi_01,psi_31,Z4) \
|
||||||
VACCTIMESI2(UChi_02,psi_32,Z5)\
|
VACCTIMESI2(UChi_02,psi_32,Z5) \
|
||||||
);
|
);
|
||||||
|
|
||||||
#define YP_RECON_ACCUM __asm__ ( \
|
#define YP_RECON_ACCUM __asm__ ( \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VADD(UChi_10,psi_20,psi_20)\
|
VADD(UChi_10,psi_20,psi_20) \
|
||||||
VADD(UChi_11,psi_21,psi_21)\
|
VADD(UChi_11,psi_21,psi_21) \
|
||||||
VADD(UChi_12,psi_22,psi_22)\
|
VADD(UChi_12,psi_22,psi_22) \
|
||||||
VSUB(UChi_00,psi_30,psi_30)\
|
VSUB(UChi_00,psi_30,psi_30) \
|
||||||
VSUB(UChi_01,psi_31,psi_31)\
|
VSUB(UChi_01,psi_31,psi_31) \
|
||||||
VSUB(UChi_02,psi_32,psi_32) );
|
VSUB(UChi_02,psi_32,psi_32) );
|
||||||
|
|
||||||
#define YM_RECON_ACCUM __asm__ ( \
|
#define YM_RECON_ACCUM __asm__ ( \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VSUB(UChi_10,psi_20,psi_20)\
|
VSUB(UChi_10,psi_20,psi_20) \
|
||||||
VSUB(UChi_11,psi_21,psi_21)\
|
VSUB(UChi_11,psi_21,psi_21) \
|
||||||
VSUB(UChi_12,psi_22,psi_22)\
|
VSUB(UChi_12,psi_22,psi_22) \
|
||||||
VADD(UChi_00,psi_30,psi_30)\
|
VADD(UChi_00,psi_30,psi_30) \
|
||||||
VADD(UChi_01,psi_31,psi_31)\
|
VADD(UChi_01,psi_31,psi_31) \
|
||||||
VADD(UChi_02,psi_32,psi_32) );
|
VADD(UChi_02,psi_32,psi_32) );
|
||||||
|
|
||||||
#define ZP_RECON_ACCUM __asm__ ( \
|
#define ZP_RECON_ACCUM __asm__ ( \
|
||||||
VACCTIMESMINUSI0(UChi_00,psi_20,Z0)\
|
VACCTIMESMINUSI0(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESI0(UChi_10,psi_30,Z3)\
|
VACCTIMESI0(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI0(UChi_01,psi_21,Z1)\
|
VACCTIMESMINUSI0(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESI0(UChi_11,psi_31,Z4)\
|
VACCTIMESI0(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI0(UChi_02,psi_22,Z2)\
|
VACCTIMESMINUSI0(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESI0(UChi_12,psi_32,Z5)\
|
VACCTIMESI0(UChi_12,psi_32,Z5) \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VACCTIMESMINUSI1(UChi_00,psi_20,Z0)\
|
VACCTIMESMINUSI1(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESI1(UChi_10,psi_30,Z3)\
|
VACCTIMESI1(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI1(UChi_01,psi_21,Z1)\
|
VACCTIMESMINUSI1(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESI1(UChi_11,psi_31,Z4)\
|
VACCTIMESI1(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI1(UChi_02,psi_22,Z2)\
|
VACCTIMESMINUSI1(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESI1(UChi_12,psi_32,Z5)\
|
VACCTIMESI1(UChi_12,psi_32,Z5) \
|
||||||
VACCTIMESMINUSI2(UChi_00,psi_20,Z0)\
|
VACCTIMESMINUSI2(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI2(UChi_01,psi_21,Z1)\
|
VACCTIMESMINUSI2(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI2(UChi_02,psi_22,Z2)\
|
VACCTIMESMINUSI2(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESI2(UChi_10,psi_30,Z3)\
|
VACCTIMESI2(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESI2(UChi_11,psi_31,Z4)\
|
VACCTIMESI2(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESI2(UChi_12,psi_32,Z5)\
|
VACCTIMESI2(UChi_12,psi_32,Z5) \
|
||||||
);
|
);
|
||||||
|
|
||||||
#define ZM_RECON_ACCUM __asm__ ( \
|
#define ZM_RECON_ACCUM __asm__ ( \
|
||||||
VACCTIMESI0(UChi_00,psi_20,Z0)\
|
VACCTIMESI0(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI0(UChi_10,psi_30,Z3)\
|
VACCTIMESMINUSI0(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESI0(UChi_01,psi_21,Z1)\
|
VACCTIMESI0(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI0(UChi_11,psi_31,Z4)\
|
VACCTIMESMINUSI0(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESI0(UChi_02,psi_22,Z2)\
|
VACCTIMESI0(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESMINUSI0(UChi_12,psi_32,Z5)\
|
VACCTIMESMINUSI0(UChi_12,psi_32,Z5) \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VACCTIMESI1(UChi_00,psi_20,Z0)\
|
VACCTIMESI1(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESMINUSI1(UChi_10,psi_30,Z3)\
|
VACCTIMESMINUSI1(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESI1(UChi_01,psi_21,Z1)\
|
VACCTIMESI1(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESMINUSI1(UChi_11,psi_31,Z4)\
|
VACCTIMESMINUSI1(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESI1(UChi_02,psi_22,Z2)\
|
VACCTIMESI1(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESMINUSI1(UChi_12,psi_32,Z5)\
|
VACCTIMESMINUSI1(UChi_12,psi_32,Z5) \
|
||||||
VACCTIMESI2(UChi_00,psi_20,Z0)\
|
VACCTIMESI2(UChi_00,psi_20,Z0) \
|
||||||
VACCTIMESI2(UChi_01,psi_21,Z1)\
|
VACCTIMESI2(UChi_01,psi_21,Z1) \
|
||||||
VACCTIMESI2(UChi_02,psi_22,Z2)\
|
VACCTIMESI2(UChi_02,psi_22,Z2) \
|
||||||
VACCTIMESMINUSI2(UChi_10,psi_30,Z3)\
|
VACCTIMESMINUSI2(UChi_10,psi_30,Z3) \
|
||||||
VACCTIMESMINUSI2(UChi_11,psi_31,Z4)\
|
VACCTIMESMINUSI2(UChi_11,psi_31,Z4) \
|
||||||
VACCTIMESMINUSI2(UChi_12,psi_32,Z5)\
|
VACCTIMESMINUSI2(UChi_12,psi_32,Z5) \
|
||||||
);
|
);
|
||||||
|
|
||||||
#define TP_RECON_ACCUM __asm__ ( \
|
#define TP_RECON_ACCUM __asm__ ( \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VADD(UChi_00,psi_20,psi_20)\
|
VADD(UChi_00,psi_20,psi_20) \
|
||||||
VADD(UChi_10,psi_30,psi_30)\
|
VADD(UChi_10,psi_30,psi_30) \
|
||||||
VADD(UChi_01,psi_21,psi_21)\
|
VADD(UChi_01,psi_21,psi_21) \
|
||||||
VADD(UChi_11,psi_31,psi_31)\
|
VADD(UChi_11,psi_31,psi_31) \
|
||||||
VADD(UChi_02,psi_22,psi_22)\
|
VADD(UChi_02,psi_22,psi_22) \
|
||||||
VADD(UChi_12,psi_32,psi_32) );
|
VADD(UChi_12,psi_32,psi_32) );
|
||||||
|
|
||||||
#define TM_RECON_ACCUM __asm__ ( \
|
#define TM_RECON_ACCUM __asm__ ( \
|
||||||
VADD(UChi_00,psi_00,psi_00)\
|
VADD(UChi_00,psi_00,psi_00) \
|
||||||
VADD(UChi_10,psi_10,psi_10)\
|
VADD(UChi_10,psi_10,psi_10) \
|
||||||
VADD(UChi_01,psi_01,psi_01)\
|
VADD(UChi_01,psi_01,psi_01) \
|
||||||
VADD(UChi_11,psi_11,psi_11)\
|
VADD(UChi_11,psi_11,psi_11) \
|
||||||
VADD(UChi_02,psi_02,psi_02)\
|
VADD(UChi_02,psi_02,psi_02) \
|
||||||
VADD(UChi_12,psi_12,psi_12)\
|
VADD(UChi_12,psi_12,psi_12) \
|
||||||
VSUB(UChi_00,psi_20,psi_20)\
|
VSUB(UChi_00,psi_20,psi_20) \
|
||||||
VSUB(UChi_10,psi_30,psi_30)\
|
VSUB(UChi_10,psi_30,psi_30) \
|
||||||
VSUB(UChi_01,psi_21,psi_21)\
|
VSUB(UChi_01,psi_21,psi_21) \
|
||||||
VSUB(UChi_11,psi_31,psi_31)\
|
VSUB(UChi_11,psi_31,psi_31) \
|
||||||
VSUB(UChi_02,psi_22,psi_22)\
|
VSUB(UChi_02,psi_22,psi_22) \
|
||||||
VSUB(UChi_12,psi_32,psi_32) );
|
VSUB(UChi_12,psi_32,psi_32) );
|
||||||
|
|
||||||
#define AVX512_PF_L1
|
#define AVX512_PF_L1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user