2016-03-31 14:51:32 +01:00
|
|
|
/*************************************************************************************
|
|
|
|
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
|
|
|
|
Source file: ./lib/simd/Avx512Asm.h
|
|
|
|
|
|
|
|
Copyright (C) 2015
|
|
|
|
|
|
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
|
|
*************************************************************************************/
|
|
|
|
/* END LEGAL */
|
|
|
|
// No guard can be multiply included as undef clearge of macros
|
|
|
|
#undef VZERO
|
|
|
|
#undef VMOV
|
|
|
|
#undef VLOAD
|
|
|
|
#undef VSTORE
|
|
|
|
#define VZERO(A) VZEROf(A)
|
|
|
|
#define VMOV(A,B) VMOVf(A,B)
|
|
|
|
#define VLOAD(OFF,PTR,DEST) VLOADf(OFF,PTR,DEST)
|
|
|
|
#define VSTORE(OFF,PTR,SRC) VSTOREf(OFF,PTR,SRC)
|
|
|
|
|
|
|
|
#undef VADD
|
|
|
|
#undef VSUB
|
|
|
|
#undef VMUL
|
|
|
|
#undef VMADD
|
|
|
|
#define VADD(A,B,C) VADDf(A,B,C)
|
|
|
|
#define VSUB(A,B,C) VSUBf(A,B,C)
|
|
|
|
#define VMUL(Uri,Uir,Chi) VMULf(Uri,Uir,Chi)
|
|
|
|
#define VMADD(Uri,Uir,Chi) VMADDf(Uri,Uir,Chi)
|
|
|
|
|
|
|
|
|
|
|
|
#undef VTIMESI
|
|
|
|
#undef VTIMESI0
|
|
|
|
#undef VTIMESI1
|
|
|
|
#undef VTIMESI2
|
|
|
|
#define VTIMESI(A,B,C) VTIMESIf(A,B,C)
|
|
|
|
#define VTIMESI0(A,B,C) VTIMESI0f(A,B,C)
|
|
|
|
#define VTIMESI1(A,B,C) VTIMESI1f(A,B,C)
|
|
|
|
#define VTIMESI2(A,B,C) VTIMESI2f(A,B,C)
|
|
|
|
|
|
|
|
#undef VTIMESMINUSI
|
|
|
|
#undef VTIMESMINUSI0
|
|
|
|
#undef VTIMESMINUSI1
|
|
|
|
#undef VTIMESMINUSI2
|
|
|
|
#define VTIMESMINUSI(A,B,C) VTIMESMINUSIf(A,B,C)
|
|
|
|
#define VTIMESMINUSI0(A,B,C) VTIMESMINUSI0f(A,B,C)
|
|
|
|
#define VTIMESMINUSI1(A,B,C) VTIMESMINUSI1f(A,B,C)
|
|
|
|
#define VTIMESMINUSI2(A,B,C) VTIMESMINUSI2f(A,B,C)
|
|
|
|
|
|
|
|
#undef VACCTIMESI
|
|
|
|
#undef VACCTIMESI0
|
|
|
|
#undef VACCTIMESI1
|
|
|
|
#undef VACCTIMESI2
|
|
|
|
#define VACCTIMESI(A,B,C) VACCTIMESIf(A,B,C)
|
|
|
|
#define VACCTIMESI0(A,B,C) VACCTIMESI0f(A,B,C)
|
|
|
|
#define VACCTIMESI1(A,B,C) VACCTIMESI1f(A,B,C)
|
|
|
|
#define VACCTIMESI2(A,B,C) VACCTIMESI2f(A,B,C)
|
|
|
|
|
|
|
|
#undef VACCTIMESMINUSI
|
|
|
|
#undef VACCTIMESMINUSI0
|
|
|
|
#undef VACCTIMESMINUSI1
|
|
|
|
#undef VACCTIMESMINUSI2
|
|
|
|
#define VACCTIMESMINUSI(A,B,C) VACCTIMESMINUSIf(A,B,C)
|
|
|
|
#define VACCTIMESMINUSI0(A,B,C) VACCTIMESMINUSI0f(A,B,C)
|
|
|
|
#define VACCTIMESMINUSI1(A,B,C) VACCTIMESMINUSI1f(A,B,C)
|
|
|
|
#define VACCTIMESMINUSI2(A,B,C) VACCTIMESMINUSI2f(A,B,C)
|
|
|
|
|
|
|
|
#undef VACCTIMESI1MEM
|
|
|
|
#undef VACCTIMESI2MEM
|
|
|
|
#define VACCTIMESI1MEM(A,ACC,O,P) VACCTIMESI1MEMf(A,ACC,O,P)
|
|
|
|
#define VACCTIMESI2MEM(A,ACC,O,P) VACCTIMESI2MEMf(A,ACC,O,P)
|
|
|
|
|
|
|
|
#undef VACCTIMESMINUSI1MEM
|
|
|
|
#undef VACCTIMESMINUSI2MEM
|
|
|
|
#define VACCTIMESMINUSI1MEM(A,ACC,O,P) VACCTIMESMINUSI1MEMf(A,ACC,O,P)
|
|
|
|
#define VACCTIMESMINUSI2MEM(A,ACC,O,P) VACCTIMESMINUSI2MEMf(A,ACC,O,P)
|
|
|
|
|
|
|
|
#undef VPERM0
|
|
|
|
#undef VPERM1
|
|
|
|
#undef VPERM2
|
|
|
|
#undef VPERM3
|
|
|
|
#define VPERM0(A,B) VPERM0f(A,B)
|
|
|
|
#define VPERM1(A,B) VPERM1f(A,B)
|
|
|
|
#define VPERM2(A,B) VPERM2f(A,B)
|
|
|
|
#define VPERM3(A,B) VPERM3f(A,B)
|
|
|
|
|
|
|
|
#undef VSHUFMEM
|
|
|
|
#undef VADDMEM
|
|
|
|
#undef VSUBMEM
|
|
|
|
#define VSHUFMEM(OFF,A,DEST) VSHUFMEMf(OFF,A,DEST)
|
|
|
|
#define VADDMEM(O,A,B,C) VADDMEMf(O,A,B,C)
|
|
|
|
#define VSUBMEM(O,A,B,C) VSUBMEMf(O,A,B,C)
|
|
|
|
|
|
|
|
#undef VMOVIDUP
|
|
|
|
#undef VMOVRDUP
|
2016-03-31 15:47:42 +01:00
|
|
|
#undef VMADDSUB
|
2016-03-31 14:51:32 +01:00
|
|
|
#undef VSHUF
|
|
|
|
#define VMOVIDUP(A,B,C) VMOVIDUPf(A,B,C)
|
|
|
|
#define VMOVRDUP(A,B,C) VMOVRDUPf(A,B,C)
|
|
|
|
#define VMADDSUB(A,B,accum) VMADDSUBf(A,B,accum)
|
|
|
|
#define VSHUF(A,B) VSHUFf(A,B)
|
|
|
|
|
|
|
|
#undef ZEND1
|
|
|
|
#undef ZEND2
|
|
|
|
#undef ZLOAD
|
|
|
|
#undef ZMUL
|
|
|
|
#undef ZMADD
|
|
|
|
#undef ZMULMEM2SP
|
|
|
|
#undef ZMADDMEM2SP
|
|
|
|
|
|
|
|
#define ZEND1(A,B,C) ZEND1f(A,B,C)
|
|
|
|
#define ZEND2(A,B,C) ZEND2f(A,B,C)
|
|
|
|
#define ZLOAD(A,B,C,D) ZLOADf(A,B,C,D)
|
|
|
|
#define ZMUL(A,B,C,D,E) ZMULf(A,B,C,D,E)
|
|
|
|
#define ZMADD(A,B,C,D,E) ZMADDf(A,B,C,D,E)
|
|
|
|
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
|
|
|
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
|
|
|
|
2016-06-03 11:24:26 +01:00
|
|
|
#undef VRDUP
|
|
|
|
#undef VIDUP
|
|
|
|
#undef VMADDSUBMEM
|
|
|
|
#undef VMADDMEM
|
|
|
|
#undef VMULMEM
|
|
|
|
|
|
|
|
#define VRDUP(SRC,DEST) VRDUPf(SRC,DEST)
|
|
|
|
#define VIDUP(SRC,DEST) VIDUPf(SRC,DEST)
|
|
|
|
#define VMADDSUBMEM(O,P,B,accum) VMADDSUBMEMf(O,P,B,accum)
|
|
|
|
#define VMADDMEM(O,P,B,accum) VMADDMEMf(O,P,B,accum)
|
|
|
|
#define VMULMEM(O,P,B,accum) VMULMEMf(O,P,B,accum)
|
|
|
|
|
2016-12-08 16:43:28 +00:00
|
|
|
#undef VMADDRDUP
|
2016-06-03 11:24:26 +01:00
|
|
|
#undef VMADDSUBRDUP
|
|
|
|
#undef VMADDSUBIDUP
|
|
|
|
#undef VMULRDUP
|
|
|
|
#undef VMULIDUP
|
2016-12-08 16:43:28 +00:00
|
|
|
#define VMADDRDUP(O,P,B,accum) VMADDRDUPf(O,P,B,accum)
|
2016-06-03 11:24:26 +01:00
|
|
|
#define VMADDSUBRDUP(O,P,B,accum) VMADDSUBRDUPf(O,P,B,accum)
|
|
|
|
#define VMADDSUBIDUP(O,P,B,accum) VMADDSUBIDUPf(O,P,B,accum)
|
|
|
|
#define VMULRDUP(O,P,B,accum) VMULRDUPf(O,P,B,accum)
|
|
|
|
#define VMULIDUP(O,P,B,accum) VMULIDUPf(O,P,B,accum)
|
|
|
|
|