mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
This commit is contained in:
parent
46238d9845
commit
0c2f7fc6db
8 changed files with 44 additions and 35 deletions
|
@ -124,7 +124,7 @@ vpdpbusd(xm0, xm1, xm2); // VEX
|
|||
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
|
||||
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
|
||||
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
|
||||
setDefaultEncoding(VexEncoding, AVX10p2Encoding); // use 2nd argument.
|
||||
setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
|
||||
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
|
||||
```
|
||||
|
||||
|
@ -133,9 +133,10 @@ Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.
|
|||
|
||||
param|vnniEnc|avx10Enc
|
||||
-|-|-
|
||||
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
|
||||
VexEncoding|AVX-VNNI|-
|
||||
EvexEncoding|AVX512-VNNI|-
|
||||
AVX10p2Encoding|-|AVX10.2
|
||||
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
|
||||
AVX10v2Encoding|-|AVX10.2
|
||||
default|EvexEncoding|PreAVX10v2Encoding
|
||||
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
|
||||
|
||||
|
|
|
@ -264,7 +264,6 @@ void putM_X()
|
|||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
|
||||
{ 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -1079,12 +1078,6 @@ void putFP16_2()
|
|||
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
|
||||
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
|
||||
}
|
||||
{
|
||||
uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||
std::string s = type2String(type);
|
||||
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
|
||||
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16()
|
||||
|
|
|
@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
|
|||
avx10_test: avx10_test.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
|
||||
|
||||
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt
|
||||
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
|
||||
xed_test:
|
||||
@set -e; \
|
||||
for target in $(addprefix avx10/, $(TEST_FILES)); do \
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// AVX10 integer and FP16 VNNI, media and zero-extending
|
||||
vdpphps(xm1, xm2, xm3);
|
||||
vdpphps(xm1, xm2, ptr[rax+128]);
|
||||
vdpphps(xm1, xm2, ptr_b[rax+128]);
|
||||
|
@ -168,5 +169,11 @@ vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
|
|||
|
||||
//
|
||||
vmovd(xm10, xm20);
|
||||
vmovd(xm1, xm2);
|
||||
vmovd(xm10, ptr[rax+128]);
|
||||
vmovd(ptr[rax+128], xm30);
|
||||
//
|
||||
vmovw(xm1, xm20);
|
||||
vmovw(xm1, xm2);
|
||||
vmovw(xm3, ptr [rax+0x40]);
|
||||
vmovw(ptr [rax+0x40], xm7);
|
||||
|
|
|
@ -355,10 +355,6 @@ vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
|
|||
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
|
||||
vmovsh(ptr [rax+0x40]|k1, xmm1);
|
||||
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
|
||||
vmovw(xmm1, r13d);
|
||||
vmovw(xmm3, ptr [rax+0x40]);
|
||||
vmovw(r9d, xmm1);
|
||||
vmovw(ptr [rax+0x40], xmm7);
|
||||
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
|
||||
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
|
||||
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
|
||||
|
|
|
@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
|
|||
Code()
|
||||
: Xbyak::CodeGenerator(4096*8)
|
||||
{
|
||||
setDefaultEncoding(EvexEncoding, AVX10p2Encoding);
|
||||
setDefaultEncoding(EvexEncoding, AVX10v2Encoding);
|
||||
#include "tmp.cpp"
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1674,8 +1674,8 @@ typedef enum {
|
|||
DefaultEncoding,
|
||||
VexEncoding,
|
||||
EvexEncoding,
|
||||
AVX512Encoding = EvexEncoding,
|
||||
AVX10p2Encoding
|
||||
PreAVX10v2Encoding = EvexEncoding,
|
||||
AVX10v2Encoding
|
||||
} PreferredEncoding;
|
||||
|
||||
class CodeGenerator : public CodeArray {
|
||||
|
@ -3177,9 +3177,9 @@ public:
|
|||
#endif
|
||||
|
||||
// set default encoding
|
||||
// vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex)
|
||||
// avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding)
|
||||
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)
|
||||
// vnniEnc : encoding used for the VNNI mnemonics: AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding); stored in defaultEncoding_[0]
|
||||
// avx10Enc : encoding used for vmpsadbw etc.: AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding); stored in defaultEncoding_[1]
|
||||
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding)
|
||||
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }
|
||||
|
||||
void bswap(const Reg32e& r)
|
||||
|
@ -3194,7 +3194,8 @@ public:
|
|||
}
|
||||
db(0xC8 + (idx & 7));
|
||||
}
|
||||
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
|
||||
// AVX10 zero-extending for vmovd, vmovw
|
||||
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit)
|
||||
{
|
||||
const Operand *p1 = &op1;
|
||||
const Operand *p2 = &op2;
|
||||
|
@ -3208,18 +3209,32 @@ public:
|
|||
std::swap(p1, p2);
|
||||
rev = !rev;
|
||||
}
|
||||
if (getEncoding(encoding, 1) == AVX10p2Encoding) {
|
||||
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) {
|
||||
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6);
|
||||
return;
|
||||
}
|
||||
int sel = -1;
|
||||
if (getEncoding(encoding, 1) == AVX10v2Encoding) {
|
||||
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
|
||||
} else {
|
||||
if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) {
|
||||
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 0x6E : 0x7E);
|
||||
return;
|
||||
}
|
||||
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
|
||||
}
|
||||
XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
||||
}
|
||||
// vmovd: supplies the type/opcode tables for each form and delegates operand checking and emission to opAVX10ZeroExt.
// The tables are indexed by the selector computed in opAVX10ZeroExt: int(rev) when AVX10v2Encoding is not selected, 2 + int(rev) when it is.
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
|
||||
{
|
||||
const uint64_t typeTbl[] = {
|
||||
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // [0],[1]: legacy, avx, avx512
|
||||
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // [2],[3]: avx10.2
|
||||
};
|
||||
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E }; // opcode paired with the typeTbl entry of the same index
|
||||
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32); // 32: register width passed to isREG() on the non-AVX10.2 path
|
||||
}
|
||||
// vmovw: supplies the type/opcode tables for each form and delegates operand checking and emission to opAVX10ZeroExt.
// The tables are indexed by the selector computed in opAVX10ZeroExt: int(rev) when AVX10v2Encoding is not selected, 2 + int(rev) when it is.
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
|
||||
{
|
||||
const uint64_t typeTbl[] = {
|
||||
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // [0],[1]: avx512-fp16
|
||||
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // [2],[3]: avx10.2
|
||||
};
|
||||
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E }; // opcode paired with the typeTbl entry of the same index
|
||||
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64); // 16|32|64: register widths passed to isREG() on the non-AVX10.2 path
|
||||
}
|
||||
/*
|
||||
use single byte nop if useMultiByteNop = false
|
||||
|
|
|
@ -2422,9 +2422,6 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F
|
|||
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); }
|
||||
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
|
||||
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
|
||||
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
|
||||
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); }
|
||||
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
|
||||
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
|
||||
|
|
Loading…
Reference in a new issue