mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
This commit is contained in:
parent
f6c66cf6b8
commit
08f71cee95
5 changed files with 99 additions and 17 deletions
|
@ -136,7 +136,7 @@ param|vnniEnc|avx10Enc
|
|||
EvexEncoding|AVX512-VNNI|AVX10.2
|
||||
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
|
||||
default|EvexEncoding|VexEncoding
|
||||
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds
|
||||
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds
|
||||
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
|
|
|
@ -467,16 +467,22 @@ void putX_X_XM_IMM_AVX10()
|
|||
int sel;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
// vpdpb[su,uu,ss]d[,s]
|
||||
{ 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0x51, "vpdpbssds", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0x50, "vpdpbsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0x51, "vpdpbsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0x50, "vpdpbuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0x51, "vpdpbuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
#if 0
|
||||
{ 0x50, "vpdpbuud", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
|
||||
{ 0x51, "vpdpbuuds", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
|
||||
#endif
|
||||
|
||||
// vpdpw[su,us,uu]d[,s]
|
||||
{ 0xD2, "vpdpwsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0xD3, "vpdpwsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0xD2, "vpdpwusd", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0xD3, "vpdpwusds", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0xD2, "vpdpwuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
{ 0xD3, "vpdpwuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||
|
||||
{ 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
|
|
|
@ -1901,6 +1901,7 @@ void put()
|
|||
}
|
||||
// avx-vnni-int8
|
||||
// avx-vnni-int16
|
||||
#if 0
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
|
@ -1914,12 +1915,12 @@ void put()
|
|||
// { 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||
// { 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||
|
||||
{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
||||
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
||||
// { 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -1927,6 +1928,7 @@ void put()
|
|||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void put32()
|
||||
|
|
|
@ -91,3 +91,77 @@ vpdpbuuds(ym1, ym2, ptr_b[rax+128]);
|
|||
vpdpbuuds(zm1, zm2, zm3);
|
||||
vpdpbuuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpbuuds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vpdpwsud(xm1, xm2, xm3);
|
||||
vpdpwsud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwsud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsud(ym1, ym2, ym3);
|
||||
vpdpwsud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwsud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsud(zm1, zm2, zm3);
|
||||
vpdpwsud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwsud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwsuds(xm1, xm2, xm3);
|
||||
vpdpwsuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwsuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsuds(ym1, ym2, ym3);
|
||||
vpdpwsuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwsuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwsuds(zm1, zm2, zm3);
|
||||
vpdpwsuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwsuds(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwusd(xm1, xm2, xm3);
|
||||
vpdpwusd(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwusd(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusd(ym1, ym2, ym3);
|
||||
vpdpwusd(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwusd(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusd(zm1, zm2, zm3);
|
||||
vpdpwusd(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwusd(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwusds(xm1, xm2, xm3);
|
||||
vpdpwusds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwusds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusds(ym1, ym2, ym3);
|
||||
vpdpwusds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwusds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwusds(zm1, zm2, zm3);
|
||||
vpdpwusds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwusds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
||||
//
|
||||
vpdpwuud(xm1, xm2, xm3);
|
||||
vpdpwuud(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwuud(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuud(ym1, ym2, ym3);
|
||||
vpdpwuud(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwuud(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuud(zm1, zm2, zm3);
|
||||
vpdpwuud(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwuud(zm1, zm2, ptr_b[rax+128]);
|
||||
//
|
||||
vpdpwuuds(xm1, xm2, xm3);
|
||||
vpdpwuuds(xm1, xm2, ptr[rax+128]);
|
||||
vpdpwuuds(xm1, xm2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuuds(ym1, ym2, ym3);
|
||||
vpdpwuuds(ym1, ym2, ptr[rax+128]);
|
||||
vpdpwuuds(ym1, ym2, ptr_b[rax+128]);
|
||||
|
||||
vpdpwuuds(zm1, zm2, zm3);
|
||||
vpdpwuuds(zm1, zm2, ptr[rax+128]);
|
||||
vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
|
||||
|
|
|
@ -1423,12 +1423,6 @@ void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding
|
|||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); }
|
||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); }
|
||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); }
|
||||
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD2); }
|
||||
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD3); }
|
||||
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD2); }
|
||||
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD3); }
|
||||
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD2); }
|
||||
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD3); }
|
||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x36); }
|
||||
|
@ -2451,6 +2445,12 @@ void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding
|
|||
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwsud: forwards x1, x2, op to opEncoding with base type T_F3|T_0F38|T_YMM and opcode 0xD2; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwsuds: forwards x1, x2, op to opEncoding with base type T_F3|T_0F38|T_YMM and opcode 0xD3; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwusd: forwards x1, x2, op to opEncoding with base type T_66|T_0F38|T_YMM and opcode 0xD2; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwusds: forwards x1, x2, op to opEncoding with base type T_66|T_0F38|T_YMM and opcode 0xD3; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwuud: forwards x1, x2, op to opEncoding with base type T_0F38|T_YMM (no prefix flag) and opcode 0xD2; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
// vpdpwuuds: forwards x1, x2, op to opEncoding with base type T_0F38|T_YMM (no prefix flag) and opcode 0xD3; `encoding` defaults to DefaultEncoding. NOTE(review): T_W0 and T_EW0|T_B32 look like the VEX-only and EVEX-only type flags -- confirm against opEncoding.
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
|
||||
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
|
||||
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
|
||||
|
|
Loading…
Reference in a new issue