mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
This commit is contained in:
parent
f6c66cf6b8
commit
08f71cee95
5 changed files with 99 additions and 17 deletions
|
@ -136,7 +136,7 @@ param|vnniEnc|avx10Enc
|
||||||
EvexEncoding|AVX512-VNNI|AVX10.2
|
EvexEncoding|AVX512-VNNI|AVX10.2
|
||||||
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
|
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
|
||||||
default|EvexEncoding|VexEncoding
|
default|EvexEncoding|VexEncoding
|
||||||
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds
|
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds
|
||||||
|
|
||||||
### Remark
|
### Remark
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
|
|
|
@ -467,16 +467,22 @@ void putX_X_XM_IMM_AVX10()
|
||||||
int sel;
|
int sel;
|
||||||
bool hasIMM;
|
bool hasIMM;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
|
// vpdpb[su,uu,ss]d[,s]
|
||||||
{ 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x50, "vpdpbssd", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
{ 0x51, "vpdpbssds", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x51, "vpdpbssds", T_F2|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
{ 0x50, "vpdpbsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x50, "vpdpbsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
{ 0x51, "vpdpbsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x51, "vpdpbsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
{ 0x50, "vpdpbuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x50, "vpdpbuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
{ 0x51, "vpdpbuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
{ 0x51, "vpdpbuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
#if 0
|
|
||||||
{ 0x50, "vpdpbuud", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
|
// vpdpw[su,us,uu]d[,s]
|
||||||
{ 0x51, "vpdpbuuds", T_MUST_EVEX | T_YMM | T_0F38 | T_EW0 | T_B32, false },
|
{ 0xD2, "vpdpwsud", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
#endif
|
{ 0xD3, "vpdpwsuds", T_F3|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
|
{ 0xD2, "vpdpwusd", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
|
{ 0xD3, "vpdpwusds", T_66|T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
|
{ 0xD2, "vpdpwuud", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
|
{ 0xD3, "vpdpwuuds", T_0F38|T_YMM, T_W0, T_EW0|T_B32, 1, false },
|
||||||
|
|
||||||
{ 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true },
|
{ 0x42, "vmpsadbw", T_0F3A|T_YMM, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1, true },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
|
|
@ -1901,6 +1901,7 @@ void put()
|
||||||
}
|
}
|
||||||
// avx-vnni-int8
|
// avx-vnni-int8
|
||||||
// avx-vnni-int16
|
// avx-vnni-int16
|
||||||
|
#if 0
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
uint8_t code;
|
uint8_t code;
|
||||||
|
@ -1914,12 +1915,12 @@ void put()
|
||||||
// { 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
// { 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||||
// { 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
// { 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
|
|
||||||
{ 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
// { 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
// { 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
// { 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
// { 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
// { 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM },
|
||||||
{ 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
// { 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -1927,6 +1928,7 @@ void put()
|
||||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void put32()
|
void put32()
|
||||||
|
|
|
@ -91,3 +91,77 @@ vpdpbuuds(ym1, ym2, ptr_b[rax+128]);
|
||||||
vpdpbuuds(zm1, zm2, zm3);
|
vpdpbuuds(zm1, zm2, zm3);
|
||||||
vpdpbuuds(zm1, zm2, ptr[rax+128]);
|
vpdpbuuds(zm1, zm2, ptr[rax+128]);
|
||||||
vpdpbuuds(zm1, zm2, ptr_b[rax+128]);
|
vpdpbuuds(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
//
|
||||||
|
vpdpwsud(xm1, xm2, xm3);
|
||||||
|
vpdpwsud(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwsud(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwsud(ym1, ym2, ym3);
|
||||||
|
vpdpwsud(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwsud(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwsud(zm1, zm2, zm3);
|
||||||
|
vpdpwsud(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwsud(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
//
|
||||||
|
vpdpwsuds(xm1, xm2, xm3);
|
||||||
|
vpdpwsuds(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwsuds(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwsuds(ym1, ym2, ym3);
|
||||||
|
vpdpwsuds(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwsuds(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwsuds(zm1, zm2, zm3);
|
||||||
|
vpdpwsuds(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwsuds(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
//
|
||||||
|
vpdpwusd(xm1, xm2, xm3);
|
||||||
|
vpdpwusd(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwusd(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwusd(ym1, ym2, ym3);
|
||||||
|
vpdpwusd(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwusd(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwusd(zm1, zm2, zm3);
|
||||||
|
vpdpwusd(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwusd(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
//
|
||||||
|
vpdpwusds(xm1, xm2, xm3);
|
||||||
|
vpdpwusds(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwusds(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwusds(ym1, ym2, ym3);
|
||||||
|
vpdpwusds(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwusds(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwusds(zm1, zm2, zm3);
|
||||||
|
vpdpwusds(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwusds(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
//
|
||||||
|
vpdpwuud(xm1, xm2, xm3);
|
||||||
|
vpdpwuud(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwuud(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwuud(ym1, ym2, ym3);
|
||||||
|
vpdpwuud(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwuud(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwuud(zm1, zm2, zm3);
|
||||||
|
vpdpwuud(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwuud(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
//
|
||||||
|
vpdpwuuds(xm1, xm2, xm3);
|
||||||
|
vpdpwuuds(xm1, xm2, ptr[rax+128]);
|
||||||
|
vpdpwuuds(xm1, xm2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwuuds(ym1, ym2, ym3);
|
||||||
|
vpdpwuuds(ym1, ym2, ptr[rax+128]);
|
||||||
|
vpdpwuuds(ym1, ym2, ptr_b[rax+128]);
|
||||||
|
|
||||||
|
vpdpwuuds(zm1, zm2, zm3);
|
||||||
|
vpdpwuuds(zm1, zm2, ptr[rax+128]);
|
||||||
|
vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
|
||||||
|
|
|
@ -1423,12 +1423,6 @@ void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding
|
||||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); }
|
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); }
|
||||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); }
|
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); }
|
||||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); }
|
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); }
|
||||||
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD2); }
|
|
||||||
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD3); }
|
|
||||||
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD2); }
|
|
||||||
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD3); }
|
|
||||||
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD2); }
|
|
||||||
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD3); }
|
|
||||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x36); }
|
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x36); }
|
||||||
|
@ -2451,6 +2445,12 @@ void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding
|
||||||
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
|
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
|
||||||
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
|
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
|
||||||
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
|
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
|
||||||
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
|
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
|
||||||
|
|
Loading…
Reference in a new issue