mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
support all avx10 bf16 instructions
This commit is contained in:
parent
6dc564185b
commit
842c3cc83f
3 changed files with 143 additions and 35 deletions
|
@ -198,6 +198,8 @@ void putX_XM()
|
|||
{ 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
{ 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
|
||||
{ 0x51, "vsqrtnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -401,6 +403,30 @@ void putX_X_XM_IMM()
|
|||
{ 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
|
||||
{ 0x58, "vaddnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x5E, "vdivnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x5F, "vmaxpbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x5D, "vminpbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x59, "vmulnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x2C, "vscalefpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16,false },
|
||||
{ 0x5C, "vsubnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
|
||||
{ 0x98, "vfmadd132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xA8, "vfmadd213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xB8, "vfmadd231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
|
||||
{ 0x9C, "vfnmadd132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xAC, "vfnmadd213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xBC, "vfnmadd231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
|
||||
{ 0x9A, "vfmsub132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xAA, "vfmsub213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xBA, "vfmsub231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
|
||||
{ 0x9E, "vfnmsub132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xAE, "vfnmsub213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xBE, "vfnmsub231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -758,6 +784,15 @@ void putX_XM_IMM()
|
|||
|
||||
{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
|
||||
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
||||
|
||||
{ 0x2F, "vcomsbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_N2, false },
|
||||
{ 0x42, "vgetexppbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x26, "vgetmantpbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
||||
{ 0x4C, "vrcppbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x56, "vreducenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
||||
{ 0x08, "vrndscalenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
||||
{ 0x4E, "vrsqrtpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0x2C, "vscalefpbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -946,41 +981,6 @@ void putFP16_2()
|
|||
|
||||
void putAVX10_BF16()
|
||||
{
|
||||
// x, x, op : 8
|
||||
const struct xxopTbl {
|
||||
const char *name;
|
||||
uint64_t type;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "vaddnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x58 },
|
||||
{ "vdivnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5E },
|
||||
{ "vmaxpbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5F },
|
||||
{ "vminpbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5D },
|
||||
{ "vmulnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x59 },
|
||||
{ "vscalefpbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x2C },
|
||||
{ "vsubnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5C },
|
||||
|
||||
{ "vfmadd132nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x98 },
|
||||
{ "vfmadd213nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xA8 },
|
||||
{ "vfmadd231nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xB8 },
|
||||
|
||||
{ "vfnmadd132nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x9C },
|
||||
{ "vfnmadd213nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xAC },
|
||||
{ "vfnmadd231nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xBC },
|
||||
|
||||
{ "vfmsub132nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x9A },
|
||||
{ "vfmsub213nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xAA },
|
||||
{ "vfmsub231nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xBA },
|
||||
|
||||
{ "vfnmsub132nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x9E },
|
||||
{ "vfnmsub213nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xAE },
|
||||
{ "vfnmsub231nepbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0xBE },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const xxopTbl& p = tbl[i];
|
||||
std::string s = type2String(p.type | T_MUST_EVEX);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%2X); }\n" , p.name, s.c_str(), p.code);
|
||||
}
|
||||
puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
|
||||
puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");
|
||||
}
|
||||
|
|
|
@ -109,3 +109,102 @@ vfpclasspbf16(k5|k5, yword[rax+128], 10);
|
|||
vfpclasspbf16(k6|k5, yword_b[rax+128], 11);
|
||||
vfpclasspbf16(k7|k5, zword[rax+128], 12);
|
||||
vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
|
||||
|
||||
vcomsbf16(xm2, xm3);
|
||||
vcomsbf16(xm2, ptr[rax+128]);
|
||||
|
||||
vgetexppbf16(xm1|k3, xmm2);
|
||||
vgetexppbf16(xm1|k3, ptr[rax+128]);
|
||||
vgetexppbf16(xm1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexppbf16(ym1|k3, ymm2);
|
||||
vgetexppbf16(ym1|k3, ptr[rax+128]);
|
||||
vgetexppbf16(ym1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetexppbf16(zm1|k3, zmm2);
|
||||
vgetexppbf16(zm1|k3, ptr[rax+128]);
|
||||
vgetexppbf16(zm1|k3, ptr_b[rax+128]);
|
||||
|
||||
vgetmantpbf16(xm1|k3, xmm2, 3);
|
||||
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
|
||||
vgetmantpbf16(xm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantpbf16(ym1|k3, ymm2, 3);
|
||||
vgetmantpbf16(ym1|k3, ptr[rax+128], 5);
|
||||
vgetmantpbf16(ym1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vgetmantpbf16(zm1|k3, zmm2, 3);
|
||||
vgetmantpbf16(zm1|k3, ptr[rax+128], 5);
|
||||
vgetmantpbf16(zm1|k3, ptr_b[rax+128], 9);
|
||||
|
||||
vrcppbf16(xm1|k5, xm2);
|
||||
vrcppbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrcppbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcppbf16(ym1|k5, ym2);
|
||||
vrcppbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrcppbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrcppbf16(zm1|k5, zm2);
|
||||
vrcppbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrcppbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vreducenepbf16(xm1|k4, xm2, 1);
|
||||
vreducenepbf16(xm1|k4, ptr[rax+128], 1);
|
||||
vreducenepbf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducenepbf16(ym1|k4, ym2, 1);
|
||||
vreducenepbf16(ym1|k4, ptr[rax+128], 1);
|
||||
vreducenepbf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vreducenepbf16(zm1|k4, zm2, 1);
|
||||
vreducenepbf16(zm1|k4, ptr[rax+128], 1);
|
||||
vreducenepbf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalenepbf16(xm1|k4, xm2, 1);
|
||||
vrndscalenepbf16(xm1|k4, ptr[rax+128], 1);
|
||||
vrndscalenepbf16(xm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalenepbf16(ym1|k4, ym2, 1);
|
||||
vrndscalenepbf16(ym1|k4, ptr[rax+128], 1);
|
||||
vrndscalenepbf16(ym1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrndscalenepbf16(zm1|k4, zm2, 1);
|
||||
vrndscalenepbf16(zm1|k4, ptr[rax+128], 1);
|
||||
vrndscalenepbf16(zm1|k4, ptr_b[rax+128], 1);
|
||||
|
||||
vrsqrtpbf16(xm1|k5, xm2);
|
||||
vrsqrtpbf16(xm1|k5, ptr[rcx+128]);
|
||||
vrsqrtpbf16(xm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtpbf16(ym1|k5, ym2);
|
||||
vrsqrtpbf16(ym1|k5, ptr[rcx+128]);
|
||||
vrsqrtpbf16(ym1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vrsqrtpbf16(zm1|k5, zm2);
|
||||
vrsqrtpbf16(zm1|k5, ptr[rcx+128]);
|
||||
vrsqrtpbf16(zm1|k5, ptr_b[rcx+128]);
|
||||
|
||||
vscalefpbf16(xm1|k5, xm5, xm2);
|
||||
vscalefpbf16(xm1|k5, xm5, ptr[rcx+128]);
|
||||
vscalefpbf16(xm1|k5, xm5, ptr_b[rcx+128]);
|
||||
|
||||
vscalefpbf16(ym1|k5, ym9, ym2);
|
||||
vscalefpbf16(ym1|k5, ym9, ptr[rcx+128]);
|
||||
vscalefpbf16(ym1|k5, ym9, ptr_b[rcx+128]);
|
||||
|
||||
vscalefpbf16(zm1|k5, zm30, zm2);
|
||||
vscalefpbf16(zm1|k5, zm30, ptr[rcx+128]);
|
||||
vscalefpbf16(zm1|k5, zm30, ptr_b[rcx+128]);
|
||||
|
||||
vsqrtnepbf16(xm5|k3, xmm4);
|
||||
vsqrtnepbf16(xm5|k3, ptr[rax+128]);
|
||||
vsqrtnepbf16(xm5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtnepbf16(ym5|k3, ymm4);
|
||||
vsqrtnepbf16(ym5|k3, ptr[rax+128]);
|
||||
vsqrtnepbf16(ym5|k3, ptr_b[rax+128]);
|
||||
|
||||
vsqrtnepbf16(zm5|k3, zmm4);
|
||||
vsqrtnepbf16(zm5|k3, ptr[rax+128]);
|
||||
vsqrtnepbf16(zm5|k3, ptr_b[rax+128]);
|
||||
|
|
|
@ -2202,6 +2202,7 @@ void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x
|
|||
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
|
||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||
// vcomsbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x2F; no immediate argument is supplied.
void vcomsbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_N2, 0x2F);
}
|
||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); }
|
||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
|
||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); }
|
||||
|
@ -2345,12 +2346,14 @@ void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0
|
|||
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
|
||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); }
|
||||
// vgetexppbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x42; no immediate argument is supplied.
void vgetexppbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x42);
}
|
||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); }
|
||||
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); }
|
||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); }
|
||||
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0x43); }
|
||||
void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
|
||||
void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
|
||||
// vgetmantpbf16 x, op, imm: delegates to opAVX_X_XM_IMM with opcode 0x26 and forwards imm unchanged.
void vgetmantpbf16(const Xmm& x, const Operand& op, uint8_t imm)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, 0x26, imm);
}
|
||||
void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x26, imm); }
|
||||
void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x26, imm); }
|
||||
void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x26, imm); }
|
||||
|
@ -2549,14 +2552,17 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
|
|||
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
|
||||
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCB); }
|
||||
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCB); }
|
||||
// vrcppbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x4C; no immediate argument is supplied.
void vrcppbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x4C);
}
|
||||
void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); }
|
||||
void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4D); }
|
||||
// vreducenepbf16 x, op, imm: delegates to opAVX_X_XM_IMM with opcode 0x56 and forwards imm unchanged.
void vreducenepbf16(const Xmm& x, const Operand& op, uint8_t imm)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, 0x56, imm);
}
|
||||
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x56, imm); }
|
||||
void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x56, imm); }
|
||||
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x56, imm); }
|
||||
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
|
||||
void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
|
||||
void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
|
||||
// vrndscalenepbf16 x, op, imm: delegates to opAVX_X_XM_IMM with opcode 0x08 and forwards imm unchanged.
void vrndscalenepbf16(const Xmm& x, const Operand& op, uint8_t imm)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, 0x08, imm);
}
|
||||
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x09, imm); }
|
||||
void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x08, imm); }
|
||||
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x08, imm); }
|
||||
|
@ -2571,8 +2577,10 @@ void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 |
|
|||
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
|
||||
void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCD); }
|
||||
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCD); }
|
||||
// vrsqrtpbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x4E; no immediate argument is supplied.
void vrsqrtpbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x4E);
}
|
||||
void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); }
|
||||
void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4F); }
|
||||
// Two-operand vscalefpbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x2C; no immediate is supplied.
// NOTE(review): a three-operand vscalefpbf16(x1, x2, op) with the same flags and opcode is declared
// alongside this one, and the test listing only exercises that three-operand form. This overload looks
// like it comes from the mnemonic being listed in both generator tables (putX_X_XM_IMM and putX_XM_IMM);
// confirm against the instruction reference whether a two-operand encoding is intended at all.
void vscalefpbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x2C);
}
|
||||
// Three-operand vscalefpbf16 x1, x2, op: delegates to opAVX_X_X_XM with opcode 0x2C.
void vscalefpbf16(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x2C);
}
|
||||
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x2C); }
|
||||
void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x2C); }
|
||||
|
@ -2596,6 +2604,7 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
|
|||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
||||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||
// vsqrtnepbf16 x, op: delegates to opAVX_X_XM_IMM with opcode 0x51; no immediate argument is supplied.
void vsqrtnepbf16(const Xmm& x, const Operand& op)
{
	opAVX_X_XM_IMM(x, op, T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x51);
}
|
||||
void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x51); }
|
||||
void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x51); }
|
||||
// vsubnepbf16 x1, x2, op: delegates to opAVX_X_X_XM with opcode 0x5C.
void vsubnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op, T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5C);
}
|
||||
|
|
Loading…
Reference in a new issue