mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
add type of w(x, x, op) in avx10 bf16
This commit is contained in:
parent
864fd0c49c
commit
3ca7e64c63
3 changed files with 46 additions and 2 deletions
|
@ -946,18 +946,26 @@ void putFP16_2()
|
|||
|
||||
void putAVX10_BF16()
|
||||
{
|
||||
const struct Tbl {
|
||||
// x, x, op
|
||||
const struct xxopTbl {
|
||||
const char *name;
|
||||
uint64_t type;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "vaddnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x58 },
|
||||
{ "vdivnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5E },
|
||||
{ "vmaxpbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5F },
|
||||
{ "vminpbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5D },
|
||||
{ "vmulnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x59 },
|
||||
{ "vscalefpbf16", T_MAP6 | T_EW0 | T_YMM | T_B16, 0x2C },
|
||||
{ "vsubnepbf16", T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, 0x5C },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
const xxopTbl& p = tbl[i];
|
||||
std::string s = type2String(p.type | T_MUST_EVEX);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%2X); }\n" , p.name, s.c_str(), p.code);
|
||||
}
|
||||
// { "vrcppbf16", T_66 | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x4C },
|
||||
}
|
||||
|
||||
void putFP16()
|
||||
|
|
|
@ -2,3 +2,33 @@ vaddnepbf16(xm1, xm2, xm3);
|
|||
vaddnepbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vaddnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vaddnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vdivnepbf16(xm1, xm2, xm3);
|
||||
vdivnepbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vdivnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vdivnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmaxpbf16(xm1, xm2, xm3);
|
||||
vmaxpbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmaxpbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmaxpbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vminpbf16(xm1, xm2, xm3);
|
||||
vminpbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vminpbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vminpbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vmulnepbf16(xm1, xm2, xm3);
|
||||
vmulnepbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vmulnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vmulnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vscalefpbf16(xm1, xm2, xm3);
|
||||
vscalefpbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vscalefpbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vscalefpbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
vsubnepbf16(xm1, xm2, xm3);
|
||||
vsubnepbf16(ym1|k1, ym2, ptr[rax+128]);
|
||||
vsubnepbf16(ym1|k1, ym2, ptr_b[rax+128]);
|
||||
vsubnepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
|
||||
|
|
|
@ -2259,6 +2259,7 @@ void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2
|
|||
void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
|
||||
void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
|
||||
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x42, imm); }
|
||||
void vdivnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5E); }
|
||||
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
|
||||
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); }
|
||||
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52); }
|
||||
|
@ -2350,8 +2351,10 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm)
|
|||
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3A, imm); }
|
||||
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x38, imm); }
|
||||
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3A, imm); }
|
||||
void vmaxpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5F); }
|
||||
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5F); }
|
||||
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5F); }
|
||||
void vminpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5D); }
|
||||
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5D); }
|
||||
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5D); }
|
||||
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
|
||||
|
@ -2372,6 +2375,7 @@ void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2,
|
|||
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
|
||||
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
|
||||
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
|
||||
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
|
||||
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
||||
|
@ -2555,6 +2559,7 @@ void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(
|
|||
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCD); }
|
||||
void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); }
|
||||
void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4F); }
|
||||
void vscalefpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x2C); }
|
||||
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x2C); }
|
||||
void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x2C); }
|
||||
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x2C); }
|
||||
|
@ -2579,6 +2584,7 @@ void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
|
|||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||
void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x51); }
|
||||
void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x51); }
|
||||
void vsubnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5C); }
|
||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
||||
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
|
||||
|
|
Loading…
Reference in a new issue