add vcmppbf16, vfpclasspbf16

This commit is contained in:
MITSUNARI Shigeo 2024-10-11 13:02:17 +09:00
parent a84866bcbc
commit 6dc564185b
3 changed files with 22 additions and 2 deletions

View file

@ -946,7 +946,7 @@ void putFP16_2()
void putAVX10_BF16()
{
// x, x, op
// x, x, op : 8
const struct xxopTbl {
const char *name;
uint64_t type;
@ -981,7 +981,8 @@ void putAVX10_BF16()
std::string s = type2String(p.type | T_MUST_EVEX);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%2X); }\n" , p.name, s.c_str(), p.code);
}
// { "vrcppbf16", T_66 | T_MAP6 | T_EW0 | T_YMM | T_B16, 0x4C },
puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");
}
void putFP16()

View file

@ -92,3 +92,20 @@ vfnmsub231nepbf16(xm1, xm2, xm3);
vfnmsub231nepbf16(ym1|k1, ym2, ptr[rax+128]);
vfnmsub231nepbf16(ym1|k1, ym2, ptr_b[rax+128]);
vfnmsub231nepbf16(zm1|k2|T_z, zm2, ptr_b[rax+128]);
vcmppbf16(k1, xm5, xm4, 5);
vcmppbf16(k2, ym5, ym4, 6);
vcmppbf16(k3, ym15, ptr_b[rax+128], 7);
vcmppbf16(k4, zm30, zm20, 8);
vcmppbf16(k5, zm1, ptr[rax+128], 9);
vcmppbf16(k6, zm10, ptr_b[rax+128], 10);
vfpclasspbf16(k1, xm4, 5);
vfpclasspbf16(k2|k5, ym4, 6);
vfpclasspbf16(k3|k5, zm20, 7);
vfpclasspbf16(k3|k5, xword[rax+128], 8);
vfpclasspbf16(k3, xword_b[rax+128], 9);
vfpclasspbf16(k5|k5, yword[rax+128], 10);
vfpclasspbf16(k6|k5, yword_b[rax+128], 11);
vfpclasspbf16(k7|k5, zword[rax+128], 12);
vfpclasspbf16(k7|k5, zword_b[rax+128], 13);

View file

@ -2176,6 +2176,7 @@ void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x,
// vcmpordps: convenience form of vcmpps with the immediate fixed to 7.
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op)
{
	vcmpps(k, x, op, 7);
}
// vcmpordsd: convenience form of vcmpsd with the immediate fixed to 7.
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op)
{
	vcmpsd(k, x, op, 7);
}
// vcmpordss: convenience form of vcmpss with the immediate fixed to 7.
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op)
{
	vcmpss(k, x, op, 7);
}
// vcmppbf16: emits code 0xC2 through opVex with mask destination k,
// first source x, second source op, and imm as the trailing immediate.
// NOTE(review): the sibling vcmppd/vcmpph/vcmpps emitters go through
// opAVX_K_X_XM instead of calling opVex directly - confirm this is intended.
void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const uint64_t type = T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16;
	opVex(k, &x, op, type, 0xC2, imm);
}
// vcmppd: forwards to opAVX_K_X_XM with code 0xC2; imm is passed through
// unchanged as the trailing immediate argument.
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const uint64_t type = T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64;
	opAVX_K_X_XM(k, x, op, type, 0xC2, imm);
}
// vcmpph: forwards to opAVX_K_X_XM with code 0xC2; imm is passed through
// unchanged as the trailing immediate argument.
void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const uint64_t type = T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16;
	opAVX_K_X_XM(k, x, op, type, 0xC2, imm);
}
// vcmpps: forwards to opAVX_K_X_XM with code 0xC2; imm is passed through
// unchanged as the trailing immediate argument.
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const uint64_t type = T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32;
	opAVX_K_X_XM(k, x, op, type, 0xC2, imm);
}
@ -2325,6 +2326,7 @@ void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
// vfnmsub231nepbf16: forwards the three operands to opAVX_X_X_XM with code 0xBE.
void vfnmsub231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const uint64_t type = T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16;
	opAVX_X_X_XM(x1, x2, op, type, 0xBE);
}
// vfnmsub231ph: forwards the three operands to opAVX_X_X_XM with code 0xBE.
void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const uint64_t type = T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16;
	opAVX_X_X_XM(x1, x2, op, type, 0xBE);
}
// vfnmsub231sh: forwards the three operands to opAVX_X_X_XM with code 0xBF.
void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const uint64_t type = T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX;
	opAVX_X_X_XM(x1, x2, op, type, 0xBF);
}
void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }