mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
add avx10 compare and convert instructions
This commit is contained in:
parent
8457f52cbb
commit
56fc5457eb
6 changed files with 278 additions and 14 deletions
|
@ -137,8 +137,6 @@ void putVcmp()
|
|||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
|
||||
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
|
||||
}
|
||||
|
||||
void putVcmpAlias()
|
||||
|
@ -200,6 +198,17 @@ void putX_XM()
|
|||
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
|
||||
{ 0x51, "vsqrtnepbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
|
||||
{ 0x2F, "vcomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||
{ 0x2E, "vucomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||
|
||||
{ 0x2F, "vcomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
||||
{ 0x2F, "vcomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||
{ 0x2F, "vcomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
||||
|
||||
{ 0x2E, "vucomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
||||
{ 0x2E, "vucomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||
{ 0x2E, "vucomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -427,6 +436,12 @@ void putX_X_XM_IMM()
|
|||
{ 0x9E, "vfnmsub132nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xAE, "vfnmsub213nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
{ 0xBE, "vfnmsub231nepbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||
|
||||
{ 0x67, "vcvt2ps2phx", T_MUST_EVEX | T_66 | T_0F38 | T_EW0 | T_YMM | T_B32 | T_ER_Y | T_ER_Z, false },
|
||||
{ 0x74, "vcvtne2ph2bf8", T_MUST_EVEX | T_F2 | T_0F38 | T_EW0 | T_YMM | T_B16 | T_N1, false },
|
||||
{ 0x74, "vcvtne2ph2bf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
|
||||
{ 0x18, "vcvtne2ph2hf8", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
|
||||
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -979,12 +994,6 @@ void putFP16_2()
|
|||
}
|
||||
}
|
||||
|
||||
void putAVX10_BF16()
|
||||
{
|
||||
puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
|
||||
puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");
|
||||
}
|
||||
|
||||
void putFP16()
|
||||
{
|
||||
putFP16_1();
|
||||
|
@ -993,9 +1002,39 @@ void putFP16()
|
|||
putFP16_2();
|
||||
}
|
||||
|
||||
void putAVX10()
|
||||
void putAVX10_2()
|
||||
{
|
||||
putAVX10_BF16();
|
||||
puts("void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }");
|
||||
puts("void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }");
|
||||
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
uint64_t type;
|
||||
} tbl1[] = {
|
||||
{ 0x74, "vcvtbiasph2bf8", T_MUST_EVEX | T_0F38 | T_EW0 |T_YMM | T_B16 },
|
||||
{ 0x74, "vcvtbiasph2bf8s", T_MUST_EVEX | T_MAP5 | T_EW0 |T_YMM | T_B16 },
|
||||
{ 0x18, "vcvtbiasph2hf8", T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
{ 0x1B, "vcvtbiasph2hf8s", T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl1); i++) {
|
||||
const Tbl *p = &tbl1[i];
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, %s, 0x%02X); }\n" , p->name, s.c_str(), p->code);
|
||||
}
|
||||
puts("void vcvthf82ph(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_N1, 0x1E); }");
|
||||
|
||||
const Tbl tbl2[] = {
|
||||
{ 0x74, "vcvtneph2bf8", T_MUST_EVEX | T_F3 | T_0F38 | T_EW0 | T_YMM | T_B16 },
|
||||
{ 0x74, "vcvtneph2bf8s", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
{ 0x18, "vcvtneph2hf8", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
{ 0x1B, "vcvtneph2hf8s", T_MUST_EVEX | T_F3 |T_MAP5 | T_EW0 | T_YMM | T_B16 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl2); i++) {
|
||||
const Tbl *p = &tbl2[i];
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n" , p->name, s.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *[])
|
||||
|
@ -1023,5 +1062,5 @@ int main(int argc, char *[])
|
|||
putScatter();
|
||||
putV4FMA();
|
||||
putFP16();
|
||||
putAVX10();
|
||||
putAVX10_2();
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC)
|
|||
avx10_test: avx10_test.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
|
||||
|
||||
TEST_FILES=old.txt new-ymm.txt bf16.txt
|
||||
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt
|
||||
xed_test:
|
||||
@for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done
|
||||
|
||||
|
|
17
test/avx10/comp.txt
Normal file
17
test/avx10/comp.txt
Normal file
|
@ -0,0 +1,17 @@
|
|||
vcomxsd(xm1, xm2|T_sae);
|
||||
vcomxsd(xm1, ptr[rax+128]);
|
||||
|
||||
vcomxsh(xm1, xm2|T_sae);
|
||||
vcomxsh(xm1, ptr[rax+128]);
|
||||
|
||||
vcomxss(xm1, xm2|T_sae);
|
||||
vcomxss(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxsd(xm1, xm2|T_sae);
|
||||
vucomxsd(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxsh(xm1, xm2|T_sae);
|
||||
vucomxsh(xm1, ptr[rax+128]);
|
||||
|
||||
vucomxss(xm1, xm2|T_sae);
|
||||
vucomxss(xm1, ptr[rax+128]);
|
176
test/avx10/convert.txt
Normal file
176
test/avx10/convert.txt
Normal file
|
@ -0,0 +1,176 @@
|
|||
vcvt2ps2phx(xm1|k5, xm2, xm3);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr[rax+128]);
|
||||
vcvt2ps2phx(xm1|k5, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ps2phx(ym1|k5, ym2, ym3);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr[rax+128]);
|
||||
vcvt2ps2phx(ym1|k5, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvt2ps2phx(zm1|k5, zm2, zm3);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr[rax+128]);
|
||||
vcvt2ps2phx(zm1|k5, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2bf8
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2bf8s
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2bf8s(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2hf8
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
// vcvtbiasph2hf8s
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, xm5);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(xm1|k2, xm3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ym5);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(xm1|k2, ym3, ptr_b[rax+128]);
|
||||
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, zm5);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr[rax+128]);
|
||||
vcvtbiasph2hf8s(ym1|k2, zm3, ptr_b[rax+128]);
|
||||
|
||||
vcvthf82ph(xm1|k5|T_z, xm2);
|
||||
vcvthf82ph(xm1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
vcvthf82ph(ym1|k5|T_z, xm2);
|
||||
vcvthf82ph(ym1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
vcvthf82ph(zm1|k5|T_z, ym2);
|
||||
vcvthf82ph(zm1|k5|T_z, ptr[rax+128]);
|
||||
|
||||
// vcvtne2ph2bf8
|
||||
vcvtne2ph2bf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvtne2ph2bf8(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2bf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvtne2ph2bf8(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2bf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvtne2ph2bf8(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtne2ph2bf8s
|
||||
vcvtne2ph2bf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvtne2ph2bf8s(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8s(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2bf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvtne2ph2bf8s(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8s(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2bf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvtne2ph2bf8s(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvtne2ph2bf8s(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtne2ph2hf8
|
||||
vcvtne2ph2hf8(xm1|k4|T_z, xm2, xm3);
|
||||
vcvtne2ph2hf8(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2hf8(ym1|k4|T_z, ym2, ym3);
|
||||
vcvtne2ph2hf8(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2hf8(zm1|k4|T_z, zm2, zm3);
|
||||
vcvtne2ph2hf8(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtne2ph2hf8s
|
||||
vcvtne2ph2hf8s(xm1|k4|T_z, xm2, xm3);
|
||||
vcvtne2ph2hf8s(xm1|k4, xm2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8s(xm1|T_z, xm2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2hf8s(ym1|k4|T_z, ym2, ym3);
|
||||
vcvtne2ph2hf8s(ym1|k4, ym2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8s(ym1|T_z, ym2, ptr_b[rax+128]);
|
||||
|
||||
vcvtne2ph2hf8s(zm1|k4|T_z, zm2, zm3);
|
||||
vcvtne2ph2hf8s(zm1|k4, zm2, ptr[rax+128]);
|
||||
vcvtne2ph2hf8s(zm1|T_z, zm2, ptr_b[rax+128]);
|
||||
|
||||
// vcvtneph2bf8
|
||||
vcvtneph2bf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtneph2bf8(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtneph2bf8(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtneph2bf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtneph2bf8(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtneph2bf8(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtneph2bf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtneph2bf8(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtneph2bf8(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtneph2bf8s
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtneph2bf8s(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtneph2bf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtneph2bf8s(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtneph2bf8s(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtneph2hf8
|
||||
vcvtneph2hf8(xmm1|k2|T_z, xmm2);
|
||||
vcvtneph2hf8(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtneph2hf8(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtneph2hf8(xmm1|k2|T_z, ymm2);
|
||||
vcvtneph2hf8(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtneph2hf8(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtneph2hf8(ymm1|k2|T_z, zmm2);
|
||||
vcvtneph2hf8(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtneph2hf8(ymm1|k2|T_z, zword_b[rax+128]);
|
||||
|
||||
// vcvtneph2hf8s
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, xmm2);
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, xword [rax+128]);
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, xword_b[rax+128]);
|
||||
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, ymm2);
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, yword[rax+128]);
|
||||
vcvtneph2hf8s(xmm1|k2|T_z, yword_b[rax+128]);
|
||||
|
||||
vcvtneph2hf8s(ymm1|k2|T_z, zmm2);
|
||||
vcvtneph2hf8s(ymm1|k2|T_z, zword[rax+128]);
|
||||
vcvtneph2hf8s(ymm1|k2|T_z, zword_b[rax+128]);
|
|
@ -2559,6 +2559,18 @@ private:
|
|||
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
|
||||
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||
}
|
||||
// (x, x, x/m), (x, y, y/m), (y, z, z/m)
|
||||
void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code)
|
||||
{
|
||||
int b1 = x1.getBit();
|
||||
int b2 = x2.getBit();
|
||||
int b3 = op.getBit();
|
||||
if ((b1 == 128 && (b2 == 128 || b2 == 256) && (b2 == b3 || op.isMEM())) || (b1 == 256 && b2 == 512 && (b3 == b2 || op.isMEM()))) {
|
||||
opVex(x1, &x2, op, type, code);
|
||||
return;
|
||||
}
|
||||
XBYAK_THROW(ERR_BAD_COMBINATION);
|
||||
}
|
||||
const Xmm& cvtIdx0(const Operand& x) const
|
||||
{
|
||||
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
|
||||
|
|
|
@ -2199,12 +2199,29 @@ void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x
|
|||
void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); }
|
||||
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
|
||||
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
|
||||
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
|
||||
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||
void vcomsbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_MAP5|T_EW0|T_MUST_EVEX, 0x2F); }
|
||||
void vcomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F3|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||
void vcomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||
void vcomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F2|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||
void vcvt2ps2phx(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x67); }
|
||||
void vcvtbiasph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtbiasph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtbiasph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
|
||||
void vcvtbiasph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
|
||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); }
|
||||
void vcvthf82ph(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_N1, 0x1E); }
|
||||
void vcvtne2ph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtne2ph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtne2ph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
|
||||
void vcvtne2ph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
|
||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
|
||||
void vcvtneph2bf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtneph2bf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||
void vcvtneph2hf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
|
||||
void vcvtneph2hf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
|
||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); }
|
||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7B); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); }
|
||||
|
@ -2610,7 +2627,10 @@ void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
|
|||
void vsubnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5C); }
|
||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
||||
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
|
||||
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||
void vucomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F3|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||
void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F2|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||
#ifdef XBYAK64
|
||||
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
|
||||
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
|
||||
|
|
Loading…
Reference in a new issue