From 5f942b59145c66b514085216320ac0de96f841cf Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 14 Oct 2024 05:15:24 +0900 Subject: [PATCH] under developing saturation --- gen/gen_avx512.cpp | 22 +++++ test/Makefile | 2 +- test/avx10/saturation.txt | 202 ++++++++++++++++++++++++++++++++++++++ xbyak/xbyak_mnemonic.h | 15 +++ 4 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 test/avx10/saturation.txt diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index cfe0ac6..46b00b5 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -209,6 +209,28 @@ void putX_XM() { 0x2E, "vucomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 }, { 0x2E, "vucomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 }, { 0x2E, "vucomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 }, + + // 13.1 + { 0x69, "vcvtnebf162ibs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 }, + { 0x6B, "vcvtnebf162iubs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 }, + { 0x68, "vcvttnebf162ibs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 }, + { 0x6A, "vcvttnebf162iubs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 }, + // 13.3 + { 0x6D, "vcvttpd2qqs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW1 | T_B64 | T_SAE_Y | T_SAE_Z }, + // 13.5 + { 0x6C, "vcvttpd2uqqs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW1 | T_B64 | T_SAE_Y | T_SAE_Z }, + // 13.6 + { 0x69, "vcvtph2ibs", T_MUST_EVEX | T_YMM | T_MAP5 | T_EW0 | T_B16 | T_ER_Y | T_ER_Z }, + { 0x6B, "vcvtph2iubs", T_MUST_EVEX | T_YMM | T_MAP5 | T_EW0 | T_B16 | T_ER_Y | T_ER_Z }, + { 0x68, "vcvttph2ibs", T_MUST_EVEX | T_YMM | T_MAP5 | T_EW0 | T_B16 | T_SAE_Y | T_SAE_Z }, + { 0x6A, "vcvttph2iubs", T_MUST_EVEX | T_YMM | T_MAP5 | T_EW0 | T_B16 | T_SAE_Y | T_SAE_Z }, + // 13.7 + { 0x6D, "vcvttps2dqs", T_MUST_EVEX | T_YMM | T_MAP5 | T_EW0 | T_B32 | T_SAE_Y | T_SAE_Z }, + // 13.8 + { 0x69, "vcvtps2ibs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW0 | T_B32 | T_ER_Y | T_ER_Z }, + { 0x6B, 
"vcvtps2iubs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW0 | T_B32 | T_ER_Y | T_ER_Z }, + { 0x68, "vcvttps2ibs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW0 | T_B32 | T_SAE_Y | T_SAE_Z }, + { 0x6A, "vcvttps2iubs", T_MUST_EVEX | T_YMM | T_66 | T_MAP5 | T_EW0 | T_B32 | T_SAE_Y | T_SAE_Z }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/Makefile b/test/Makefile index 336dcaf..d5613e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,7 +60,7 @@ apx: apx.cpp $(XBYAK_INC) avx10_test: avx10_test.cpp $(XBYAK_INC) $(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64 -TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt +TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt convert.txt minmax.txt saturation.txt xed_test: @for target in $(addprefix avx10/, $(TEST_FILES)); do ./test_by_xed.sh $$target; done diff --git a/test/avx10/saturation.txt b/test/avx10/saturation.txt new file mode 100644 index 0000000..591960e --- /dev/null +++ b/test/avx10/saturation.txt @@ -0,0 +1,202 @@ +// +vcvtnebf162ibs(xm1, xm2); +vcvtnebf162ibs(xm1, ptr[rax+128]); +vcvtnebf162ibs(xm1, ptr_b[rax+128]); + +vcvtnebf162ibs(ym1, ym2); +vcvtnebf162ibs(ym1, ptr[rax+128]); +vcvtnebf162ibs(ym1, ptr_b[rax+128]); + +vcvtnebf162ibs(zm1, zm2); +vcvtnebf162ibs(zm1, ptr[rax+128]); +vcvtnebf162ibs(zm1, ptr_b[rax+128]); +// +vcvtnebf162iubs(xm1, xm2); +vcvtnebf162iubs(xm1, ptr[rax+128]); +vcvtnebf162iubs(xm1, ptr_b[rax+128]); + +vcvtnebf162iubs(ym1, ym2); +vcvtnebf162iubs(ym1, ptr[rax+128]); +vcvtnebf162iubs(ym1, ptr_b[rax+128]); + +vcvtnebf162iubs(zm1, zm2); +vcvtnebf162iubs(zm1, ptr[rax+128]); +vcvtnebf162iubs(zm1, ptr_b[rax+128]); +// +vcvttnebf162ibs(xm1, xm2); +vcvttnebf162ibs(xm1, ptr[rax+128]); +vcvttnebf162ibs(xm1, ptr_b[rax+128]); + +vcvttnebf162ibs(ym1, ym2); +vcvttnebf162ibs(ym1, ptr[rax+128]); +vcvttnebf162ibs(ym1, ptr_b[rax+128]); + +vcvttnebf162ibs(zm1, zm2); +vcvttnebf162ibs(zm1, ptr[rax+128]); +vcvttnebf162ibs(zm1, ptr_b[rax+128]); +// 
+vcvttnebf162iubs(xm1, xm2); +vcvttnebf162iubs(xm1, ptr[rax+128]); +vcvttnebf162iubs(xm1, ptr_b[rax+128]); + +vcvttnebf162iubs(ym1, ym2); +vcvttnebf162iubs(ym1, ptr[rax+128]); +vcvttnebf162iubs(ym1, ptr_b[rax+128]); + +vcvttnebf162iubs(zm1, zm2); +vcvttnebf162iubs(zm1, ptr[rax+128]); +vcvttnebf162iubs(zm1, ptr_b[rax+128]); +// +vcvttpd2qqs(xm1, xm2); +vcvttpd2qqs(xm1, ptr[rax+128]); +vcvttpd2qqs(xm1, ptr_b[rax+128]); + +vcvttpd2qqs(ym1, ym2); +vcvttpd2qqs(ym1, ym2|T_sae); +vcvttpd2qqs(ym1, ptr[rax+128]); +vcvttpd2qqs(ym1, ptr_b[rax+128]); + +vcvttpd2qqs(zm1, zm2); +vcvttpd2qqs(zm1, zm2|T_sae); +vcvttpd2qqs(zm1, ptr[rax+128]); +vcvttpd2qqs(zm1, ptr_b[rax+128]); +// +vcvttpd2uqqs(xm1, xm2); +vcvttpd2uqqs(xm1, ptr[rax+128]); +vcvttpd2uqqs(xm1, ptr_b[rax+128]); + +vcvttpd2uqqs(ym1, ym2); +vcvttpd2uqqs(ym1, ym2|T_sae); +vcvttpd2uqqs(ym1, ptr[rax+128]); +vcvttpd2uqqs(ym1, ptr_b[rax+128]); + +vcvttpd2uqqs(zm1, zm2); +vcvttpd2uqqs(zm1, zm2|T_sae); +vcvttpd2uqqs(zm1, ptr[rax+128]); +vcvttpd2uqqs(zm1, ptr_b[rax+128]); +// +vcvtph2ibs(xm1, xm2); +vcvtph2ibs(xm1, ptr[rax+128]); +vcvtph2ibs(xm1, ptr_b[rax+128]); + +vcvtph2ibs(ym1, ym2); +vcvtph2ibs(ym1, ym2|T_rd_sae); +vcvtph2ibs(ym1, ptr[rax+128]); +vcvtph2ibs(ym1, ptr_b[rax+128]); + +vcvtph2ibs(zm1, zm2); +vcvtph2ibs(zm1, zm2|T_ru_sae); +vcvtph2ibs(zm1, ptr[rax+128]); +vcvtph2ibs(zm1, ptr_b[rax+128]); +// +vcvtph2iubs(xm1, xm2); +vcvtph2iubs(xm1, ptr[rax+128]); +vcvtph2iubs(xm1, ptr_b[rax+128]); + +vcvtph2iubs(ym1, ym2); +vcvtph2iubs(ym1, ym2|T_rd_sae); +vcvtph2iubs(ym1, ptr[rax+128]); +vcvtph2iubs(ym1, ptr_b[rax+128]); + +vcvtph2iubs(zm1, zm2); +vcvtph2iubs(zm1, zm2|T_ru_sae); +vcvtph2iubs(zm1, ptr[rax+128]); +vcvtph2iubs(zm1, ptr_b[rax+128]); +// +vcvttph2ibs(xm1, xm2); +vcvttph2ibs(xm1, ptr[rax+128]); +vcvttph2ibs(xm1, ptr_b[rax+128]); + +vcvttph2ibs(ym1, ym2); +vcvttph2ibs(ym1, ym2|T_sae); +vcvttph2ibs(ym1, ptr[rax+128]); +vcvttph2ibs(ym1, ptr_b[rax+128]); + +vcvttph2ibs(zm1, zm2); +vcvttph2ibs(zm1, zm2|T_sae); 
+vcvttph2ibs(zm1, ptr[rax+128]); +vcvttph2ibs(zm1, ptr_b[rax+128]); +// +vcvttph2iubs(xm1, xm2); +vcvttph2iubs(xm1, ptr[rax+128]); +vcvttph2iubs(xm1, ptr_b[rax+128]); + +vcvttph2iubs(ym1, ym2); +vcvttph2iubs(ym1, ym2|T_sae); +vcvttph2iubs(ym1, ptr[rax+128]); +vcvttph2iubs(ym1, ptr_b[rax+128]); + +vcvttph2iubs(zm1, zm2); +vcvttph2iubs(zm1, zm2|T_sae); +vcvttph2iubs(zm1, ptr[rax+128]); +vcvttph2iubs(zm1, ptr_b[rax+128]); +// +vcvttps2dqs(xm1, xm2); +vcvttps2dqs(xm1, ptr[rax+128]); +vcvttps2dqs(xm1, ptr_b[rax+128]); + +vcvttps2dqs(ym1, ym2); +vcvttps2dqs(ym1, ym2|T_sae); +vcvttps2dqs(ym1, ptr[rax+128]); +vcvttps2dqs(ym1, ptr_b[rax+128]); + +vcvttps2dqs(zm1, zm2); +vcvttps2dqs(zm1, zm2|T_sae); +vcvttps2dqs(zm1, ptr[rax+128]); +vcvttps2dqs(zm1, ptr_b[rax+128]); +// +vcvtps2ibs(xm1, xm2); +vcvtps2ibs(xm1, ptr[rax+128]); +vcvtps2ibs(xm1, ptr_b[rax+128]); + +vcvtps2ibs(ym1, ym2); +vcvtps2ibs(ym1, ym2|T_rd_sae); +vcvtps2ibs(ym1, ptr[rax+128]); +vcvtps2ibs(ym1, ptr_b[rax+128]); + +vcvtps2ibs(zm1, zm2); +vcvtps2ibs(zm1, zm2|T_ru_sae); +vcvtps2ibs(zm1, ptr[rax+128]); +vcvtps2ibs(zm1, ptr_b[rax+128]); +// +vcvtps2iubs(xm1, xm2); +vcvtps2iubs(xm1, ptr[rax+128]); +vcvtps2iubs(xm1, ptr_b[rax+128]); + +vcvtps2iubs(ym1, ym2); +vcvtps2iubs(ym1, ym2|T_rd_sae); +vcvtps2iubs(ym1, ptr[rax+128]); +vcvtps2iubs(ym1, ptr_b[rax+128]); + +vcvtps2iubs(zm1, zm2); +vcvtps2iubs(zm1, zm2|T_ru_sae); +vcvtps2iubs(zm1, ptr[rax+128]); +vcvtps2iubs(zm1, ptr_b[rax+128]); +// +vcvttps2ibs(xm1, xm2); +vcvttps2ibs(xm1, ptr[rax+128]); +vcvttps2ibs(xm1, ptr_b[rax+128]); + +vcvttps2ibs(ym1, ym2); +vcvttps2ibs(ym1, ym2|T_sae); +vcvttps2ibs(ym1, ptr[rax+128]); +vcvttps2ibs(ym1, ptr_b[rax+128]); + +vcvttps2ibs(zm1, zm2); +vcvttps2ibs(zm1, zm2|T_sae); +vcvttps2ibs(zm1, ptr[rax+128]); +vcvttps2ibs(zm1, ptr_b[rax+128]); +// +vcvttps2iubs(xm1, xm2); +vcvttps2iubs(xm1, ptr[rax+128]); +vcvttps2iubs(xm1, ptr_b[rax+128]); + +vcvttps2iubs(ym1, ym2); +vcvttps2iubs(ym1, ym2|T_sae); +vcvttps2iubs(ym1, 
ptr[rax+128]); +vcvttps2iubs(ym1, ptr_b[rax+128]); + +vcvttps2iubs(zm1, zm2); +vcvttps2iubs(zm1, zm2|T_sae); +vcvttps2iubs(zm1, ptr[rax+128]); +vcvttps2iubs(zm1, ptr_b[rax+128]); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index b4cb11c..4db4f9e 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -2202,6 +2202,8 @@ void vcvtne2ph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X void vcvtne2ph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); } void vcvtne2ph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); } void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); } +void vcvtnebf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x69); } +void vcvtnebf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x6B); } void vcvtneph2bf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } void vcvtneph2bf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } void vcvtneph2hf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); } @@ -2212,6 +2214,8 @@ void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0 void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); } void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); } void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, 
T_N8|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x5B); } +void vcvtph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x69); } +void vcvtph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x6B); } void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x5A); } void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x13); } void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x7B); } @@ -2219,6 +2223,8 @@ void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x79); } void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } +void vcvtps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x69); } +void vcvtps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x6B); } void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW0|T_ER_Z|T_MUST_EVEX|T_B32, 0x1D); } void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, 
T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x7B); } void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x79); } @@ -2235,15 +2241,24 @@ void vcvtsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3 void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); } void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x1D); } void vcvtss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); } +void vcvttnebf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x68); } +void vcvttnebf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x6A); } void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x7A); } +void vcvttpd2qqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6D); } void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); } void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); } +void vcvttpd2uqqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6C); } void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x5B); } +void 
vcvttph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B16, 0x68); } +void vcvttph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B16, 0x6A); } void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x7A); } void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x78); } void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x78); } void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); } void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); } +void vcvttps2dqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6D); } +void vcvttps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x68); } +void vcvttps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6A); } void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x7A); } void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x78); } void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x78); }