From 220ca76f41f8c78bc32b9e00c50f42344fdc5792 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 14 Oct 2024 19:40:52 +0900 Subject: [PATCH] vmovd supports avx10.2 --- gen/gen_code.cpp | 3 --- test/avx10/misc.txt | 5 +++++ test/test_by_xed.cpp | 2 +- xbyak/xbyak.h | 44 +++++++++++++++++++++++++++++------------- xbyak/xbyak_mnemonic.h | 2 -- 5 files changed, 37 insertions(+), 19 deletions(-) diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index df4e5b9..c2db4ac 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1734,9 +1734,6 @@ void put() } // mov { - puts("void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); }"); - puts("void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); }"); - printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 
0xD6 : 0x7E); }\n"); printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); diff --git a/test/avx10/misc.txt b/test/avx10/misc.txt index 9464d03..7c969bf 100644 --- a/test/avx10/misc.txt +++ b/test/avx10/misc.txt @@ -165,3 +165,8 @@ vpdpwuuds(ym1, ym2, ptr_b[rax+128]); vpdpwuuds(zm1, zm2, zm3); vpdpwuuds(zm1, zm2, ptr[rax+128]); vpdpwuuds(zm1, zm2, ptr_b[rax+128]); + +// +vmovd(xm10, xm20); +vmovd(xm10, ptr[rax+128]); +vmovd(ptr[rax+128], xm30); diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp index ddac779..71b5137 100644 --- a/test/test_by_xed.cpp +++ b/test/test_by_xed.cpp @@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096*8) { - setDefaultEncoding(VexEncoding, EvexEncoding); + setDefaultEncoding(EvexEncoding, AVX10p2Encoding); #include "tmp.cpp" } }; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index ed5c361..a3d1fca 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1673,7 +1673,9 @@ inline const uint8_t* Label::getAddress() const typedef enum { DefaultEncoding, VexEncoding, - EvexEncoding + EvexEncoding, + AVX512Encoding = EvexEncoding, + AVX10p2Encoding } PreferredEncoding; class CodeGenerator : public CodeArray { @@ -2665,7 +2667,7 @@ private: { opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm); } - bool isVexEncoding(PreferredEncoding encoding, int sel) const + PreferredEncoding getEncoding(PreferredEncoding encoding, int sel) const { if (encoding == DefaultEncoding) { encoding = defaultEncoding_[sel]; @@ -2674,12 +2676,11 @@ private: #ifdef XBYAK_DISABLE_AVX512 XBYAK_THROW(ERR_EVEX_IS_INVALID) #endif - return false; } - return true; + return encoding; } uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) { - bool isVex = isVexEncoding(encoding, sel); + bool isVex = getEncoding(encoding, sel) == VexEncoding; return isVex ? 
typeVex : T_MUST_EVEX | typeEvex; } void opInOut(const Reg& a, const Reg& d, uint8_t code) @@ -3177,7 +3178,7 @@ public: // set default encoding // vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex) - // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (evex) + // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding) void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding) { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; } @@ -3193,15 +3194,32 @@ public: } db(0xC8 + (idx & 7)); } - void opVmovd(const Xmm& x, const Operand& op, bool rev, PreferredEncoding encoding) + void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) { - if (isVexEncoding(encoding, 1)) { - if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) - uint64_t type = T_0F | T_66 | T_W0 | T_EVEX | T_N4; - int code = rev ? 0x7E : 0x6E; - opAVX_X_X_XM(x, xm0, op, type, code); - } else { + const Operand *p1 = &op1; + const Operand *p2 = &op2; + bool rev = false; + if (p1->isMEM()) { + std::swap(p1, p2); + rev = true; } + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + if (p1->isXMM()) { + std::swap(p1, p2); + rev = !rev; + } + if (getEncoding(encoding, 1) == AVX10p2Encoding) { + if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) { + opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6); + return; + } + } else { + if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) { + opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 
0x6E : 0x7E); + return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) } /* use single byte nop if useMultiByteNop = false diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index efd207a..cea4e61 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1332,8 +1332,6 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_ void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); } void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); } void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); } -void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); } -void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); } void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); } void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); }