From 21e80948eec08c3ccd0eef8654441cba55ae75f7 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 14 Oct 2024 12:24:47 +0900 Subject: [PATCH] tweak vmovd --- gen/gen_code.cpp | 34 ++-------------------------------- xbyak/xbyak.h | 21 ++++++++++++++++++--- xbyak/xbyak_mnemonic.h | 4 ++-- 3 files changed, 22 insertions(+), 37 deletions(-) diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index e72df50..df4e5b9 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1734,8 +1734,8 @@ void put() } // mov { - printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); - printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); + puts("void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); }"); + puts("void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); }"); printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 
0xD6 : 0x7E); }\n"); @@ -1900,36 +1900,6 @@ void put() printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, s.c_str(), p->code); } } - // avx-vnni-int8 - // avx-vnni-int16 -#if 0 - { - const struct Tbl { - uint8_t code; - const char *name; - uint64_t type; - } tbl[] = { -// { 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM }, -// { 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM }, -// { 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM }, -// { 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM }, -// { 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM }, -// { 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM }, - -// { 0xD2, "vpdpwsud", T_F3 | T_0F38 | T_W0 | T_YMM }, -// { 0xD3, "vpdpwsuds", T_F3 | T_0F38 | T_W0 | T_YMM }, -// { 0xD2, "vpdpwusd", T_66 | T_0F38 | T_W0 | T_YMM }, -// { 0xD3, "vpdpwusds", T_66 | T_0F38 | T_W0 | T_YMM }, -// { 0xD2, "vpdpwuud", T_0F38 | T_W0 | T_YMM }, -// { 0xD3, "vpdpwuuds", T_0F38 | T_W0 | T_YMM }, - }; - for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - const Tbl *p = &tbl[i]; - std::string s = type2String(p->type); - printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code); - } - } -#endif } void put32() diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 17a9597..ed5c361 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -2665,7 +2665,8 @@ private: { opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm); } - uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) { + bool isVexEncoding(PreferredEncoding encoding, int sel) const + { if (encoding == DefaultEncoding) { encoding = defaultEncoding_[sel]; } @@ -2673,9 +2674,13 @@ private: #ifdef XBYAK_DISABLE_AVX512 XBYAK_THROW(ERR_EVEX_IS_INVALID) #endif - return T_MUST_EVEX | typeEvex; + return false; } - return typeVex; + return true; + } 
+ uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) { + bool isVex = isVexEncoding(encoding, sel); + return isVex ? typeVex : T_MUST_EVEX | typeEvex; } void opInOut(const Reg& a, const Reg& d, uint8_t code) { @@ -3188,6 +3193,16 @@ public: } db(0xC8 + (idx & 7)); } + void opVmovd(const Xmm& x, const Operand& op, bool rev, PreferredEncoding encoding) + { + /* Shared emitter for both vmovd overloads: vmovd(op, x) passes rev=true (opcode 0x7E), vmovd(x, op) passes rev=false (opcode 0x6E). op must be a 32-bit register or a memory operand, otherwise ERR_BAD_COMBINATION is raised. */ + /* Only the path selected when isVexEncoding() returns true is implemented here; for the other case raise an error rather than returning without emitting any bytes, which would silently drop the instruction. */ + if (!isVexEncoding(encoding, 1)) XBYAK_THROW(ERR_EVEX_IS_INVALID) + if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + uint64_t type = T_0F | T_66 | T_W0 | T_EVEX | T_N4; + int code = rev ? 0x7E : 0x6E; + opAVX_X_X_XM(x, xm0, op, type, code); + } /* use single byte nop if useMultiByteNop = false */ diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 07ef43e..efd207a 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1332,8 +1332,8 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_ void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); } void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); } void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); } -void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); } -void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); } +void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); } +void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, 
T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); } void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); } void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); }