diff --git a/doc/changelog.md b/doc/changelog.md index 5e25c2d..1d39ae6 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2024/Oct/15 ver 7.11 Added full support for AVX10.2 * 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended. * 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw * 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}. diff --git a/doc/usage.md b/doc/usage.md index ef38d63..9015bff 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -111,13 +111,13 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], ## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc. Some mnemonics have some types of encodings: VEX, EVEX, AVX10.2. The functions for these mnemonics include an optional parameter as the last argument to specify the encoding. -The default behavior depends on the order in which the instruction was introduced (whether VEX or EVEX came first), +The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first), and can be specified using setDefaultEncoding. ``` vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI) -vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above -vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI) +vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above +vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI) setDefaultEncoding(VexEncoding); // default encoding is VEX vpdpbusd(xm0, xm1, xm2); // VEX @@ -128,7 +128,7 @@ setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument. 
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2) ``` -- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)` +- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding)`, `setDefaultEncodingAVX10(PreferredEncoding avx10Enc = PreAVX10v2Encoding)` Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param. param|vnniEnc|avx10Enc @@ -137,7 +137,7 @@ VexEncoding|AVX-VNNI|- EvexEncoding|AVX512-VNNI|- PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16 AVX10v2Encoding|-|AVX10.2 -default|EvexEncoding|VexEncoding +default|EvexEncoding|PreAVX10v2Encoding mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw ### Remark diff --git a/readme.txt b/readme.txt index 417c50e..2fb242b 100644 --- a/readme.txt +++ b/readme.txt @@ -14,7 +14,7 @@ xbyak.hをインクルードするだけですぐ利用することができます。 C++の枠組み内で閉じているため、外部アセンブラは不要です。 32bit/64bit両対応です。 - 対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR + 対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/AVX-512/APX/AVX10.2 ・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応 Windows Xp, Windows 7上ではVC2008, VC2010, VC2012 @@ -46,7 +46,7 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。 ----------------------------------------------------------------------------- ◎新機能 -APX/AVX10対応 +APX/AVX10.2対応 例外なしモード追加 XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。 @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2024/10/15 ver 7.11 AVX10.2完全サポート 2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張. 
2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正 2024/10/08 ver 7.09 AVX10.2のYMMレジスタの埋め込み丸め対応 diff --git a/test/avx10_test.cpp b/test/avx10_test.cpp index 5f742fe..1ceb52a 100644 --- a/test/avx10_test.cpp +++ b/test/avx10_test.cpp @@ -234,10 +234,10 @@ CYBOZU_TEST_AUTO(vmpsadbw) struct Code : Xbyak::CodeGenerator { Code() { - setDefaultEncoding(); + setDefaultEncodingAVX10(); vmpsadbw(xm1, xm3, xm15, 3); // vex(avx) vmpsadbw(ym1, ym3, ptr[rax+128], 3); // vex(avx2) - setDefaultEncoding(VexEncoding, EvexEncoding); + setDefaultEncodingAVX10(AVX10v2Encoding); vmpsadbw(ym1, ym3, ym15, 3); // evex(avx10.2) vmpsadbw(ym1, ym3, ptr[rax+128], 3); // evex(avx10.2) } diff --git a/test/test_by_xed.cpp b/test/test_by_xed.cpp index af39296..9be9199 100644 --- a/test/test_by_xed.cpp +++ b/test/test_by_xed.cpp @@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096*8) { - setDefaultEncoding(EvexEncoding, AVX10v2Encoding); + setDefaultEncodingAVX10(AVX10v2Encoding); #include "tmp.cpp" } }; diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 5367d83..b56bfb4 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -232,6 +232,7 @@ enum { ERR_CANT_USE_REX2, ERR_INVALID_DFV, ERR_INVALID_REG_IDX, + ERR_BAD_ENCODING_MODE, ERR_INTERNAL // Put it at last. 
}; @@ -290,6 +291,7 @@ inline const char *ConvertErrorToString(int err) "can't use rex2", "invalid dfv", "invalid reg index", + "bad encoding mode", "internal error" }; assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); @@ -1674,7 +1676,7 @@ typedef enum { DefaultEncoding, VexEncoding, EvexEncoding, - PreAVX10v2Encoding = EvexEncoding, + PreAVX10v2Encoding, AVX10v2Encoding } PreferredEncoding; @@ -2663,25 +2665,24 @@ private: if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) opVex(x, 0, addr, type, code); } - void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0) + void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0) { - opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm); + opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm); } - PreferredEncoding getEncoding(PreferredEncoding encoding, int sel) const + PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const { - if (encoding == DefaultEncoding) { - encoding = defaultEncoding_[sel]; + if (enc == DefaultEncoding) { + enc = defaultEncoding_[sel]; } - if (encoding == EvexEncoding) { + if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding) #ifdef XBYAK_DISABLE_AVX512 - XBYAK_THROW(ERR_EVEX_IS_INVALID) + if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW(ERR_EVEX_IS_INVALID) #endif - } - return encoding; + return enc; } - uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) { - bool isVex = getEncoding(encoding, sel) == VexEncoding; - return isVex ? 
typeVex : T_MUST_EVEX | typeEvex; + uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) { + enc = getEncoding(enc, sel); + return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex); } void opInOut(const Reg& a, const Reg& d, uint8_t code) { @@ -3138,8 +3139,8 @@ public: #endif , isDefaultJmpNEAR_(false) { - // select avx512-vnni, vmpsadbw(avx) setDefaultEncoding(); + setDefaultEncodingAVX10(); labelMgr_.set(this); } void reset() @@ -3176,11 +3177,19 @@ public: #undef jnl #endif - // set default encoding - // vnniEnc : AVX512_VNNI (default:EvexEncoding) or AVX-VNNI (VexEncoding) - // avx10Enc : mpsadbw etc., AVX-VNNI-INT8/AVX512-FP16 (default:PreAVX10v2Encoding) or AVX10.2 (AVX10v2Encoding) - void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVX10v2Encoding) - { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; } + // set default encoding of VNNI + // EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI + void setDefaultEncoding(PreferredEncoding enc = EvexEncoding) + { + if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE) + defaultEncoding_[0] = enc; + } + // set default encoding of AVX10 : PreAVX10v2Encoding (default) : AVX-VNNI-INT8/AVX512-FP16, AVX10v2Encoding : AVX10.2 + void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding) + { + if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE) + defaultEncoding_[1] = enc; + } void bswap(const Reg32e& r) { @@ -3195,7 +3204,7 @@ public: db(0xC8 + (idx & 7)); } // AVX10 zero-extending for vmovd, vmovw - void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding encoding, int bit) + void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit) { const Operand *p1 = &op1; const Operand *p2 = &op2; @@ 
-3210,7 +3219,7 @@ public: rev = !rev; } int sel = -1; - if (getEncoding(encoding, 1) == AVX10v2Encoding) { + if (getEncoding(enc, 1) == AVX10v2Encoding) { if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev); } else { if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev); @@ -3218,23 +3227,23 @@ public: if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]); } - void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) + void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding) { const uint64_t typeTbl[] = { T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512 T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2 }; const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E }; - opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 32); + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32); } - void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding) + void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding) { const uint64_t typeTbl[] = { T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16 T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2 }; const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E }; - opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, encoding, 16|32|64); + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64); } /* use single byte nop if useMultiByteNop = false