From eb18bc1be5d7df29649cc490f664a3a1ad88cefe Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 23 Mar 2011 21:18:43 +0900 Subject: [PATCH] support alias of vcmppd --- gen/gen_code.cpp | 20 +++ readme.md | 7 +- readme.txt | 3 +- test/a.bat | 2 +- test/make_nm.cpp | 25 ++++ xbyak/xbyak.h | 6 +- xbyak/xbyak_mnemonic.h | 292 ++++++++++++++++++++++++++++++++++++++++- 7 files changed, 345 insertions(+), 10 deletions(-) diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 0372742..eec2e21 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -1241,6 +1241,26 @@ void put() printf("void vmaskmov%s(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x%02X, true, 0); }\n", suf[i], 0x2E + i); } } + // vcmpeqps + { + const char pred[32][16] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", + "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", + "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", + "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" + }; + const char suf[][4] = { "pd", "ps", "sd", "ss" }; + for (int i = 0; i < 4; i++) { + const char *s = suf[i]; + for (int j = 0; j < 32; j++) { + if (j < 8) { + printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j); + } + printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j); + printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s(x, op, %d); }\n", pred[j], s, s, j); + } + } + } // vmov(h|l)(pd|ps) { const struct Tbl { diff --git a/readme.md b/readme.md index b4f0653..fb12772 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ -Xbyak 2.994 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +Xbyak 3.00 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ============= Abstract @@ -202,6 +202,7 @@ http://www.opensource.org/licenses/bsd-license.php History 
------------- +* 2011/Mar/23 ver 3.00 add vcmpeqps and so on * 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) * 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe * 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm @@ -246,5 +247,5 @@ Author MITSUNARI Shigeo(herumi at nifty dot com) --- -$Revision: 1.7 $ -$Date: 2011/02/16 08:06:12 $ +$Revision: 1.9 $ +$Date: 2011/03/23 04:50:42 $ diff --git a/readme.txt b/readme.txt index 061c201..3c265b7 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 2.994 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak version 3.00 ----------------------------------------------------------------------------- ◎概要 @@ -214,6 +214,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2011/03/23 ver 3.00 vcmpeqpsなどを追加 2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) 2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe 2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm diff --git a/test/a.bat b/test/a.bat index 34985e3..311bc86 100644 --- a/test/a.bat +++ b/test/a.bat @@ -1,6 +1,6 @@ @echo off echo 32bit -rm a.lst +rm -rf a.lst echo nasm nasm -l a.lst -f win32 -DWIN32 test.asm cat a.lst diff --git a/test/make_nm.cpp b/test/make_nm.cpp index c87310c..1762863 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1754,6 +1754,30 @@ class Test { put("vpextrw", REG32e | MEM, XMM, IMM); // nasm iw wrong? 
#endif } + void putCmp() + { + const char pred[32][16] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", + "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", + "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", + "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" + }; + const char suf[][4] = { "pd", "ps", "sd", "ss" }; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 32; j++) { + if (j < 8) { + put((std::string("cmp") + pred[j] + suf[i]).c_str(), XMM, XMM | MEM); + } + std::string str = std::string("vcmp") + pred[j] + suf[i]; + const char *p = str.c_str(); + put(p, XMM, XMM | MEM); + put(p, XMM, XMM, XMM | MEM); + if (i >= 2) continue; + put(p, YMM, YMM | MEM); + put(p, YMM, YMM, YMM | MEM); + } + } + } public: Test(bool isXbyak) : isXbyak_(isXbyak) @@ -1843,6 +1867,7 @@ public: put("clflush", MEM); // current nasm is ok putFpu(); putFpuFpu(); + putCmp(); #else putSSSE3(); putSSE4_1(); diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 6c93d58..829f0bd 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -5,9 +5,9 @@ @file xbyak.h @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ @author herumi - @version $Revision: 1.247 $ + @version $Revision: 1.248 $ @url http://homepage1.nifty.com/herumi/soft/xbyak.html - @date $Date: 2011/02/16 08:06:12 $ + @date $Date: 2011/03/23 04:47:06 $ @note modified new BSD license http://www.opensource.org/licenses/bsd-license.php */ @@ -51,7 +51,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x2994, /* 0xABCD = A.BC(D) */ + VERSION = 0x3000, /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 88e9f20..6fea602 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "2.994"; } +const char *getVersionString() const { return "3.00"; } void packssdw(const Mmx& mmx, const Operand& op) { 
opMMX(mmx, op, 0x6B); } void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } @@ -887,6 +887,294 @@ void vmaskmovps(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X void vmaskmovps(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2E, true, 0); } void vmaskmovpd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2D, true, 0); } void vmaskmovpd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2F, true, 0); } +void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } +void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); } +void vcmpeqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 0); } +void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); } +void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); } +void vcmpltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 1); } +void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); } +void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); } +void vcmplepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 2); } +void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } +void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); } +void vcmpunordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 3); } +void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); } +void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); } +void vcmpneqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 4); } +void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); } +void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); } 
+void vcmpnltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 5); } +void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); } +void vcmpnlepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 6); } +void vcmpnlepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 6); } +void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } +void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); } +void vcmpordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 7); } +void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); } +void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 8); } +void vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); } +void vcmpngepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 9); } +void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); } +void vcmpngtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 10); } +void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); } +void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 11); } +void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); } +void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 12); } +void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); } +void vcmpgepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 13); } +void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); } +void vcmpgtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 14); } +void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); } +void vcmptruepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 15); } +void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } +void vcmpeq_ospd(const Xmm& x, const 
Operand& op) { vcmppd(x, op, 16); } +void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); } +void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 17); } +void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); } +void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 18); } +void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); } +void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 19); } +void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); } +void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 20); } +void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); } +void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 21); } +void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); } +void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 22); } +void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); } +void vcmpord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 23); } +void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); } +void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 24); } +void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); } +void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 25); } +void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); } +void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 26); } +void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); } +void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 27); } +void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, 
op, 28); } +void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 28); } +void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); } +void vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 29); } +void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); } +void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 30); } +void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } +void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 31); } +void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } +void vcmpeqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 0); } +void vcmpeqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 0); } +void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); } +void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); } +void vcmpltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 1); } +void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); } +void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); } +void vcmpleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 2); } +void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } +void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); } +void vcmpunordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 3); } +void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); } +void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); } +void vcmpneqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 4); } +void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); } +void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); } +void vcmpnltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 5); } +void 
cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); } +void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); } +void vcmpnleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 6); } +void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } +void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } +void vcmpordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 7); } +void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); } +void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 8); } +void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); } +void vcmpngeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 9); } +void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); } +void vcmpngtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 10); } +void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); } +void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpps(x, op, 11); } +void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); } +void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 12); } +void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); } +void vcmpgeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 13); } +void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); } +void vcmpgtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 14); } +void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); } +void vcmptrueps(const Xmm& x, const Operand& op) { vcmpps(x, op, 15); } +void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } +void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 16); } +void vcmplt_oqps(const Xmm& x1, const Xmm& 
x2, const Operand& op) { vcmpps(x1, x2, op, 17); } +void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 17); } +void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 18); } +void vcmple_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 18); } +void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); } +void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 19); } +void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); } +void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 20); } +void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 21); } +void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 21); } +void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); } +void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 22); } +void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); } +void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 23); } +void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); } +void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 24); } +void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); } +void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 25); } +void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); } +void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 26); } +void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); } +void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 27); } +void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); } +void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 
28); } +void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); } +void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 29); } +void vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); } +void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 30); } +void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } +void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 31); } +void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); } +void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); } +void vcmpeqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 0); } +void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); } +void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); } +void vcmpltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 1); } +void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); } +void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); } +void vcmplesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 2); } +void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } +void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); } +void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 3); } +void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); } +void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); } +void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 4); } +void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); } +void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); } +void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 5); } +void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); } +void vcmpnlesd(const 
Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); } +void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 6); } +void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } +void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } +void vcmpordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 7); } +void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); } +void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 8); } +void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); } +void vcmpngesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 9); } +void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); } +void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 10); } +void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); } +void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 11); } +void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); } +void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 12); } +void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); } +void vcmpgesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 13); } +void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); } +void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 14); } +void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); } +void vcmptruesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 15); } +void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } +void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 16); } +void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); } +void vcmplt_oqsd(const Xmm& x, 
const Operand& op) { vcmpsd(x, op, 17); } +void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); } +void vcmple_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 18); } +void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); } +void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 19); } +void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); } +void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 20); } +void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); } +void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 21); } +void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); } +void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 22); } +void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); } +void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 23); } +void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); } +void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 24); } +void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); } +void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 25); } +void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); } +void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 26); } +void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); } +void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 27); } +void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); } +void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 28); } +void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { 
vcmpsd(x1, x2, op, 29); } +void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 29); } +void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 30); } +void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 30); } +void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } +void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 31); } +void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); } +void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); } +void vcmpeqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 0); } +void cmpltss(const Xmm& x, const Operand& op) { cmpss(x, op, 1); } +void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); } +void vcmpltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 1); } +void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); } +void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); } +void vcmpless(const Xmm& x, const Operand& op) { vcmpss(x, op, 2); } +void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); } +void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); } +void vcmpunordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 3); } +void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); } +void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); } +void vcmpneqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 4); } +void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); } +void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); } +void vcmpnltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 5); } +void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); } +void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); } +void 
vcmpnless(const Xmm& x, const Operand& op) { vcmpss(x, op, 6); } +void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } +void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } +void vcmpordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 7); } +void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); } +void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 8); } +void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); } +void vcmpngess(const Xmm& x, const Operand& op) { vcmpss(x, op, 9); } +void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 10); } +void vcmpngtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 10); } +void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); } +void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpss(x, op, 11); } +void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); } +void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 12); } +void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); } +void vcmpgess(const Xmm& x, const Operand& op) { vcmpss(x, op, 13); } +void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); } +void vcmpgtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 14); } +void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); } +void vcmptruess(const Xmm& x, const Operand& op) { vcmpss(x, op, 15); } +void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); } +void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 16); } +void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); } +void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 17); } +void vcmple_oqss(const Xmm& x1, 
const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); } +void vcmple_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 18); } +void vcmpunord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 19); } +void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 19); } +void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); } +void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 20); } +void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); } +void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 21); } +void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 22); } +void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 22); } +void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); } +void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 23); } +void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); } +void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 24); } +void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); } +void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 25); } +void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); } +void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 26); } +void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); } +void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 27); } +void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); } +void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 28); } +void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); } +void vcmpge_oqss(const Xmm& x, const Operand& op) { 
vcmpss(x, op, 29); } +void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); } +void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 30); } +void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); } +void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 31); } void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); } void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); } void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); } @@ -977,7 +1265,7 @@ void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (! void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } -void vpmovmskb(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xD7, false); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xD7, false); } void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm7, x1, x2, MM_0F | 
PP_66, 0x73, false); db(imm); } void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm7, x, x, MM_0F | PP_66, 0x73, false); db(imm); } void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm3, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); }