Merge branch 'apx'

This commit is contained in:
MITSUNARI Shigeo 2023-12-01 15:03:37 +09:00
commit 1ec2adbbb8
13 changed files with 4222 additions and 1925 deletions

View file

@ -128,6 +128,24 @@ vpdpbusd(xm0, xm1, xm2); // VEX encoding
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
## APX
[Advanced Performance Extensions (APX) Architecture Specification](https://www.intel.com/content/www/us/en/content-details/786223/intel-advanced-performance-extensions-intel-apx-architecture-specification.html)
- Support 16 additional 64-bit GPRs (general-purpose registers) r16, ..., r31
- 32-bit regs are r16d, ..., r31d
- 16-bit regs are r16w, ..., r31w
- 8-bit regs are r16b, ..., r31b
- `add(r20, r21);`
- `lea(r30, ptr[r29+r31]);`
- Support three-operand instructions
- `add(r20, r21, r23);`
- `add(r20, ptr[rax + rcx * 8 + 0x1234], r23);`
- Support T_nf for NF=1 (status flags update suppression)
- `add(r20|T_nf, r21, r23);` // Set EVEX.NF=1
- Support T_zu for ND=ZU (zero upper) for imul and setcc
- `imul(ax|T_zu, cx, 0x1234);` // Set ND=ZU
- `imul(ax|T_zu|T_nf, cx, 0x1234);` // Set ND=ZU and EVEX.NF=1
- `setb(r31b|T_zu);` // same as setb(r31b); movzx(r31, r31b);
## Label
Two kinds of Label are supported. (String literal and Label class).

View file

@ -1,12 +1,17 @@
TARGET=../xbyak/xbyak_mnemonic.h
BIN=sortline gen_code gen_avx512
CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
CFLAGS=-I../ -I ./ -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
avx_type_def.h: ../xbyak/xbyak.h
sed -n '/@@@begin of avx_type_def.h/,/@@@end of avx_type_def.h/p' $< > $@
avx_type.hpp: avx_type_def.h
sortline: sortline.cpp
$(CXX) $(CFLAGS) $< -o $@
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
gen_code: gen_code.cpp avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
gen_avx512: gen_avx512.cpp avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
$(TARGET): $(BIN)
@ -36,4 +41,4 @@ VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
clean:
$(RM) $(BIN) $(TARGET)
$(RM) $(BIN) $(TARGET) avx_type_def.h

View file

@ -1,190 +1,74 @@
#include <assert.h>
// copy CodeGenerator::AVXtype
enum AVXtype {
// low 3 bit
T_N1 = 1,
T_N2 = 2,
T_N4 = 3,
T_N8 = 4,
T_N16 = 5,
T_N32 = 6,
T_NX_MASK = 7,
//
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
T_DUP = 1 << 4, // N = (8, 32, 64)
T_66 = 1 << 5, // pp = 1
T_F3 = 1 << 6, // pp = 2
T_F2 = T_66 | T_F3, // pp = 3
T_ER_R = 1 << 7, // reg{er}
T_0F = 1 << 8,
T_0F38 = 1 << 9,
T_0F3A = 1 << 10,
T_L0 = 1 << 11,
T_L1 = 1 << 12,
T_W0 = 1 << 13,
T_W1 = 1 << 14,
T_EW0 = 1 << 15,
T_EW1 = 1 << 16,
T_YMM = 1 << 17, // support YMM, ZMM
T_EVEX = 1 << 18,
T_ER_X = 1 << 19, // xmm{er}
T_ER_Y = 1 << 20, // ymm{er}
T_ER_Z = 1 << 21, // zmm{er}
T_SAE_X = 1 << 22, // xmm{sae}
T_SAE_Y = 1 << 23, // ymm{sae}
T_SAE_Z = 1 << 24, // zmm{sae}
T_MUST_EVEX = 1 << 25, // contains T_EVEX
T_B32 = 1 << 26, // m32bcst
T_B64 = 1 << 27, // m64bcst
T_B16 = T_B32 | T_B64, // m16bcst
T_M_K = 1 << 28, // mem{k}
T_VSIB = 1 << 29,
T_MEM_EVEX = 1 << 30, // use evex if mem
T_FP16 = 1 << 31,
T_MAP5 = T_FP16 | T_0F,
T_MAP6 = T_FP16 | T_0F38,
T_XXX
};
// T_66 = 1, T_F3 = 2, T_F2 = 3
uint32_t getPP(int type) { return (type >> 5) & 3; }
#include "avx_type_def.h"
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
std::string type2String(int type)
std::string type2String(uint64_t type)
{
if (type == 0) return "T_NONE";
std::string str;
int low = type & T_NX_MASK;
if (0 < low) {
if (0 < low && low < 7) {
const char *tbl[8] = {
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
};
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
str = tbl[low - 1];
}
if (type & T_N_VL) {
if (!str.empty()) str += " | ";
str += "T_N_VL";
}
if (type & T_DUP) {
if (!str.empty()) str += " | ";
str += "T_DUP";
}
if (type & T_F2) {
if (!str.empty()) str += " | ";
switch (type & T_F2) {
case T_66: str += "T_66"; break;
case T_F3: str += "T_F3"; break;
case T_F2: str += "T_F2"; break;
default: break;
}
}
if (type & T_N_VL) str += "|T_N_VL";
if (type & T_APX) str += "|T_APX";
if ((type & T_NX_MASK) == T_DUP) str += "|T_DUP";
if (type & T_66) str += "|T_66";
if (type & T_F3) str += "|T_F3";
if (type & T_F2) str += "|T_F2";
if (type & T_0F) {
if (!str.empty()) str += " | ";
if (type & T_FP16) {
str += "T_MAP5";
str += "|T_MAP5";
} else {
str += "T_0F";
str += "|T_0F";
}
}
if (type & T_0F38) {
if (!str.empty()) str += " | ";
if (type & T_FP16) {
str += "T_MAP6";
str += "|T_MAP6";
} else {
str += "T_0F38";
str += "|T_0F38";
}
}
if (type & T_0F3A) {
if (!str.empty()) str += " | ";
str += "T_0F3A";
}
if (type & T_L0) {
if (!str.empty()) str += " | ";
str += "VEZ_L0";
}
if (type & T_L1) {
if (!str.empty()) str += " | ";
str += "VEZ_L1";
}
if (type & T_W0) {
if (!str.empty()) str += " | ";
str += "T_W0";
}
if (type & T_W1) {
if (!str.empty()) str += " | ";
str += "T_W1";
}
if (type & T_EW0) {
if (!str.empty()) str += " | ";
str += "T_EW0";
}
if (type & T_EW1) {
if (!str.empty()) str += " | ";
str += "T_EW1";
}
if (type & T_YMM) {
if (!str.empty()) str += " | ";
str += "T_YMM";
}
if (type & T_EVEX) {
if (!str.empty()) str += " | ";
str += "T_EVEX";
}
if (type & T_ER_X) {
if (!str.empty()) str += " | ";
str += "T_ER_X";
}
if (type & T_ER_Y) {
if (!str.empty()) str += " | ";
str += "T_ER_Y";
}
if (type & T_ER_Z) {
if (!str.empty()) str += " | ";
str += "T_ER_Z";
}
if (type & T_ER_R) {
if (!str.empty()) str += " | ";
str += "T_ER_R";
}
if (type & T_SAE_X) {
if (!str.empty()) str += " | ";
str += "T_SAE_X";
}
if (type & T_SAE_Y) {
if (!str.empty()) str += " | ";
str += "T_SAE_Y";
}
if (type & T_SAE_Z) {
if (!str.empty()) str += " | ";
str += "T_SAE_Z";
}
if (type & T_MUST_EVEX) {
if (!str.empty()) str += " | ";
str += "T_MUST_EVEX";
}
if (type & T_B32) {
if (!str.empty()) str += " | ";
if (type & T_B64) {
str += "T_B16"; // T_B16 = T_B32 | T_B64
} else {
str += "T_B32";
}
} else if (type & T_B64) {
if (!str.empty()) str += " | ";
str += "T_B64";
}
if (type & T_M_K) {
if (!str.empty()) str += " | ";
str += "T_M_K";
}
if (type & T_VSIB) {
if (!str.empty()) str += " | ";
str += "T_VSIB";
}
if (type & T_MEM_EVEX) {
if (!str.empty()) str += " | ";
str += "T_MEM_EVEX";
if (type & T_0F3A) str += "|T_0F3A";
if (type & T_L0) str += "|T_L0";
if (type & T_L1) str += "|T_L1";
if (type & T_W0) str += "|T_W0";
if (type & T_W1) str += "|T_W1";
if (type & T_EW0) str += "|T_EW0";
if (type & T_EW1) str += "|T_EW1";
if (type & T_YMM) str += "|T_YMM";
if (type & T_EVEX) str += "|T_EVEX";
if (type & T_ER_X) str += "|T_ER_X";
if (type & T_ER_Y) str += "|T_ER_Y";
if (type & T_ER_Z) str += "|T_ER_Z";
if (type & T_ER_R) str += "|T_ER_R";
if (type & T_SAE_X) str += "|T_SAE_X";
if (type & T_SAE_Y) str += "|T_SAE_Y";
if (type & T_SAE_Z) str += "|T_SAE_Z";
if (type & T_MUST_EVEX) str += "|T_MUST_EVEX";
switch (type & T_B16) { // T_B16 = T_B32 | T_B64
case T_B16: str += "|T_B16"; break;
case T_B32: str += "|T_B32"; break;
case T_B64: str += "|T_B64"; break;
default: break;
}
if (type & T_M_K) str += "|T_M_K";
if (type & T_VSIB) str += "|T_VSIB";
if (type & T_MEM_EVEX) str += "|T_MEM_EVEX";
if (type & T_NF) str += "|T_NF";
if (type & T_CODE1_IF1) str += "|T_CODE1_IF1";
if (type & T_MAP3) str += "|T_MAP3";
if (type & T_ND1) str += "|T_ND1";
if (type & T_ZU) str += "|T_ZU";
if (type & T_MAP1) str += "|T_MAP1";
if (str[0] == '|') str = str.substr(1);
return str;
}

55
gen/avx_type_def.h Normal file
View file

@ -0,0 +1,55 @@
// @@@begin of avx_type_def.h
// Bit flags describing how an instruction is encoded. Values are combined with '|'.
static const uint64_t T_NONE = 0ull;
// low 3 bit (a small enumerated field, not independent flags)
static const uint64_t T_N1 = 1ull;
static const uint64_t T_N2 = 2ull;
static const uint64_t T_N4 = 3ull;
static const uint64_t T_N8 = 4ull;
static const uint64_t T_N16 = 5ull;
static const uint64_t T_N32 = 6ull;
static const uint64_t T_NX_MASK = 7ull;
static const uint64_t T_DUP = T_NX_MASK; // N = (8, 32, 64); formerly 1 << 4, now the value 7 of the low-3-bit field
static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL
static const uint64_t T_APX = 1ull << 4;
static const uint64_t T_66 = 1ull << 5; // pp = 1
static const uint64_t T_F3 = 1ull << 6; // pp = 2
static const uint64_t T_ER_R = 1ull << 7; // reg{er}
static const uint64_t T_0F = 1ull << 8;
static const uint64_t T_0F38 = 1ull << 9;
static const uint64_t T_0F3A = 1ull << 10;
static const uint64_t T_L0 = 1ull << 11;
static const uint64_t T_L1 = 1ull << 12;
static const uint64_t T_W0 = 1ull << 13;
static const uint64_t T_W1 = 1ull << 14;
static const uint64_t T_EW0 = 1ull << 15;
static const uint64_t T_EW1 = 1ull << 16;
static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
static const uint64_t T_EVEX = 1ull << 18;
static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
static const uint64_t T_ER_Y = 1ull << 20; // ymm{er}
static const uint64_t T_ER_Z = 1ull << 21; // zmm{er}
static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae}
static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae}
static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae}
static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX
static const uint64_t T_B32 = 1ull << 26; // m32bcst
static const uint64_t T_B64 = 1ull << 27; // m64bcst
static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful: both broadcast bits set, so test with (type & T_B16) == T_B16)
static const uint64_t T_M_K = 1ull << 28; // mem{k}
static const uint64_t T_VSIB = 1ull << 29;
static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
static const uint64_t T_MAP5 = T_FP16 | T_0F;
static const uint64_t T_MAP6 = T_FP16 | T_0F38;
static const uint64_t T_NF = 1ull << 32; // T_nf
static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
static const uint64_t T_MAP3 = 1ull << 34; // rorx only
static const uint64_t T_ND1 = 1ull << 35; // ND=1
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
static const uint64_t T_F2 = 1ull << 37; // pp = 3 (a bit of its own, not T_66 | T_F3)
static const uint64_t T_MAP1 = 1ull << 38; // kmov
// Translate the prefix flag in type into its pp value: T_66 -> 1, T_F3 -> 2, T_F2 -> 3, none -> 0.
// The flags are tested in that order, so the first one present decides the result.
static inline uint32_t getPP(uint64_t type)
{
	if (type & T_66) return 1;
	if (type & T_F3) return 2;
	if (type & T_F2) return 3;
	return 0;
}
// Translate the opcode-map flag in type into its mmm value: T_0F -> 1, T_0F38 -> 2, T_0F3A -> 3, none -> 0.
// T_MAP5 / T_MAP6 contain T_0F / T_0F38 respectively, so they yield 1 / 2 here.
static inline uint32_t getMMM(uint64_t type)
{
	if (type & T_0F) return 1;
	if (type & T_0F38) return 2;
	if (type & T_0F3A) return 3;
	return 0;
}
// @@@end of avx_type_def.h

View file

@ -15,8 +15,7 @@ using namespace Xbyak;
void putOpmask(bool only64bit)
{
if (only64bit) {
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }");
return;
}
@ -76,22 +75,14 @@ void putOpmask(bool only64bit)
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
}
}
puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
for (int i = 0; i < 4; i++) {
const char tbl[] = "bwdq";
const int bitTbl[] = { 8, 16, 32, 64 };
int bit = bitTbl[i];
printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit);
printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit);
if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit);
}
}
// vcmppd(k, x, op)
@ -100,7 +91,7 @@ void putVcmp()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
@ -142,9 +133,9 @@ void putVcmp()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
std::string s = type2String(p->type);
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
@ -173,7 +164,7 @@ void putX_XM()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
} tbl[] = {
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
@ -210,8 +201,8 @@ void putX_XM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
std::string s = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
@ -229,7 +220,7 @@ void putM_X()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
} tbl[] = {
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
@ -242,8 +233,8 @@ void putM_X()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
std::string s = type2String(p->type);
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
@ -252,7 +243,7 @@ void putXM_X()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
} tbl[] = {
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
@ -265,8 +256,8 @@ void putXM_X()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
std::string s = type2String(p->type);
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
}
}
@ -275,7 +266,7 @@ void putX_X_XM_IMM()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
@ -413,9 +404,9 @@ void putX_X_XM_IMM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
std::string s = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
@ -425,7 +416,7 @@ void putShift()
const char *name;
uint8_t code;
int idx;
int type;
uint64_t type;
} tbl[] = {
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
@ -435,8 +426,8 @@ void putShift()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
std::string s = type2String(p.type);
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code);
}
}
@ -446,7 +437,7 @@ void putExtractInsert()
const struct Tbl {
const char *name;
uint8_t code;
int type;
uint64_t type;
bool isZMM;
} tbl[] = {
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
@ -461,16 +452,16 @@ void putExtractInsert()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string s = type2String(p.type);
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, s.c_str(), p.code);
}
}
{
const struct Tbl {
const char *name;
uint8_t code;
int type;
uint64_t type;
bool isZMM;
} tbl[] = {
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
@ -485,12 +476,12 @@ void putExtractInsert()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string s = type2String(p.type);
const char *x = p.isZMM ? "Zmm" : "Ymm";
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, s.c_str(), p.code);
}
}
}
@ -501,7 +492,7 @@ void putBroadcast(bool only64bit)
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
int reg;
} tbl[] = {
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
@ -511,9 +502,9 @@ void putBroadcast(bool only64bit)
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string s = type2String(p.type);
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, s.c_str(), p.code);
}
}
}
@ -536,7 +527,7 @@ void putCvt()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
int ptn;
} tbl[] = {
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
@ -583,28 +574,28 @@ void putCvt()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
std::string s = type2String(p.type);
switch (p.ptn) {
case 0:
printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 1:
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 2:
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 3:
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 4:
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 5:
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
case 6:
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
break;
}
}
@ -621,7 +612,7 @@ void putGather()
{
const struct Tbl {
const char *name;
int type;
uint64_t type;
uint8_t code;
int mode;
} tbl[] = {
@ -636,15 +627,15 @@ void putGather()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
std::string s = type2String(p.type | T_VSIB);
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
}
}
void putScatter()
{
const struct Tbl {
const char *name;
int type;
uint64_t type;
uint8_t code;
int mode; // reverse of gather
} tbl[] = {
@ -660,8 +651,8 @@ void putScatter()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_VSIB);
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
std::string s = type2String(p.type | T_VSIB);
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
}
}
@ -689,7 +680,7 @@ void putMov()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
int mode;
} tbl[] = {
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
@ -718,8 +709,8 @@ void putMov()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
std::string s = type2String(p.type);
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, s.c_str(), p.code, p.mode ? "true" : "false");
}
}
}
@ -729,7 +720,7 @@ void putX_XM_IMM()
const struct Tbl {
uint8_t code;
const char *name;
int type;
uint64_t type;
bool hasIMM;
} tbl[] = {
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
@ -770,9 +761,9 @@ void putX_XM_IMM()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
std::string s = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
@ -784,7 +775,7 @@ void putMisc()
const struct Tbl {
const char *name;
int zm;
int type;
uint64_t type;
uint8_t code;
bool isZmm;
} tbl[] = {
@ -810,9 +801,9 @@ void putMisc()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
std::string s = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
, p.name, p.zm, s.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
}
}
@ -887,18 +878,18 @@ void putFP16_FMA()
{ "213", 0xA0 },
{ "231", 0xB0 },
};
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
uint64_t type = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
const char *suf = 0;
if (tbl[i].isPH) {
t |= T_ER_Z | T_YMM | T_B16;
type |= T_ER_Z | T_YMM | T_B16;
suf = "ph";
} else {
t |= T_ER_X | T_N2;
type |= T_ER_X | T_N2;
suf = "sh";
}
std::string type = type2String(t);
std::string s = type2String(type);
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
, tbl[i].name, ord[k].str, suf, s.c_str(), tbl[i].code | ord[k].code);
}
}
}
@ -914,23 +905,23 @@ void putFP16_FMA2()
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
for (int j = 0; j < 2; j++) {
int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
uint64_t type = T_MAP6 | T_EW0 | T_MUST_EVEX;
if (j == 0) {
t |= T_F2;
type |= T_F2;
} else {
t |= T_F3;
type |= T_F3;
}
const char *suf = 0;
if (tbl[i].isPH) {
t |= T_ER_Z | T_YMM | T_B32;
type |= T_ER_Z | T_YMM | T_B32;
suf = "ph";
} else {
t |= T_ER_X | T_N2;
type |= T_ER_X | T_N2;
suf = "sh";
}
std::string type = type2String(t);
std::string s = type2String(type);
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
, j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
, j == 0 ? "c" : "", tbl[i].name, suf, s.c_str(), tbl[i].code);
}
}
}
@ -938,16 +929,16 @@ void putFP16_FMA2()
void putFP16_2()
{
{
int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
std::string type = type2String(t);
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
uint64_t type = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
std::string s = type2String(type);
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
}
{
int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
std::string type = type2String(t);
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
std::string s = type2String(type);
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
XBYAK_INC=../xbyak/xbyak.h
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32 apx
XBYAK_INC=../xbyak/xbyak.h ../xbyak/xbyak_mnemonic.h
UNAME_S=$(shell uname -s)
ifeq ($(shell ./detect_x32),x32)
X32?=1
@ -31,30 +31,32 @@ CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
make_nm:
$(CXX) $(CFLAGS) make_nm.cpp -o $@
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
normalize_prefix: normalize_prefix.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
test_mmx: test_mmx.cpp ../xbyak/xbyak.h
test_mmx: test_mmx.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
jmp: jmp.cpp ../xbyak/xbyak.h
jmp: jmp.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
jmp64: jmp.cpp ../xbyak/xbyak.h
jmp64: jmp.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
address: address.cpp ../xbyak/xbyak.h
address: address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
address64: address.cpp ../xbyak/xbyak.h
address64: address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
bad_address: bad_address.cpp ../xbyak/xbyak.h
bad_address: bad_address.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) bad_address.cpp -o $@
misc: misc.cpp ../xbyak/xbyak.h
misc: misc.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) misc.cpp -o $@
misc32: misc.cpp ../xbyak/xbyak.h
misc32: misc.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) misc.cpp -o $@ -DXBYAK32
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
cvt_test: cvt_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
cvt_test32: cvt_test.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
noexception: noexception.cpp ../xbyak/xbyak.h
noexception: noexception.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions
apx: apx.cpp $(XBYAK_INC)
$(CXX) $(CFLAGS) apx.cpp -o $@
test_nm: normalize_prefix $(TARGET)
$(MAKE) -C ../gen
@ -71,6 +73,7 @@ endif
./misc
./misc32
./cvt_test
./apx
ifeq ($(BIT),64)
CXX=$(CXX) ./test_address.sh 64
ifneq ($(X32),1)

1777
test/apx.cpp Normal file

File diff suppressed because it is too large Load diff

View file

@ -109,8 +109,8 @@ CYBOZU_TEST_AUTO(changeBit)
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
{ 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
{ 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
{ &r16b, &r16w, &r16d, &r16, &xmm16, &ymm16, &zmm16 },
{ &r31b, &r31w, &r31d, &r31, &xmm31, &ymm31, &zmm31 },
};
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
#else

View file

@ -5,4 +5,10 @@ call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
echo *** test misc ***
set FILE=misc
call test_misc
echo *** test APX ***
set FILE=apx
call test_misc
echo *** all test end ***

View file

@ -1,4 +1,4 @@
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
misc
cl -I../ -I./ -DXBYAK_TEST %FILE%.cpp %OPT% /Od /Zi
%FILE%

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff