mirror of
https://github.com/herumi/xbyak
synced 2024-11-21 16:09:11 -07:00
support vgather*
This commit is contained in:
parent
cdd1f24f48
commit
97743ee529
4 changed files with 136 additions and 26 deletions
|
@ -355,6 +355,35 @@ void putCvt()
|
|||
puts("#endif");
|
||||
}
|
||||
|
||||
void putGather()
|
||||
{
|
||||
enum { // same as xbyak.h
|
||||
xx_yy_zz = 0,
|
||||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2
|
||||
};
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8 code;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
|
||||
{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
|
||||
{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
|
||||
{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
|
||||
{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
|
||||
{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
|
||||
{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
|
||||
{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
puts("#ifndef XBYAK_DISABLE_AVX512");
|
||||
|
@ -369,5 +398,6 @@ int main()
|
|||
putBroadcast();
|
||||
#endif
|
||||
putCvt();
|
||||
putGather();
|
||||
puts("#endif");
|
||||
}
|
||||
|
|
|
@ -18,16 +18,16 @@ const uint64 IMM32 = 1ULL << 5;
|
|||
const uint64 IMM8 = 1ULL << 6;
|
||||
const uint64 _REG8 = 1ULL << 7;
|
||||
const uint64 _REG16 = 1ULL << 8;
|
||||
const uint64 NEG8 = 1ULL << 9;
|
||||
const uint64 IMM16 = 1ULL << 10;
|
||||
const uint64 NEG16 = 1ULL << 11;
|
||||
const uint64 XMM_K = 1ULL << 9;
|
||||
const uint64 YMM_K = 1ULL << 10;
|
||||
const uint64 ZMM_K = 1ULL << 11;
|
||||
const uint64 AX = 1ULL << 12;
|
||||
const uint64 AL = 1ULL << 13;
|
||||
const uint64 IMM_1 = 1ULL << 14;
|
||||
const uint64 MEM8 = 1ULL << 15;
|
||||
const uint64 MEM16 = 1ULL << 16;
|
||||
const uint64 MEM32 = 1ULL << 17;
|
||||
const uint64 ONE = 1ULL << 19;
|
||||
const uint64 VM32Z = 1ULL << 19;
|
||||
const uint64 CL = 1ULL << 20;
|
||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
const uint64 NEG32 = 1ULL << 23;
|
||||
|
@ -337,32 +337,24 @@ class Test {
|
|||
return "al";
|
||||
case CL:
|
||||
return "cl";
|
||||
case ONE:
|
||||
return "1";
|
||||
case IMM32:
|
||||
return isXbyak_ ? "12345678" : "dword 12345678";
|
||||
case IMM16:
|
||||
return isXbyak_ ? "1000" : "word 1000";
|
||||
case IMM8:
|
||||
return isXbyak_ ? "4" : "byte 4";
|
||||
case NEG8:
|
||||
return isXbyak_ ? "-30" : "byte -30";
|
||||
case NEG16:
|
||||
return isXbyak_ ? "-1000" : "word -1000";
|
||||
case NEG32:
|
||||
return isXbyak_ ? "-100000" : "dword -100000";
|
||||
case IMM_1:
|
||||
return "4";
|
||||
case IMM_2:
|
||||
return isXbyak_ ? "0xda" : "0xda";
|
||||
case VM32X_32:
|
||||
return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
|
||||
return isXbyak_ ? "ptr [ebp+64+xmm1*8]" : "[ebp+64+xmm1*8]";
|
||||
case VM32X_64:
|
||||
return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
|
||||
return isXbyak_ ? "ptr [rax+64+xmm13*2]" : "[rax+64+xmm13*2]";
|
||||
case VM32Y_32:
|
||||
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
|
||||
case VM32Y_64:
|
||||
return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
|
||||
return isXbyak_ ? "ptr [64+ymm13*2+r13]" : "[64+ymm13*2+r13]";
|
||||
case VM32Z:
|
||||
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
|
||||
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
|
||||
case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}";
|
||||
case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}";
|
||||
|
@ -417,6 +409,12 @@ class Test {
|
|||
case MEM_K:
|
||||
return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
|
||||
#endif
|
||||
case XMM_K:
|
||||
return isXbyak_ ? "xmm5 | k7" : "xmm5{k7}";
|
||||
case YMM_K:
|
||||
return isXbyak_ ? "ymm5 | k4" : "ymm5{k4}";
|
||||
case ZMM_K:
|
||||
return isXbyak_ ? "zmm5 | k3" : "zmm5{k3}";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1546,12 +1544,55 @@ public:
|
|||
|
||||
put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||
#endif
|
||||
}
|
||||
// Emits test lines for the eight EVEX gather mnemonics (64-bit builds only).
// For each mnemonic three put() calls are made, pairing an opmask-qualified
// destination (XMM_K / YMM_K / ZMM_K) with a vector-indexed memory operand
// (VM32X / VM32Y / VM32Z) according to the mnemonic's pairing mode.
void putGather()
{
#ifdef XBYAK64
	// Pairing mode names read as <dest><index> for each of the three forms,
	// e.g. xx_yx_zy = (xmm, xmm-index), (ymm, xmm-index), (zmm, ymm-index).
	enum {
		xx_yy_zz,
		xx_yx_zy,
		xx_xy_yz
	};
	const struct Entry {
		const char *mnemonic;
		int pairing;
	} entries[] = {
		{ "vpgatherdd", xx_yy_zz },
		{ "vpgatherdq", xx_yx_zy },
		{ "vpgatherqd", xx_xy_yz },
		{ "vpgatherqq", xx_yy_zz },
		{ "vgatherdps", xx_yy_zz },
		{ "vgatherdpd", xx_yx_zy },
		{ "vgatherqps", xx_xy_yz },
		{ "vgatherqpd", xx_yy_zz },
	};
	const Entry *const last = entries + NUM_OF_ARRAY(entries);
	for (const Entry *e = entries; e != last; ++e) {
		// Every pairing mode starts with the xmm destination / xmm index form.
		put(e->mnemonic, XMM_K, VM32X);
		if (e->pairing == xx_yy_zz) {
			put(e->mnemonic, YMM_K, VM32Y);
			put(e->mnemonic, ZMM_K, VM32Z);
		} else if (e->pairing == xx_yx_zy) {
			put(e->mnemonic, YMM_K, VM32X);
			put(e->mnemonic, ZMM_K, VM32Y);
		} else if (e->pairing == xx_xy_yz) {
			put(e->mnemonic, XMM_K, VM32Y);
			put(e->mnemonic, YMM_K, VM32Z);
		}
	}
#endif
}
|
||||
// Reduced test set: emits only the gather cases, and only on 64-bit builds.
// The put512_cvt() call is kept commented out so it can be switched back in
// when a quick run of that group is wanted instead.
void putMin()
{
#ifdef XBYAK64
//	put512_cvt();
	putGather();
#endif
}
|
||||
void putAVX512()
|
||||
|
@ -1588,6 +1629,8 @@ public:
|
|||
put512_cvt();
|
||||
separateFunc();
|
||||
putMisc1();
|
||||
separateFunc();
|
||||
putGather();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
|
|
@ -173,6 +173,7 @@ enum {
|
|||
ERR_ER_IS_INVALID,
|
||||
ERR_INVALID_BROADCAST,
|
||||
ERR_INVALID_OPMASK_WITH_MEMORY,
|
||||
ERR_INVALID_ZERO,
|
||||
ERR_INTERNAL
|
||||
};
|
||||
|
||||
|
@ -231,6 +232,7 @@ public:
|
|||
"er(embedded rounding) is invalid",
|
||||
"invalid broadcast",
|
||||
"invalid opmask with memory",
|
||||
"invalid zero",
|
||||
"internal error",
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
|
@ -663,7 +665,7 @@ public:
|
|||
: scale_(scale)
|
||||
, disp_(0)
|
||||
{
|
||||
if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
|
||||
if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
|
||||
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE);
|
||||
if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
|
||||
index_ = r;
|
||||
|
@ -671,8 +673,7 @@ public:
|
|||
base_ = r;
|
||||
}
|
||||
}
|
||||
bool isVsib() const { return index_.isBit(128|256); }
|
||||
bool isYMM() const { return index_.isBit(256); }
|
||||
bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
|
||||
void optimize()
|
||||
{
|
||||
// [reg * 2] => [reg + reg]
|
||||
|
@ -1416,7 +1417,7 @@ private:
|
|||
T_RZ_SAE = 4,
|
||||
T_SAE = 5,
|
||||
};
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0)
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, int VL = 0)
|
||||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||
int w = (type & T_EW1) ? 1 : 0;
|
||||
|
@ -1441,7 +1442,8 @@ private:
|
|||
}
|
||||
b = true;
|
||||
} else {
|
||||
int VL = Max(Max(reg.getBit(), base.getBit()), (v ? v->getBit() : 0));
|
||||
if (v) VL = Max(VL, v->getBit());
|
||||
VL = Max(Max(reg.getBit(), base.getBit()), VL);
|
||||
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
||||
if (b) {
|
||||
disp8N = (type & T_B32) ? 4 : 8;
|
||||
|
@ -1793,7 +1795,8 @@ private:
|
|||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||
b = true;
|
||||
}
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa);
|
||||
int VL = addr.getRegExp().isVsib() ? addr.getRegExp().getIndex().getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
||||
} else {
|
||||
vex(r, base, p1, type, code, x);
|
||||
}
|
||||
|
@ -1888,11 +1891,11 @@ private:
|
|||
}
|
||||
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
|
||||
{
|
||||
if (!addr.getRegExp().isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
if (!addr.getRegExp().isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
const int y_vx_y = 0;
|
||||
const int y_vy_y = 1;
|
||||
// const int x_vy_x = 2;
|
||||
const bool isAddrYMM = addr.getRegExp().isYMM();
|
||||
const bool isAddrYMM = addr.getRegExp().getIndex().getBit() == 256;
|
||||
if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
|
||||
bool isOK = false;
|
||||
if (mode == y_vx_y) {
|
||||
|
@ -1907,6 +1910,32 @@ private:
|
|||
addr.permitVsib();
|
||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
|
||||
}
|
||||
enum {
|
||||
xx_yy_zz = 0,
|
||||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2
|
||||
};
|
||||
void checkGather2(const Xmm& x, const Address& addr, int mode) const
|
||||
{
|
||||
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||
const RegExp& re = addr.getRegExp();
|
||||
if (x.isXMM() && re.isVsib(128)) return;
|
||||
switch (mode) {
|
||||
case xx_yy_zz: if ((x.isYMM() && re.isVsib(256)) || (x.isZMM() && re.isVsib(512))) return;
|
||||
break;
|
||||
case xx_yx_zy: if ((x.isYMM() && re.isVsib(128)) || (x.isZMM() && re.isVsib(256))) return;
|
||||
break;
|
||||
case xx_xy_yz: if ((x.isXMM() && re.isVsib(256)) || (x.isYMM() && re.isVsib(512))) return;
|
||||
break;
|
||||
}
|
||||
throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
}
|
||||
void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
|
||||
{
|
||||
checkGather2(x, addr, mode);
|
||||
addr.permitVsib();
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
|
|
|
@ -173,4 +173,12 @@ void vcvtss2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()
|
|||
void vcvttsd2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_F2 | T_0F | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, 0x78); }
|
||||
void vcvttss2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_F3 | T_0F | T_MUST_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x78); }
|
||||
#endif
|
||||
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x90, 0); }
|
||||
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x90, 1); }
|
||||
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x91, 2); }
|
||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x91, 0); }
|
||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x92, 0); }
|
||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x92, 1); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x93, 2); }
|
||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x93, 0); }
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue