support vgather*

This commit is contained in:
MITSUNARI Shigeo 2016-07-20 14:39:54 +09:00
parent cdd1f24f48
commit 97743ee529
4 changed files with 136 additions and 26 deletions

View file

@ -355,6 +355,35 @@ void putCvt()
puts("#endif");
}
void putGather()
{
enum { // same as xbyak.h
xx_yy_zz = 0,
xx_yx_zy = 1,
xx_xy_yz = 2
};
const struct Tbl {
const char *name;
int type;
uint8 code;
int mode;
} tbl[] = {
{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
}
}
int main()
{
puts("#ifndef XBYAK_DISABLE_AVX512");
@ -369,5 +398,6 @@ int main()
putBroadcast();
#endif
putCvt();
putGather();
puts("#endif");
}

View file

@ -18,16 +18,16 @@ const uint64 IMM32 = 1ULL << 5;
const uint64 IMM8 = 1ULL << 6;
const uint64 _REG8 = 1ULL << 7;
const uint64 _REG16 = 1ULL << 8;
const uint64 NEG8 = 1ULL << 9;
const uint64 IMM16 = 1ULL << 10;
const uint64 NEG16 = 1ULL << 11;
const uint64 XMM_K = 1ULL << 9;
const uint64 YMM_K = 1ULL << 10;
const uint64 ZMM_K = 1ULL << 11;
const uint64 AX = 1ULL << 12;
const uint64 AL = 1ULL << 13;
const uint64 IMM_1 = 1ULL << 14;
const uint64 MEM8 = 1ULL << 15;
const uint64 MEM16 = 1ULL << 16;
const uint64 MEM32 = 1ULL << 17;
const uint64 ONE = 1ULL << 19;
const uint64 VM32Z = 1ULL << 19;
const uint64 CL = 1ULL << 20;
const uint64 MEM_ONLY_DISP = 1ULL << 21;
const uint64 NEG32 = 1ULL << 23;
@ -337,32 +337,24 @@ class Test {
return "al";
case CL:
return "cl";
case ONE:
return "1";
case IMM32:
return isXbyak_ ? "12345678" : "dword 12345678";
case IMM16:
return isXbyak_ ? "1000" : "word 1000";
case IMM8:
return isXbyak_ ? "4" : "byte 4";
case NEG8:
return isXbyak_ ? "-30" : "byte -30";
case NEG16:
return isXbyak_ ? "-1000" : "word -1000";
case NEG32:
return isXbyak_ ? "-100000" : "dword -100000";
case IMM_1:
return "4";
case IMM_2:
return isXbyak_ ? "0xda" : "0xda";
case VM32X_32:
return isXbyak_ ? "ptr [ebp+4+xmm1*8]" : "[ebp+4+xmm1*8]";
return isXbyak_ ? "ptr [ebp+64+xmm1*8]" : "[ebp+64+xmm1*8]";
case VM32X_64:
return isXbyak_ ? "ptr [12345+xmm13*2]" : "[12345+xmm13*2]";
return isXbyak_ ? "ptr [rax+64+xmm13*2]" : "[rax+64+xmm13*2]";
case VM32Y_32:
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
case VM32Y_64:
return isXbyak_ ? "ptr [12345+ymm13*2+r13]" : "[12345+ymm13*2+r13]";
return isXbyak_ ? "ptr [64+ymm13*2+r13]" : "[64+ymm13*2+r13]";
case VM32Z:
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
case M_1to4: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to4}";
case M_1to8: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to8}";
@ -417,6 +409,12 @@ class Test {
case MEM_K:
return isXbyak_ ? "ptr [eax] | k1" : "[eax]{k1}";
#endif
case XMM_K:
return isXbyak_ ? "xmm5 | k7" : "xmm5{k7}";
case YMM_K:
return isXbyak_ ? "ymm5 | k4" : "ymm5{k4}";
case ZMM_K:
return isXbyak_ ? "zmm5 | k3" : "zmm5{k3}";
}
return 0;
}
@ -1546,12 +1544,55 @@ public:
put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
#endif
}
void putGather()
{
#ifdef XBYAK64
	/*
		Emit test lines for every gather mnemonic. Each mnemonic belongs to one of
		three modes, and each mode selects three (masked register, VSIB memory)
		operand combinations that are passed to put() in the order listed.
	*/
	enum {
		xx_yy_zz,
		xx_yx_zy,
		xx_xy_yz
	};
	// forms[mode][n] = { masked destination operand, vector-indexed memory operand }
	const uint64 forms[3][3][2] = {
		/* xx_yy_zz */ { { XMM_K, VM32X }, { YMM_K, VM32Y }, { ZMM_K, VM32Z } },
		/* xx_yx_zy */ { { XMM_K, VM32X }, { YMM_K, VM32X }, { ZMM_K, VM32Y } },
		/* xx_xy_yz */ { { XMM_K, VM32X }, { XMM_K, VM32Y }, { YMM_K, VM32Z } },
	};
	const struct Entry {
		const char *mnemonic;
		int mode; // always one of the three enum values above, so it is a valid row index
	} entries[] = {
		{ "vpgatherdd", xx_yy_zz },
		{ "vpgatherdq", xx_yx_zy },
		{ "vpgatherqd", xx_xy_yz },
		{ "vpgatherqq", xx_yy_zz },
		{ "vgatherdps", xx_yy_zz },
		{ "vgatherdpd", xx_yx_zy },
		{ "vgatherqps", xx_xy_yz },
		{ "vgatherqpd", xx_yy_zz },
	};
	for (size_t idx = 0; idx < NUM_OF_ARRAY(entries); idx++) {
		const Entry& e = entries[idx];
		const uint64 (*form)[2] = forms[e.mode];
		for (int n = 0; n < 3; n++) {
			put(e.mnemonic, form[n][0], form[n][1]);
		}
	}
#endif
}
void putMin()
{
	// Reduced test set: only the gather tests are emitted, and only when XBYAK64
	// is defined. The put512_cvt() call stays disabled here.
#if defined(XBYAK64)
	putGather();
#endif
}
void putAVX512()
@ -1588,6 +1629,8 @@ public:
put512_cvt();
separateFunc();
putMisc1();
separateFunc();
putGather();
#endif
}
};

View file

@ -173,6 +173,7 @@ enum {
ERR_ER_IS_INVALID,
ERR_INVALID_BROADCAST,
ERR_INVALID_OPMASK_WITH_MEMORY,
ERR_INVALID_ZERO,
ERR_INTERNAL
};
@ -231,6 +232,7 @@ public:
"er(embedded rounding) is invalid",
"invalid broadcast",
"invalid opmask with memory",
"invalid zero",
"internal error",
};
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
@ -663,7 +665,7 @@ public:
: scale_(scale)
, disp_(0)
{
if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER);
if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE);
if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
index_ = r;
@ -671,8 +673,7 @@ public:
base_ = r;
}
}
bool isVsib() const { return index_.isBit(128|256); }
bool isYMM() const { return index_.isBit(256); }
bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
void optimize()
{
// [reg * 2] => [reg + reg]
@ -1416,7 +1417,7 @@ private:
T_RZ_SAE = 4,
T_SAE = 5,
};
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0)
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, int VL = 0)
{
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
int w = (type & T_EW1) ? 1 : 0;
@ -1441,7 +1442,8 @@ private:
}
b = true;
} else {
int VL = Max(Max(reg.getBit(), base.getBit()), (v ? v->getBit() : 0));
if (v) VL = Max(VL, v->getBit());
VL = Max(Max(reg.getBit(), base.getBit()), VL);
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
if (b) {
disp8N = (type & T_B32) ? 4 : 8;
@ -1793,7 +1795,8 @@ private:
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
b = true;
}
disp8N = evex(r, base, p1, type, code, x, b, aaa);
int VL = addr.getRegExp().isVsib() ? addr.getRegExp().getIndex().getBit() : 0;
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
} else {
vex(r, base, p1, type, code, x);
}
@ -1888,11 +1891,11 @@ private:
}
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
{
if (!addr.getRegExp().isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
if (!addr.getRegExp().isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
const int y_vx_y = 0;
const int y_vy_y = 1;
// const int x_vy_x = 2;
const bool isAddrYMM = addr.getRegExp().isYMM();
const bool isAddrYMM = addr.getRegExp().getIndex().getBit() == 256;
if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
bool isOK = false;
if (mode == y_vx_y) {
@ -1907,6 +1910,32 @@ private:
addr.permitVsib();
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
}
// Operand-width modes for opGather2()/checkGather2().
// Each name lists three "destination, index" register pairs (x = xmm, y = ymm, z = zmm).
// checkGather2() always accepts an xmm destination with a 128-bit vector index and
// additionally accepts the pairs noted per mode below.
enum {
xx_yy_zz = 0, // ymm destination + 256-bit index, zmm destination + 512-bit index
xx_yx_zy = 1, // ymm destination + 128-bit index, zmm destination + 256-bit index
xx_xy_yz = 2 // xmm destination + 256-bit index, ymm destination + 512-bit index
};
void checkGather2(const Xmm& x, const Address& addr, int mode) const
{
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
const RegExp& re = addr.getRegExp();
if (x.isXMM() && re.isVsib(128)) return;
switch (mode) {
case xx_yy_zz: if ((x.isYMM() && re.isVsib(256)) || (x.isZMM() && re.isVsib(512))) return;
break;
case xx_yx_zy: if ((x.isYMM() && re.isVsib(128)) || (x.isZMM() && re.isVsib(256))) return;
break;
case xx_xy_yz: if ((x.isXMM() && re.isVsib(256)) || (x.isYMM() && re.isVsib(512))) return;
break;
}
throw Error(ERR_BAD_VSIB_ADDRESSING);
}
/*
	Common entry point for the gather wrappers that take (register, address).
	@param x destination register
	@param addr memory operand with a vector index
	@param type T_* flag set, forwarded unchanged to opVex
	@param code opcode byte, forwarded unchanged to opVex
	@param mode xx_yy_zz / xx_yx_zy / xx_xy_yz, used only for validation
	Any Error thrown by checkGather2() propagates to the caller.
*/
void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
{
// validate first: checkGather2 throws before anything else is done with addr
checkGather2(x, addr, mode);
// NOTE(review): presumably marks addr as allowed to carry a vector index — confirm against Address::permitVsib
addr.permitVsib();
// 0 is passed as opVex's second argument
opVex(x, 0, addr, type, code);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;

View file

@ -173,4 +173,12 @@ void vcvtss2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()
void vcvttsd2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_F2 | T_0F | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, 0x78); }
void vcvttss2usi(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_F3 | T_0F | T_MUST_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x78); }
#endif
// Gather wrappers taking (register, address); each forwards to opGather2().
// The arguments after addr are the T_* flag set, the opcode byte and the mode
// that opGather2() hands to checkGather2(): 0 = xx_yy_zz, 1 = xx_yx_zy, 2 = xx_xy_yz.
// NOTE(review): these lines match the printf format in the generator's putGather(),
// so they look generated — confirm before editing them by hand.
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x90, 0); }
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x90, 1); }
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x91, 2); }
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x91, 0); }
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x92, 0); }
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x92, 1); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_N4, 0x93, 2); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_N8, 0x93, 0); }
#endif