mirror of
https://github.com/herumi/xbyak
synced 2024-11-21 16:09:11 -07:00
add vscatter*
This commit is contained in:
parent
f5c64c1dfa
commit
d5112c7b13
4 changed files with 102 additions and 28 deletions
|
@ -465,13 +465,13 @@ void putCvt()
|
|||
puts("#endif");
|
||||
}
|
||||
|
||||
enum { // same as xbyak.h
|
||||
xx_yy_zz = 0,
|
||||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2,
|
||||
};
|
||||
void putGather()
|
||||
{
|
||||
enum { // same as xbyak.h
|
||||
xx_yy_zz = 0,
|
||||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2
|
||||
};
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
|
@ -493,6 +493,30 @@ void putGather()
|
|||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
// Prints the C++ source for the AVX-512 scatter mnemonics to stdout.
// For every row of the table one line is written that defines
//   void NAME(const Address& addr, const Xmm& x)
// and forwards to opGather2(x, addr, <type flags>, <opcode>, <mode>).
void putScatter()
|
||||
{
|
||||
// One row per mnemonic.
const struct Tbl {
|
||||
// Mnemonic; used verbatim as the name of the emitted function.
const char *name;
|
||||
// T_* attribute flags; turned into text with type2String() before printing.
int type;
|
||||
// Opcode byte, printed as a two-digit hex literal (0x%02X).
uint8 code;
|
||||
// One of xx_yy_zz / xx_yx_zy / xx_xy_yz, printed as its integer value (%d).
int mode; // reverse of gather
|
||||
} tbl[] = {
|
||||
{ "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
|
||||
{ "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
|
||||
{ "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
|
||||
{ "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
|
||||
|
||||
{ "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
|
||||
{ "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
|
||||
{ "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
|
||||
{ "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
// The emitted signature takes (addr, x), but opGather2 is still called with (x, addr).
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
|
||||
void putShuff()
|
||||
{
|
||||
|
@ -606,5 +630,6 @@ int main()
|
|||
putMov();
|
||||
putX_XM_IMM();
|
||||
putMisc();
|
||||
putScatter();
|
||||
puts("#endif");
|
||||
}
|
||||
|
|
|
@ -30,12 +30,13 @@ const uint64 MEM32 = 1ULL << 17;
|
|||
const uint64 VM32Z = 1ULL << 19;
|
||||
const uint64 K_K = 1ULL << 20;
|
||||
const uint64 MEM_ONLY_DISP = 1ULL << 21;
|
||||
//const uint64 QQQ = 1ULL << 23;
|
||||
const uint64 VM32X_K = 1ULL << 23;
|
||||
const uint64 _YMM = 1ULL << 24;
|
||||
const uint64 VM32X_32 = 1ULL << 39;
|
||||
const uint64 VM32X_64 = 1ULL << 40;
|
||||
const uint64 VM32Y_32 = 1ULL << 41;
|
||||
const uint64 VM32Y_64 = 1ULL << 42;
|
||||
const uint64 VM32Z_K = 1ULL << 32;
|
||||
#ifdef XBYAK64
|
||||
const uint64 _MEMe = 1ULL << 25;
|
||||
const uint64 REG32_2 = 1ULL << 26; // r8d, ...
|
||||
|
@ -44,7 +45,6 @@ const uint64 REG8_2 = 1ULL << 28; // r8b, ...
|
|||
const uint64 REG8_3 = 1ULL << 29; // spl, ...
|
||||
const uint64 _REG64 = 1ULL << 30; // rax, ...
|
||||
const uint64 _REG64_2 = 1ULL << 31; // r8, ...
|
||||
const uint64 RAX = 1ULL << 32;
|
||||
const uint64 _XMM2 = 1ULL << 33;
|
||||
const uint64 _YMM2 = 1ULL << 34;
|
||||
const uint64 VM32X = VM32X_32 | VM32X_64;
|
||||
|
@ -57,13 +57,12 @@ const uint64 REG8_2 = 0;
|
|||
const uint64 REG8_3 = 0;
|
||||
const uint64 _REG64 = 0;
|
||||
const uint64 _REG64_2 = 0;
|
||||
const uint64 RAX = 0;
|
||||
const uint64 _XMM2 = 0;
|
||||
const uint64 _YMM2 = 0;
|
||||
const uint64 VM32X = VM32X_32;
|
||||
const uint64 VM32Y = VM32Y_32;
|
||||
#endif
|
||||
const uint64 REG64 = _REG64 | _REG64_2 | RAX;
|
||||
const uint64 REG64 = _REG64 | _REG64_2;
|
||||
const uint64 REG32 = _REG32 | REG32_2 | EAX;
|
||||
const uint64 REG16 = _REG16 | REG16_2 | AX;
|
||||
const uint64 REG32e = REG32 | REG64;
|
||||
|
@ -71,7 +70,7 @@ const uint64 REG8 = _REG8 | REG8_2|AL;
|
|||
const uint64 MEM = _MEM | _MEMe;
|
||||
const uint64 MEM64 = 1ULL << 35;
|
||||
const uint64 YMM_ER = 1ULL << 36;
|
||||
const uint64 STi = 1ULL << 37;
|
||||
const uint64 VM32Y_K = 1ULL << 37;
|
||||
const uint64 IMM_2 = 1ULL << 38;
|
||||
const uint64 IMM = IMM_1 | IMM_2;
|
||||
const uint64 XMM = _XMM | _XMM2;
|
||||
|
@ -171,9 +170,6 @@ class Test {
|
|||
const char *get(uint64 type) const
|
||||
{
|
||||
int idx = (rand() / 31) & 7;
|
||||
if (type == STi) {
|
||||
return "st2";
|
||||
}
|
||||
switch (type) {
|
||||
case _XMM:
|
||||
{
|
||||
|
@ -326,8 +322,6 @@ class Test {
|
|||
};
|
||||
return Reg8_3Tbl[idx];
|
||||
}
|
||||
case RAX:
|
||||
return "rax";
|
||||
#endif
|
||||
case EAX:
|
||||
return "eax";
|
||||
|
@ -353,6 +347,12 @@ class Test {
|
|||
return isXbyak_ ? "ptr [ymm4]" : "[ymm4]";
|
||||
case VM32Y_64:
|
||||
return isXbyak_ ? "ptr [64+ymm13*2+r13]" : "[64+ymm13*2+r13]";
|
||||
case VM32X_K:
|
||||
return isXbyak_ ? "ptr [64+xmm13*2+r13] | k6" : "[64+xmm13*2+r13]{k6}";
|
||||
case VM32Y_K:
|
||||
return isXbyak_ ? "ptr [64+ymm13*2+r13] | k6" : "[64+ymm13*2+r13]{k6}";
|
||||
case VM32Z_K:
|
||||
return isXbyak_ ? "ptr [64+zmm13*2+r13] | k6" : "[64+zmm13*2+r13]{k6}";
|
||||
case VM32Z:
|
||||
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
|
||||
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
|
||||
|
@ -1547,14 +1547,14 @@ public:
|
|||
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||
#endif
|
||||
}
|
||||
enum {
|
||||
xx_yy_zz,
|
||||
xx_yx_zy,
|
||||
xx_xy_yz
|
||||
};
|
||||
void putGather()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
enum {
|
||||
xx_yy_zz,
|
||||
xx_yx_zy,
|
||||
xx_xy_yz
|
||||
};
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int mode;
|
||||
|
@ -1588,6 +1588,45 @@ public:
|
|||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// Emits the operand combinations to be tested for each scatter mnemonic.
// The whole body is compiled only when XBYAK64 is defined; otherwise the
// function does nothing.
// NOTE(review): put() is defined outside this view; presumably it writes one
// test line per (name, operand types) combination -- confirm.
void putScatter()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
const struct Tbl {
|
||||
// Mnemonic passed to put().
const char *name;
|
||||
// Selects which (address type, register type) triples are emitted below.
int mode;
|
||||
} tbl[] = {
|
||||
{ "vpscatterdd", xx_yy_zz },
|
||||
{ "vpscatterdq", xx_xy_yz },
|
||||
{ "vpscatterqd", xx_yx_zy },
|
||||
{ "vpscatterqq", xx_yy_zz },
|
||||
|
||||
{ "vscatterdps", xx_yy_zz },
|
||||
{ "vscatterdpd", xx_xy_yz },
|
||||
{ "vscatterqps", xx_yx_zy },
|
||||
{ "vscatterqpd", xx_yy_zz },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
// In every put() call the first type is the masked VSIB address
// (VM32X_K / VM32Y_K / VM32Z_K) and the second is the register type.
switch (p.mode) {
|
||||
// Address types X, Y, Z paired with registers XMM, YMM, ZMM.
case xx_yy_zz:
|
||||
put(p.name, VM32X_K, _XMM);
|
||||
put(p.name, VM32Y_K, _YMM);
|
||||
put(p.name, VM32Z_K, _ZMM);
|
||||
break;
|
||||
// Address types X, Y, Z paired with registers XMM, XMM, YMM.
case xx_yx_zy:
|
||||
put(p.name, VM32X_K, _XMM);
|
||||
put(p.name, VM32Y_K, _XMM);
|
||||
put(p.name, VM32Z_K, _YMM);
|
||||
break;
|
||||
// Address types X, X, Y paired with registers XMM, YMM, ZMM.
case xx_xy_yz:
|
||||
put(p.name, VM32X_K, _XMM);
|
||||
put(p.name, VM32X_K, _YMM);
|
||||
put(p.name, VM32Y_K, _ZMM);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
void putBlend()
|
||||
|
@ -1929,6 +1968,7 @@ public:
|
|||
// Reduced entry point that currently runs only the scatter tests.
// putScatter() is called only when XBYAK64 is defined; on other builds this
// function is a no-op.
void putMin()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
putScatter();
|
||||
#endif
|
||||
}
|
||||
void putAVX512()
|
||||
|
@ -1985,6 +2025,8 @@ public:
|
|||
putMov();
|
||||
separateFunc();
|
||||
putRot();
|
||||
separateFunc();
|
||||
putScatter();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1915,24 +1915,23 @@ private:
|
|||
xx_yx_zy = 1,
|
||||
xx_xy_yz = 2
|
||||
};
|
||||
void checkGather2(const Xmm& x, const Address& addr, int mode) const
|
||||
void checkGather2(const Xmm& x1, const Reg& x2, int mode) const
|
||||
{
|
||||
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||
const RegExp& re = addr.getRegExp();
|
||||
if (x.isXMM() && re.isVsib(128)) return;
|
||||
if (x1.isXMM() && x2.isXMM()) return;
|
||||
switch (mode) {
|
||||
case xx_yy_zz: if ((x.isYMM() && re.isVsib(256)) || (x.isZMM() && re.isVsib(512))) return;
|
||||
case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
|
||||
break;
|
||||
case xx_yx_zy: if ((x.isYMM() && re.isVsib(128)) || (x.isZMM() && re.isVsib(256))) return;
|
||||
case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
|
||||
break;
|
||||
case xx_xy_yz: if ((x.isXMM() && re.isVsib(256)) || (x.isYMM() && re.isVsib(512))) return;
|
||||
case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
|
||||
break;
|
||||
}
|
||||
throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
}
|
||||
void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode)
|
||||
{
|
||||
checkGather2(x, addr, mode);
|
||||
if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
|
||||
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
||||
addr.permitVsib();
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
|
|
|
@ -293,4 +293,12 @@ void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 |
|
|||
void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
|
||||
void vrndscalepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
|
||||
void vrndscaleps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
|
||||
// AVX-512 scatter mnemonics. Each takes the destination address first and the
// source register second, then forwards to opGather2(x, addr, flags, opcode, mode).
// The last argument is the register/index width mode passed on to opGather2
// (0, 1 or 2).
// NOTE(review): these lines match the printf format of the generator's
// putScatter(), so they are presumably generated -- confirm before hand-editing.
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA0, 0); }
|
||||
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8, 0xA0, 1); }
|
||||
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA1, 2); }
|
||||
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8, 0xA1, 0); }
|
||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA2, 0); }
|
||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8, 0xA2, 1); }
|
||||
void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA3, 2); }
|
||||
void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8, 0xA3, 0); }
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue