add vfpclass*

This commit is contained in:
MITSUNARI Shigeo 2016-07-23 08:45:16 +09:00
parent 07c593c35b
commit fbde291731
4 changed files with 55 additions and 14 deletions

View file

@ -135,8 +135,6 @@ void putVcmp()
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
//QQQ { 0x66, "vfpclassps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM |T_EW0 | T_B32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@ -663,6 +661,11 @@ void putMisc()
puts("void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4, 0xC7, Operand::ZMM); }");
puts("void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC6, Operand::YMM); }");
puts("void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC7, Operand::ZMM); }");
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
}
int main()

View file

@ -2047,6 +2047,39 @@ public:
put("vpmadd52huq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
put("vpmadd52huq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
put("vpmadd52huq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
#endif
}
// Print one test line for instruction `nm` using a memory source operand:
// the mnemonic, destination k5 masked by k3, the address [rax+64] and the
// immediate 5.
//   x         : width letter placed in front of "word" ('x', 'y' or 'z')
//   broadcast : emit the broadcast form of the memory operand
//   size      : element size in bits; only used to compute N of {1toN}
void classSubMem(const char *nm, char x, bool broadcast, int size)
{
	printf("%s ", nm);
	if (isXbyak_) {
		// Xbyak form: "<x>word" or "<x>word_b" address frame, followed by dump().
		const char *frameSuffix = broadcast ? "_b" : "";
		printf("(k5|k3, %cword%s [rax+64], 5);dump();\n", x, frameSuffix);
		return;
	}
	if (!broadcast) {
		// nasm spells the 128-bit frame "oword" instead of "xword"
		const char frameLetter = (x == 'x') ? 'o' : x;
		printf("k5{k3}, %cword [rax+64], 5\n", frameLetter);
		return;
	}
	// Broadcast in the non-Xbyak form: N = vector width / element size.
	int vectorBits = 512;
	if (x == 'x') {
		vectorBits = 128;
	} else if (x == 'y') {
		vectorBits = 256;
	}
	printf("k5{k3}, [rax+64]{1to%d}, 5\n", vectorBits / size);
}
// Emit the test lines for one packed class instruction `name`:
// first the register forms via put(), then the memory forms via classSubMem()
// for widths x, y, z -- all broadcast variants first, then all plain variants.
//   size : element size in bits, forwarded to classSubMem()
void putClassSub(const char *name, int size)
{
	put(name, K_K, _XMM | _YMM | _ZMM, IMM8);
	const char widthLetters[] = { 'x', 'y', 'z' };
	for (int pass = 0; pass < 2; pass++) {
		const bool useBroadcast = (pass == 0);
		for (int w = 0; w < 3; w++) {
			classSubMem(name, widthLetters[w], useBroadcast, size);
		}
	}
}
// Emit the test lines for the vfpclass* instructions.
// Nothing is emitted unless XBYAK64 is defined.
void putClass()
{
#ifdef XBYAK64
	// packed forms, with their element size in bits
	putClassSub("vfpclasspd", 64);
	putClassSub("vfpclassps", 32);
	// scalar forms share one operand pattern
	const char *scalarNames[] = { "vfpclasssd", "vfpclassss" };
	for (int i = 0; i < 2; i++) {
		put(scalarNames[i], K_K, _XMM | _MEM, IMM8);
	}
#endif
}
void putMin()
@ -2110,6 +2143,8 @@ public:
putRot();
separateFunc();
putScatter();
separateFunc();
putClass();
#endif
}
};

View file

@ -409,14 +409,7 @@ public:
kind_ = kind;
bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
}
// Exchange the zero_, mask_ and rounding_ members of *this with those of rhs.
void swapAttr(Operand& rhs)
{
	// remember our own values through int temporaries
	const int savedZero = zero_;
	const int savedMask = mask_;
	const int savedRounding = rounding_;
	// take over rhs's values
	zero_ = rhs.zero_;
	mask_ = rhs.mask_;
	rounding_ = rhs.rounding_;
	// hand our former values to rhs
	rhs.zero_ = savedZero;
	rhs.mask_ = savedMask;
	rhs.rounding_ = savedRounding;
}
// Overwrite the stored bit width (bit_) with `bit`; no validation is done here.
void setBit(int bit)
{
	bit_ = bit;
}
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
{
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
@ -1955,6 +1948,12 @@ private:
addr.permitVsib();
opVex(x, 0, addr, type, code);
}
void opClass(const Opmask& k, const Operand& op, int type, uint8 code, uint8 imm)
{
if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE);
Reg x = k; x.setBit(op.getBit());
opVex(x, 0, op, type, code, imm);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@ -1968,8 +1967,8 @@ public:
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
const AddressFrame ptr, byte, word, dword, qword, yword;
const AddressFrame ptr_b, yword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM
const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
@ -2450,8 +2449,8 @@ public:
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
, ptr(0), byte(8), word(16), dword(32), qword(64), yword(256)
, ptr_b(0, true), yword_b(256, true)
, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
, T_sae(T_SAE), T_rn_sae(T_RN_SAE), T_rd_sae(T_RD_SAE), T_ru_sae(T_RU_SAE), T_rz_sae(T_RZ_SAE)

View file

@ -335,6 +335,10 @@ void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F3
// vscatterpf1qps: forwards addr to opGatherFetch with zm6, the flags below, code 0xC7 and Operand::ZMM.
void vscatterpf1qps(const Address& addr)
{
	opGatherFetch(
		addr, zm6,
		T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4,
		0xC7, Operand::ZMM);
}
// vscatterpf1dpd: forwards addr to opGatherFetch with zm6, the flags below, code 0xC6 and Operand::YMM.
void vscatterpf1dpd(const Address& addr)
{
	opGatherFetch(
		addr, zm6,
		T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8,
		0xC6, Operand::YMM);
}
// vscatterpf1qpd: forwards addr to opGatherFetch with zm6, the flags below, code 0xC7 and Operand::ZMM.
void vscatterpf1qpd(const Address& addr)
{
	opGatherFetch(
		addr, zm6,
		T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8,
		0xC7, Operand::ZMM);
}
// vfpclasspd k, op, imm: emitted through opClass with code 0x66 and the T_EW1 / T_B64 flag set below.
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm)
{
	opClass(
		k, op,
		T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64,
		0x66, imm);
}
// vfpclassps k, op, imm: emitted through opClass with code 0x66 and the T_EW0 / T_B32 flag set below.
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm)
{
	opClass(
		k, op,
		T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32,
		0x66, imm);
}
// vfpclasssd k, op, imm: emitted directly through opVex with code 0x67 and the T_EW1 / T_N8 flag set below.
// NOTE(review): unlike the packed forms, no operand kind/width check happens here before opVex -- confirm
// that opVex rejects operands other than an xmm register or memory.
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm)
{
	opVex(
		k, 0, op,
		T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8,
		0x67, imm);
}
// vfpclassss k, op, imm: emitted directly through opVex with code 0x67 and the T_EW0 / T_N4 flag set below.
// NOTE(review): unlike the packed forms, no operand kind/width check happens here before opVex -- confirm
// that opVex rejects operands other than an xmm register or memory.
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm)
{
	opVex(
		k, 0, op,
		T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4,
		0x67, imm);
}
// vpscatterdd addr, x: forwards to opGather2 with the flags below, code 0xA0 and trailing argument 0.
void vpscatterdd(const Address& addr, const Xmm& x)
{
	opGather2(
		x, addr,
		T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4,
		0xA0, 0);
}
// vpscatterdq addr, x: forwards to opGather2 with the flags below, code 0xA0 and trailing argument 1.
void vpscatterdq(const Address& addr, const Xmm& x)
{
	opGather2(
		x, addr,
		T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8,
		0xA0, 1);
}
// vpscatterqd addr, x: forwards to opGather2 with the flags below, code 0xA1 and trailing argument 2.
void vpscatterqd(const Address& addr, const Xmm& x)
{
	opGather2(
		x, addr,
		T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4,
		0xA1, 2);
}