mirror of
https://github.com/herumi/xbyak
synced 2024-11-21 16:09:11 -07:00
add vfpclass*
This commit is contained in:
parent
07c593c35b
commit
fbde291731
4 changed files with 55 additions and 14 deletions
|
@ -135,8 +135,6 @@ void putVcmp()
|
|||
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
|
||||
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
|
||||
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
|
||||
|
||||
//QQQ { 0x66, "vfpclassps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM |T_EW0 | T_B32, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -663,6 +661,11 @@ void putMisc()
|
|||
puts("void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4, 0xC7, Operand::ZMM); }");
|
||||
puts("void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC6, Operand::YMM); }");
|
||||
puts("void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC7, Operand::ZMM); }");
|
||||
|
||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||
}
|
||||
|
||||
int main()
|
||||
|
|
|
@ -2047,6 +2047,39 @@ public:
|
|||
put("vpmadd52huq", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
||||
put("vpmadd52huq", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
||||
put("vpmadd52huq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8);
|
||||
#endif
|
||||
}
|
||||
void classSubMem(const char *nm, char x, bool broadcast, int size)
|
||||
{
|
||||
printf("%s ", nm);
|
||||
if (isXbyak_) {
|
||||
printf("(k5|k3, %cword%s [rax+64], 5);dump();\n", x, broadcast ? "_b" : "");
|
||||
} else {
|
||||
if (broadcast) {
|
||||
int d = x == 'x' ? 128 / size : x == 'y' ? 256 / size : 512 / size;
|
||||
printf("k5{k3}, [rax+64]{1to%d}, 5\n", d);
|
||||
} else {
|
||||
if (x == 'x') x = 'o'; // nasm
|
||||
printf("k5{k3}, %cword [rax+64], 5\n", x);
|
||||
}
|
||||
}
|
||||
}
|
||||
void putClassSub(const char *name, int size)
|
||||
{
|
||||
put(name, K_K, _XMM | _YMM | _ZMM, IMM8);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
classSubMem(name, 'x', i == 0, size);
|
||||
classSubMem(name, 'y', i == 0, size);
|
||||
classSubMem(name, 'z', i == 0, size);
|
||||
}
|
||||
}
|
||||
/*
	Emit the test lines for the vfpclass* instructions.
	Nothing is emitted unless XBYAK64 is defined.
*/
void putClass()
{
#if defined(XBYAK64)
	// packed forms: register operands plus every memory/broadcast variant
	putClassSub("vfpclasspd", 64);
	putClassSub("vfpclassps", 32);
	// scalar forms: xmm register or memory operand only
	put("vfpclasssd", K_K, _XMM | _MEM, IMM8);
	put("vfpclassss", K_K, _XMM | _MEM, IMM8);
#endif
}
|
||||
void putMin()
|
||||
|
@ -2110,6 +2143,8 @@ public:
|
|||
putRot();
|
||||
separateFunc();
|
||||
putScatter();
|
||||
separateFunc();
|
||||
putClass();
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
|
|
@ -409,14 +409,7 @@ public:
|
|||
kind_ = kind;
|
||||
bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512;
|
||||
}
|
||||
// swap zero_, mask_, rounding_
|
||||
void swapAttr(Operand& rhs)
|
||||
{
|
||||
int t;
|
||||
t = zero_; zero_ = rhs.zero_; rhs.zero_ = t;
|
||||
t = mask_; mask_ = rhs.mask_; rhs.mask_ = t;
|
||||
t = rounding_; rounding_ = rhs.rounding_; rhs.rounding_ = t;
|
||||
}
|
||||
// Overwrite the stored bit size (bit_) with bit; the value is not validated.
void setBit(int bit)
{
	bit_ = bit;
}
|
||||
void setOpmaskIdx(int idx, bool ignore_idx0 = false)
|
||||
{
|
||||
if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID);
|
||||
|
@ -1955,6 +1948,12 @@ private:
|
|||
addr.permitVsib();
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
void opClass(const Opmask& k, const Operand& op, int type, uint8 code, uint8 imm)
|
||||
{
|
||||
if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE);
|
||||
Reg x = k; x.setBit(op.getBit());
|
||||
opVex(x, 0, op, type, code, imm);
|
||||
}
|
||||
public:
|
||||
// Return the VERSION constant.
unsigned int getVersion() const
{
	return VERSION;
}
|
||||
using CodeArray::db;
|
||||
|
@ -1968,8 +1967,8 @@ public:
|
|||
const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi;
|
||||
const Reg16 ax, cx, dx, bx, sp, bp, si, di;
|
||||
const Reg8 al, cl, dl, bl, ah, ch, dh, bh;
|
||||
const AddressFrame ptr, byte, word, dword, qword, yword;
|
||||
const AddressFrame ptr_b, yword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
|
||||
const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is the same as NASM's oword
|
||||
const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b}
|
||||
const Fpu st0, st1, st2, st3, st4, st5, st6, st7;
|
||||
const Opmask k0, k1, k2, k3, k4, k5, k6, k7;
|
||||
const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}
|
||||
|
@ -2450,8 +2449,8 @@ public:
|
|||
, eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI)
|
||||
, ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI)
|
||||
, al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH)
|
||||
, ptr(0), byte(8), word(16), dword(32), qword(64), yword(256)
|
||||
, ptr_b(0, true), yword_b(256, true)
|
||||
, ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512)
|
||||
, ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true)
|
||||
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
|
||||
, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
|
||||
, T_sae(T_SAE), T_rn_sae(T_RN_SAE), T_rd_sae(T_RD_SAE), T_ru_sae(T_RU_SAE), T_rz_sae(T_RZ_SAE)
|
||||
|
|
|
@ -335,6 +335,10 @@ void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F3
|
|||
// vscatterpf1qps: forwards addr to opGatherFetch with opcode 0xC7 and Operand::ZMM; zm6 presumably supplies the /6 opcode-extension field (TODO confirm in opGatherFetch).
void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4, 0xC7, Operand::ZMM); }
|
||||
// vscatterpf1dpd: forwards addr to opGatherFetch with opcode 0xC6 and Operand::YMM; zm6 presumably supplies the /6 opcode-extension field (TODO confirm in opGatherFetch).
void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC6, Operand::YMM); }
|
||||
// vscatterpf1qpd: forwards addr to opGatherFetch with opcode 0xC7 and Operand::ZMM; zm6 presumably supplies the /6 opcode-extension field (TODO confirm in opGatherFetch).
void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8, 0xC7, Operand::ZMM); }
|
||||
// vfpclasspd k, op, imm: packed form, opcode 0x66 with T_EW1 | T_B64; goes through opClass, which throws ERR_BAD_MEM_SIZE unless op is 128/256/512 bits wide.
void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||
// vfpclassps k, op, imm: packed form, opcode 0x66 with T_EW0 | T_B32; goes through opClass, which throws ERR_BAD_MEM_SIZE unless op is 128/256/512 bits wide.
void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { opClass(k, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||
// vfpclasssd k, op, imm: scalar form, opcode 0x67 with T_EW1 | T_N8; calls opVex directly, so no operand-size check is made at this level.
void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||
// vfpclassss k, op, imm: scalar form, opcode 0x67 with T_EW0 | T_N4; calls opVex directly, so no operand-size check is made at this level.
void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
||||
// vpscatterdd addr, x: forwards to opGather2 (register first, then address) with opcode 0xA0 and mode argument 0; the meaning of the mode is defined by opGather2, not visible here.
void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA0, 0); }
|
||||
// vpscatterdq addr, x: forwards to opGather2 (register first, then address) with opcode 0xA0 and mode argument 1; the meaning of the mode is defined by opGather2, not visible here.
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_N8, 0xA0, 1); }
|
||||
// vpscatterqd addr, x: forwards to opGather2 (register first, then address) with opcode 0xA1 and mode argument 2; the meaning of the mode is defined by opGather2, not visible here.
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_N4, 0xA1, 2); }
|
||||
|
|
Loading…
Reference in a new issue