vmovd supports avx10.2

This commit is contained in:
MITSUNARI Shigeo 2024-10-14 19:40:52 +09:00
parent 8b0a1acf0e
commit 220ca76f41
5 changed files with 37 additions and 19 deletions

View file

@ -1734,9 +1734,6 @@ void put()
} }
// mov // mov
{ {
puts("void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); }");
puts("void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); }");
printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n");
printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n");
printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n");

View file

@ -165,3 +165,8 @@ vpdpwuuds(ym1, ym2, ptr_b[rax+128]);
vpdpwuuds(zm1, zm2, zm3); vpdpwuuds(zm1, zm2, zm3);
vpdpwuuds(zm1, zm2, ptr[rax+128]); vpdpwuuds(zm1, zm2, ptr[rax+128]);
vpdpwuuds(zm1, zm2, ptr_b[rax+128]); vpdpwuuds(zm1, zm2, ptr_b[rax+128]);
//
vmovd(xm10, xm20);
vmovd(xm10, ptr[rax+128]);
vmovd(ptr[rax+128], xm30);

View file

@ -7,7 +7,7 @@ struct Code : Xbyak::CodeGenerator {
Code() Code()
: Xbyak::CodeGenerator(4096*8) : Xbyak::CodeGenerator(4096*8)
{ {
setDefaultEncoding(VexEncoding, EvexEncoding); setDefaultEncoding(EvexEncoding, AVX10p2Encoding);
#include "tmp.cpp" #include "tmp.cpp"
} }
}; };

View file

@ -1673,7 +1673,9 @@ inline const uint8_t* Label::getAddress() const
// Encoding selector taken by instructions that accept a PreferredEncoding argument.
// (Side-by-side diff: the left column is the old enum, the right column the new one;
// single-column lines exist only in the new version.)
typedef enum { typedef enum {
// DefaultEncoding is a placeholder: getEncoding() replaces it with defaultEncoding_[sel].
DefaultEncoding, DefaultEncoding,
VexEncoding, VexEncoding,
EvexEncoding EvexEncoding,
// Added: AVX512Encoding is an alias with the same value as EvexEncoding.
AVX512Encoding = EvexEncoding,
// Added: distinct value that vmovd() compares against to pick its AVX10.2 branch.
AVX10p2Encoding
} PreferredEncoding; } PreferredEncoding;
class CodeGenerator : public CodeArray { class CodeGenerator : public CodeArray {
@ -2665,7 +2667,7 @@ private:
{ {
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm); opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding, typeVex, typeEvex, sel), code, imm);
} }
bool isVexEncoding(PreferredEncoding encoding, int sel) const PreferredEncoding getEncoding(PreferredEncoding encoding, int sel) const
{ {
if (encoding == DefaultEncoding) { if (encoding == DefaultEncoding) {
encoding = defaultEncoding_[sel]; encoding = defaultEncoding_[sel];
@ -2674,12 +2676,11 @@ private:
#ifdef XBYAK_DISABLE_AVX512 #ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID) XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif #endif
return false;
} }
return true; return encoding;
} }
// Picks the type flags for an instruction that has both a VEX and an EVEX form.
// Returns typeVex when the resolved encoding is VEX, otherwise (T_MUST_EVEX | typeEvex).
// (Side-by-side diff: the left column calls the old bool isVexEncoding(); the right column
// compares the result of the new getEncoding() with VexEncoding, so every non-VEX result,
// including AVX10p2Encoding, takes the EVEX branch.)
uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) { uint64_t orEvexIf(PreferredEncoding encoding, uint64_t typeVex, uint64_t typeEvex, int sel) {
bool isVex = isVexEncoding(encoding, sel); bool isVex = getEncoding(encoding, sel) == VexEncoding;
// '|' binds tighter than '?:', so the else-arm is the whole (T_MUST_EVEX | typeEvex).
return isVex ? typeVex : T_MUST_EVEX | typeEvex; return isVex ? typeVex : T_MUST_EVEX | typeEvex;
} }
void opInOut(const Reg& a, const Reg& d, uint8_t code) void opInOut(const Reg& a, const Reg& d, uint8_t code)
@ -3177,7 +3178,7 @@ public:
// set default encoding // set default encoding
// vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex) // vnniEnc : control AVX512_VNNI (evex:default) or AVX-VNNI (vex)
// avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (evex) // avx10Enc : control mpsadbw, AVX-VNNI-INT8 (vex:default) or AVX10.2 (AVX10p2Encoding)
// Stores the two defaults: slot 0 receives vnniEnc and slot 1 receives avx10Enc.
// getEncoding() reads defaultEncoding_[sel] whenever a caller passes DefaultEncoding;
// the new vmovd() resolves its encoding with sel = 1, i.e. from avx10Enc.
void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding) void setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)
{ defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; } { defaultEncoding_[0] = vnniEnc; defaultEncoding_[1] = avx10Enc; }
@ -3193,15 +3194,32 @@ public:
} }
db(0xC8 + (idx & 7)); db(0xC8 + (idx & 7));
} }
// Side-by-side diff: on the two-column lines below, the left column is the removed
// opVmovd() helper and the right column is the start of the new public vmovd().
// The single-column lines that follow belong to the new vmovd() only.
//
// New vmovd(op1, op2, encoding):
//  1. Works on pointers to the two operands so they can be exchanged. If op1 is memory
//     the pointers are swapped and rev is set to true.
//  2. If the first pointer is still memory after that (both operands were memory),
//     ERR_BAD_COMBINATION is thrown.
//  3. If the first pointer is an XMM register, the pointers are swapped again and rev is toggled.
//     For every accepted combination this leaves the XMM operand in p2 and the other operand
//     in p1, with rev == true exactly when that XMM operand was passed as op1.
//     This is the opposite sense of the removed helper, whose rev was true for the
//     (op, x) overload; the code selection below is flipped accordingly.
//  4. The resolved encoding (default slot 1) selects which operand kinds are accepted and
//     which type flags / code value are forwarded to opAVX_X_X_XM.
// Any combination that matches neither branch throws ERR_BAD_COMBINATION.
void opVmovd(const Xmm& x, const Operand& op, bool rev, PreferredEncoding encoding) void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding encoding = DefaultEncoding)
{ {
if (isVexEncoding(encoding, 1)) { const Operand *p1 = &op1;
if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) const Operand *p2 = &op2;
uint64_t type = T_0F | T_66 | T_W0 | T_EVEX | T_N4; bool rev = false;
int code = rev ? 0x7E : 0x6E; if (p1->isMEM()) {
opAVX_X_X_XM(x, xm0, op, type, code); std::swap(p1, p2);
} else { rev = true;
} }
// Still memory after the swap means both operands were memory.
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
// Move an XMM first operand into p2; p1 then holds the other operand.
if (p1->isXMM()) {
std::swap(p1, p2);
rev = !rev;
}
if (getEncoding(encoding, 1) == AVX10p2Encoding) {
// AVX10.2 branch: p1 may be another XMM register or memory.
// NOTE(review): a 32-bit general-purpose register as p1 does not match this test
// (assuming isXMM() is false for it) and falls through to the throw below — confirm intended.
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) {
// T_MUST_EVEX is always set here; rev picks T_F3 with code 0x7E, otherwise T_66 with code 0xD6.
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|(rev ? T_F3 : T_66)|T_MUST_EVEX|T_0F|T_EW0|T_N4, rev ? 0x7E : 0xD6);
return;
}
} else {
// Other encodings: p1 must be a 32-bit register or memory (same operand check as the removed helper).
if ((p1->isREG(32) || p1->isMEM()) && p2->isXMM()) {
// Same type flags as the removed helper; code 0x6E when rev, else 0x7E.
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, T_EVEX|T_66|T_0F|T_W0|T_N4, rev ? 0x6E : 0x7E);
return;
}
}
// No accepted operand combination matched.
XBYAK_THROW(ERR_BAD_COMBINATION)
} }
/* /*
use single byte nop if useMultiByteNop = false use single byte nop if useMultiByteNop = false

View file

@ -1332,8 +1332,6 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); } void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); }
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); } void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); }
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); } void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); }
void vmovd(const Operand& op, const Xmm& x, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, true, encoding); }
void vmovd(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVmovd(x, op, false, encoding); }
void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); }
void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); } void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); }
void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); } void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); }