mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
amx supports apx
This commit is contained in:
parent
944438195b
commit
bec145ba91
4 changed files with 33 additions and 10 deletions
|
@ -2017,12 +2017,12 @@ void put64()
|
|||
|
||||
// Prints the AMX tile mnemonics (tile configuration, tile load/store, release
// and zero) to stdout as C++ member-function source text, one definition per
// line.
//
// Every mnemonic is printed exactly once. Printing both the plain opVex/opAMX
// form and the T_APX-aware form of the same mnemonic would put two member
// functions with identical signatures into the generated text, which does not
// compile, so only the T_APX-aware form is emitted for ldtilecfg, sttilecfg,
// tileloadd, tileloaddt1 and tilestored.
void putAMX_TILE()
{
	// Tile configuration load/store: the emitted body returns early when
	// opROO(...) with T_APX yields true, otherwise it calls opVex(...).
	puts("void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }");
	puts("void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }");
	// Tile loads are routed through opAMX().
	puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }");
	puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }");
	// tilerelease takes no operands; its five bytes are emitted verbatim with db().
	puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }");
	// Tile store: same opROO-then-opVex pattern as the configuration forms.
	puts("void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }");
	// tilezero has no memory operand and only has the opVex form.
	puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }");
}
|
||||
void putAMX_INT8()
|
||||
|
|
22
test/apx.cpp
22
test/apx.cpp
|
@ -1753,3 +1753,25 @@ CYBOZU_TEST_AUTO(kmov)
|
|||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
// Checks the bytes generated for the AMX tile instructions that take a memory
// operand, using an address built from r30 (base), r29*4 (index) and 0x12.
CYBOZU_TEST_AUTO(amx)
{
	// Generator whose constructor emits the five instructions under test,
	// all with the same address operand.
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			ldtilecfg(ptr [r30+r29*4+0x12]);
			sttilecfg(ptr [r30+r29*4+0x12]);
			tileloadd(tmm1, ptr [r30+r29*4+0x12]);
			tileloaddt1(tmm3, ptr [r30+r29*4+0x12]);
			tilestored(ptr [r30+r29*4+0x12], tmm5);
		}
	} c;
	// Expected output, 40 bytes in total. Rows follow the emission order above,
	// assuming each instruction encodes to 8 bytes (consistent with the total).
	const uint8_t tbl[] = {
		0x62, 0x9a, 0x78, 0x08, 0x49, 0x44, 0xae, 0x12, // ldtilecfg
		0x62, 0x9a, 0x79, 0x08, 0x49, 0x44, 0xae, 0x12, // sttilecfg
		0x62, 0x9a, 0x7b, 0x08, 0x4b, 0x4c, 0xae, 0x12, // tileloadd tmm1
		0x62, 0x9a, 0x79, 0x08, 0x4b, 0x5c, 0xae, 0x12, // tileloaddt1 tmm3
		0x62, 0x9a, 0x7a, 0x08, 0x4b, 0x6c, 0xae, 0x12, // tilestored tmm5
	};
	// Elements are single bytes, so the element count equals sizeof(tbl).
	const size_t n = sizeof(tbl) / sizeof(tbl[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
|
|
|
@ -2708,6 +2708,7 @@ private:
|
|||
Address addr2 = addr.cloneNoOptimize();
|
||||
const RegExp exp = addr2.getRegExp();
|
||||
if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (opROO(Reg(), addr2, t1, T_APX|type, code)) return;
|
||||
opVex(t1, &tmm0, addr2, type, code);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1926,12 +1926,12 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
|
|||
// Encoder entry point for the cmppxadd mnemonic.
// Forwards to opRRO() with type flags T_APX|T_66|T_0F38 and code 0xEA.
// Note the reordering: addr is the first parameter here but is passed to
// opRRO() after r1 and r2.
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2)
{
	opRRO(r1, r2, addr, T_APX | T_66 | T_0F38, 0xEA);
}
|
||||
// Encoder entry point for the cmpsxadd mnemonic.
// Forwards to opRRO() with type flags T_APX|T_66|T_0F38 and code 0xE8.
// Note the reordering: addr is the first parameter here but is passed to
// opRRO() after r1 and r2.
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2)
{
	opRRO(r1, r2, addr, T_APX | T_66 | T_0F38, 0xE8);
}
|
||||
// Encoder entry point for the cmpzxadd mnemonic.
// Forwards to opRRO() with type flags T_APX|T_66|T_0F38 and code 0xE4.
// Note the reordering: addr is the first parameter here but is passed to
// opRRO() after r1 and r2.
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2)
{
	opRRO(r1, r2, addr, T_APX | T_66 | T_0F38, 0xE4);
}
|
||||
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
||||
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
||||
void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }
|
||||
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
|
||||
void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
|
||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }
|
||||
void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); }
|
||||
void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }
|
||||
void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }
|
||||
void tilestored(const Address& addr, const Tmm& tm) { if (opROO(Reg(), addr, tm, T_APX|T_F3|T_0F38|T_W0, 0x4B)) return; opVex(tm, &tmm0, addr, T_F3|T_0F38|T_W0, 0x4B); }
|
||||
void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }
|
||||
// Encoder entry point for the tdpbssd mnemonic.
// Forwards to opVex() with type flags T_F2|T_0F38|T_W0 and code 0x5e.
// Note the argument order: x3 is passed second (by address) and x2 third.
void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3)
{
	opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e);
}
|
||||
// Encoder entry point for the tdpbsud mnemonic.
// Forwards to opVex() with type flags T_F3|T_0F38|T_W0 and code 0x5e.
// Note the argument order: x3 is passed second (by address) and x2 third.
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3)
{
	opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e);
}
|
||||
|
|
Loading…
Reference in a new issue