mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
Merge branch 'dev'
This commit is contained in:
commit
f17cb9d6b9
9 changed files with 87 additions and 40 deletions
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 7.01)
|
||||
project(xbyak LANGUAGES CXX VERSION 7.02)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# History
|
||||
|
||||
* 2023/Dec/20 ver 7.02 SHA* support APX
|
||||
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX
|
||||
* 2023/Dec/01 ver 7.00 support APX
|
||||
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
|
||||
|
|
|
@ -1402,14 +1402,6 @@ void put()
|
|||
|
||||
{ 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
|
||||
{ 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
|
||||
|
||||
{ 0xCC, "sha1rnds4", T_0F3A, true, 1 },
|
||||
{ 0xC8, "sha1nexte", T_0F38, false, 1 },
|
||||
{ 0xC9, "sha1msg1", T_0F38, false, 1 },
|
||||
{ 0xCA, "sha1msg2", T_0F38, false, 1 },
|
||||
{ 0xCB, "sha256rnds2", T_0F38, false, 1 },
|
||||
{ 0xCC, "sha256msg1", T_0F38, false, 1 },
|
||||
{ 0xCD, "sha256msg2", T_0F38, false, 1 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -1425,6 +1417,26 @@ void put()
|
|||
}
|
||||
}
|
||||
}
|
||||
// sha: print the SHA mnemonic wrappers.
// Every wrapper forwards to opSSE_APX with two (type, opcode) pairs:
// (T_0F38, code) and (T_MUST_EVEX, code2).
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code; // opcode printed next to T_0F38
|
||||
uint8_t code2; // opcode printed next to T_MUST_EVEX
|
||||
const char *name; // mnemonic, used as the generated method name
|
||||
} tbl[] = {
|
||||
{ 0xC8, 0xD8, "sha1nexte" },
|
||||
{ 0xC9, 0xD9, "sha1msg1" },
|
||||
{ 0xCA, 0xDA, "sha1msg2" },
|
||||
{ 0xCB, 0xDB, "sha256rnds2" },
|
||||
{ 0xCC, 0xDC, "sha256msg1" },
|
||||
{ 0xCD, 0xDD, "sha256msg2" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
|
||||
}
|
||||
// sha1rnds4 is printed on its own: it takes an extra imm argument and uses T_0F3A instead of T_0F38.
|
||||
puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
|
||||
}
|
||||
// (m, x), (m, y)
|
||||
{
|
||||
const struct Tbl {
|
||||
|
@ -2036,9 +2048,9 @@ void put64()
|
|||
std::string s1 = type2String(p->type1);
|
||||
std::string s2 = type2String(p->type2);
|
||||
if (p->idx == 8) {
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, s1.c_str(), p->code, s2.c_str(), p->code);
|
||||
} else {
|
||||
printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
|
||||
printf("void %s(const Address& addr) { opSSE_APX(xmm%d, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), p->code, s2.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '7.01',
|
||||
version: '7.02',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Xbyak 7.01 [![Badge Build]][Build Status]
|
||||
# Xbyak 7.02 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.02
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -404,6 +404,9 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2023/12/20 ver 7.02 SHA*のAPX対応
|
||||
2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 APX10/APX判定対応
|
||||
2023/12/01 ver 7.00 APX対応
|
||||
2023/08/07 ver 6.73 sha512/sm3/sm4/avx-vnni-int16追加
|
||||
2023/08/02 ver 6.72 xabort, xbegin, xend追加
|
||||
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
|
||||
|
|
28
test/apx.cpp
28
test/apx.cpp
|
@ -1870,3 +1870,31 @@ CYBOZU_TEST_AUTO(encodekey)
|
|||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
// Encoding test for the seven SHA mnemonics.
// Each instruction is emitted with xmm15 and the memory operand [r30+r29*8+0x12];
// the generated bytes are then compared, in size and content, with tbl.
CYBOZU_TEST_AUTO(sha)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
sha1msg1(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha1msg2(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha1nexte(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha256msg1(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha256msg2(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha256rnds2(xmm15, ptr [r30+r29*8+0x12]);
|
||||
sha1rnds4(xmm15, ptr [r30+r29*8+0x12], 0x23);
|
||||
}
|
||||
} c;
|
||||
// Expected bytes, one row per call above and in the same order.
// The rows differ only in their fifth byte (0xd9, 0xda, 0xd8, 0xdc, 0xdd, 0xdb, 0xd4);
// the sha1rnds4 row additionally ends with the immediate 0x23.
const uint8_t tbl[] = {
|
||||
0x62, 0x1c, 0x78, 0x08, 0xd9, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xda, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xd8, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xdc, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xdd, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xdb, 0x7c, 0xee, 0x12,
|
||||
0x62, 0x1c, 0x78, 0x08, 0xd4, 0x7c, 0xee, 0x12, 0x23,
|
||||
};
|
||||
const size_t n = sizeof(tbl);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
|
|
|
@ -155,7 +155,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x7010 /* 0xABCD = A.BC(.D) */
|
||||
VERSION = 0x7020 /* 0xABCD = A.BC(.D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -2738,15 +2738,6 @@ private:
|
|||
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
|
||||
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
|
||||
}
|
||||
// Emits a Key Locker AES instruction for register *x and memory operand addr.
// If addr.hasRex2() is true, the instruction is emitted through opROO with type2;
// otherwise through opRO with type1. Both paths use the same opcode byte `code`.
// Throws ERR_INVALID_REG_IDX when the register index is 16 or greater.
// NOTE(review): x is null-checked only in the index guard but dereferenced
// unconditionally on both paths below, so a null x is not actually supported.
void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code)
|
||||
{
|
||||
if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX)
|
||||
if (addr.hasRex2()) {
|
||||
opROO(Reg(), addr, *x, type2, code);
|
||||
return;
|
||||
}
|
||||
opRO(*x, addr, type1, code);
|
||||
}
|
||||
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
|
||||
{
|
||||
if (r1.getIdx() < 8 && r2.getIdx() < 8) {
|
||||
|
@ -2755,6 +2746,14 @@ private:
|
|||
}
|
||||
opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2);
|
||||
}
|
||||
// Emits an XMM instruction using one of two (type, opcode) pairs.
//   x, op         register operand and register-or-memory operand
//   type1, code1  pair handed to opSSE (the fallback path)
//   type2, code2  pair handed to opROO (tried first when the conditions below hold)
//   imm           optional immediate; NONE means the instruction has no immediate
// The opROO path is attempted only when x has index <= 15 and op.hasRex2() is true;
// if it is not attempted, or opROO returns false, the call falls through to opSSE.
void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE)
|
||||
{
|
||||
// The last opROO argument is 1 when an immediate follows and 0 otherwise
// (presumably the immediate size in bytes - TODO confirm against opROO).
if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) {
|
||||
// On this path the immediate byte is appended here, after opROO has returned.
if (imm != NONE) db(imm);
|
||||
return;
|
||||
}
|
||||
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
|
@ -3139,6 +3138,10 @@ public:
|
|||
// set default encoding to select Vex or Evex
|
||||
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
||||
|
||||
// Calls opROO directly with T_MUST_EVEX and opcode 0xD9 for (x, op).
// There is no register-index check, no hasRex2() check, and no fallback if opROO returns false.
// NOTE(review): 0xD9 is the same second opcode that sha1msg1 passes to opSSE_APX, and this
// method is hand-written in the public section rather than generated - it looks like
// leftover experiment code; confirm whether it is meant to be part of the public API.
void sha1msg12(const Xmm& x, const Operand& op)
|
||||
{
|
||||
opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
|
||||
}
|
||||
/*
|
||||
use single byte nop if useMultiByteNop = false
|
||||
*/
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "7.01"; }
|
||||
const char *getVersionString() const { return "7.02"; }
|
||||
// aadd: forwards (addr, reg) to opMR with T_0F38 and opcode 0x0FC.
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
|
||||
// aand: forwards (addr, reg) to opMR with T_0F38 | T_66 and opcode 0x0FC.
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
|
||||
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
|
||||
|
@ -988,13 +988,13 @@ void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x4
|
|||
void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524
|
||||
void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
|
||||
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
|
||||
void sha1msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC9, isXMM_XMMorMEM, NONE); }
|
||||
void sha1msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCA, isXMM_XMMorMEM, NONE); }
|
||||
void sha1nexte(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC8, isXMM_XMMorMEM, NONE); }
|
||||
void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_0F3A, 0xCC, isXMM_XMMorMEM, imm); }
|
||||
void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); }
|
||||
void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); }
|
||||
void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); }
|
||||
void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); }
|
||||
void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }
|
||||
void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); }
|
||||
void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }
|
||||
void sha256msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCC, T_MUST_EVEX, 0xDC); }
|
||||
void sha256msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCD, T_MUST_EVEX, 0xDD); }
|
||||
void sha256rnds2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCB, T_MUST_EVEX, 0xDB); }
|
||||
void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); }
|
||||
void shl(const Operand& op, int imm) { opShift(op, imm, 12); }
|
||||
void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); }
|
||||
|
@ -1926,14 +1926,14 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
|
|||
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
|
||||
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
|
||||
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
|
||||
void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
|
||||
void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
|
||||
void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); }
|
||||
void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); }
|
||||
void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesdec128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDD, T_F3|T_MUST_EVEX, 0xDD); }
|
||||
void aesdec256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDF, T_F3|T_MUST_EVEX, 0xDF); }
|
||||
void aesdecwide128kl(const Address& addr) { opSSE_APX(xmm1, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesdecwide256kl(const Address& addr) { opSSE_APX(xmm3, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesenc128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDC, T_F3|T_MUST_EVEX, 0xDC); }
|
||||
void aesenc256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDE, T_F3|T_MUST_EVEX, 0xDE); }
|
||||
void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
|
||||
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
|
||||
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
|
||||
|
|
Loading…
Reference in a new issue