Merge branch 'dev'

This commit is contained in:
MITSUNARI Shigeo 2023-12-20 15:59:45 +09:00
commit f17cb9d6b9
9 changed files with 87 additions and 40 deletions

View file

@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
project(xbyak LANGUAGES CXX VERSION 7.01)
project(xbyak LANGUAGES CXX VERSION 7.02)
file(GLOB headers xbyak/*.h)

View file

@ -1,5 +1,6 @@
# History
* 2023/Dec/20 ver 7.02 SHA* support APX
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX
* 2023/Dec/01 ver 7.00 support APX
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16

View file

@ -1402,14 +1402,6 @@ void put()
{ 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 },
{ 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 },
{ 0xCC, "sha1rnds4", T_0F3A, true, 1 },
{ 0xC8, "sha1nexte", T_0F38, false, 1 },
{ 0xC9, "sha1msg1", T_0F38, false, 1 },
{ 0xCA, "sha1msg2", T_0F38, false, 1 },
{ 0xCB, "sha256rnds2", T_0F38, false, 1 },
{ 0xCC, "sha256msg1", T_0F38, false, 1 },
{ 0xCD, "sha256msg2", T_0F38, false, 1 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
@ -1425,6 +1417,26 @@ void put()
}
}
}
// sha
{
const struct Tbl {
uint8_t code;
uint8_t code2;
const char *name;
} tbl[] = {
{ 0xC8, 0xD8, "sha1nexte" },
{ 0xC9, 0xD9, "sha1msg1" },
{ 0xCA, 0xDA, "sha1msg2" },
{ 0xCB, 0xDB, "sha256rnds2" },
{ 0xCC, 0xDC, "sha256msg1" },
{ 0xCD, 0xDD, "sha256msg2" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
printf("void %s(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0x%02X, T_MUST_EVEX, 0x%02X); }\n", p->name, p->code, p->code2);
}
puts("void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }");
}
// (m, x), (m, y)
{
const struct Tbl {
@ -2036,9 +2048,9 @@ void put64()
std::string s1 = type2String(p->type1);
std::string s2 = type2String(p->type2);
if (p->idx == 8) {
printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
printf("void %s(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, s1.c_str(), p->code, s2.c_str(), p->code);
} else {
printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
printf("void %s(const Address& addr) { opSSE_APX(xmm%d, addr, %s, 0x%02X, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), p->code, s2.c_str(), p->code);
}
}
}

View file

@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.01',
version: '7.02',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)

View file

@ -1,5 +1,5 @@
# Xbyak 7.01 [![Badge Build]][Build Status]
# Xbyak 7.02 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.02
-----------------------------------------------------------------------------
◎概要
@ -404,6 +404,9 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2023/12/20 ver 7.02 SHA*のAPX対応
2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 APX10/APX判定対応
2023/12/01 ver 7.00 APX対応
2023/08/07 ver 6.73 sha512/sm3/sm4/avx-vnni-int16追加
2023/08/02 ver 6.72 xabort, xbegin, xend追加
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。

View file

@ -1870,3 +1870,31 @@ CYBOZU_TEST_AUTO(encodekey)
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(sha)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
sha1msg1(xmm15, ptr [r30+r29*8+0x12]);
sha1msg2(xmm15, ptr [r30+r29*8+0x12]);
sha1nexte(xmm15, ptr [r30+r29*8+0x12]);
sha256msg1(xmm15, ptr [r30+r29*8+0x12]);
sha256msg2(xmm15, ptr [r30+r29*8+0x12]);
sha256rnds2(xmm15, ptr [r30+r29*8+0x12]);
sha1rnds4(xmm15, ptr [r30+r29*8+0x12], 0x23);
}
} c;
const uint8_t tbl[] = {
0x62, 0x1c, 0x78, 0x08, 0xd9, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xda, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xd8, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xdc, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xdd, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xdb, 0x7c, 0xee, 0x12,
0x62, 0x1c, 0x78, 0x08, 0xd4, 0x7c, 0xee, 0x12, 0x23,
};
const size_t n = sizeof(tbl);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

View file

@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7010 /* 0xABCD = A.BC(.D) */
VERSION = 0x7020 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -2738,15 +2738,6 @@ private:
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
}
void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code)
{
if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX)
if (addr.hasRex2()) {
opROO(Reg(), addr, *x, type2, code);
return;
}
opRO(*x, addr, type1, code);
}
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
{
if (r1.getIdx() < 8 && r2.getIdx() < 8) {
@ -2755,6 +2746,14 @@ private:
}
opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2);
}
void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE)
{
if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) {
if (imm != NONE) db(imm);
return;
}
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@ -3139,6 +3138,10 @@ public:
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
void sha1msg12(const Xmm& x, const Operand& op)
{
opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
}
/*
use single byte nop if useMultiByteNop = false
*/

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "7.01"; }
const char *getVersionString() const { return "7.02"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@ -988,13 +988,13 @@ void setpo(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x4
void sets(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 8)) return; opRext(op, 8, 0, T_0F, 0x90 | 8); }//-V524
void setz(const Operand& op) { if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | 4)) return; opRext(op, 8, 0, T_0F, 0x90 | 4); }//-V524
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
void sha1msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC9, isXMM_XMMorMEM, NONE); }
void sha1msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCA, isXMM_XMMorMEM, NONE); }
void sha1nexte(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xC8, isXMM_XMMorMEM, NONE); }
void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_0F3A, 0xCC, isXMM_XMMorMEM, imm); }
void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); }
void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); }
void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); }
void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); }
void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); }
void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); }
void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); }
void sha256msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCC, T_MUST_EVEX, 0xDC); }
void sha256msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCD, T_MUST_EVEX, 0xDD); }
void sha256rnds2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCB, T_MUST_EVEX, 0xDB); }
void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); }
void shl(const Operand& op, int imm) { opShift(op, imm, 12); }
void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); }
@ -1926,14 +1926,14 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); }
void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); }
void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesdec128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDD, T_F3|T_MUST_EVEX, 0xDD); }
void aesdec256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDF, T_F3|T_MUST_EVEX, 0xDF); }
void aesdecwide128kl(const Address& addr) { opSSE_APX(xmm1, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void aesdecwide256kl(const Address& addr) { opSSE_APX(xmm3, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void aesenc128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDC, T_F3|T_MUST_EVEX, 0xDC); }
void aesenc256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDE, T_F3|T_MUST_EVEX, 0xDE); }
void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }