Merge branch 'dev'

This commit is contained in:
MITSUNARI Shigeo 2023-12-19 19:58:16 +09:00
commit c9765588f0
27 changed files with 240 additions and 59 deletions

View file

@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)
project(xbyak LANGUAGES CXX VERSION 7.00)
project(xbyak LANGUAGES CXX VERSION 7.01)
file(GLOB headers xbyak/*.h)

View file

@ -1,5 +1,7 @@
# History
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX
* 2023/Dec/01 ver 7.00 support APX
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
* 2023/Aug/02 ver 6.72 add xbegin/xabort/xend
* 2023/Jul/27 ver 6.71 Allocator supports huge page

View file

@ -2013,6 +2013,38 @@ void put64()
printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0x%02X); }\n", p->name, p->code);
}
}
// aes
{
const struct Tbl {
const char *name;
uint64_t type1;
uint64_t type2;
uint8_t code;
int idx;
} tbl[] = {
{ "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 8 },
{ "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 8 },
{ "aesdecwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 1 },
{ "aesdecwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 3 },
{ "aesenc128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDC, 8 },
{ "aesenc256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDE, 8 },
{ "aesencwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 0 },
{ "aesencwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 2 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string s1 = type2String(p->type1);
std::string s2 = type2String(p->type2);
if (p->idx == 8) {
printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
} else {
printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
}
}
}
// encodekey
puts("void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }");
puts("void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }");
}
void putAMX_TILE()

View file

@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '7.00',
version: '7.01',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)

View file

@ -1,5 +1,5 @@
# Xbyak 7.00 [![Badge Build]][Build Status]
# Xbyak 7.01 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
@ -21,8 +21,7 @@ It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl
- header file only
- Intel/MASM like syntax
- fully support AVX-512
- support APX
- support APX/AVX10
**Note**:
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
@ -34,6 +33,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
### News
- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
- support APX except for a few instructions
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
- add movdiri, movdir64b, clwb, cldemote

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.00
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01
-----------------------------------------------------------------------------
◎概要
@ -46,6 +46,8 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎新機能
APX/AVX10対応
例外なしモード追加
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt clwb

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt clwb

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize amx_fp16 prefetchiti
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 prefetchiti avx10

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt clwb

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt movdiri movdir64b
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt clwb movdiri movdir64b aeskle wide_kl keylocker keylocker_wide

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote clwb

View file

@ -31,6 +31,7 @@ void putCPUinfo(bool onlyCpuidFeature)
{ Cpu::tSSSE3, "ssse3" },
{ Cpu::tSSE41, "sse41" },
{ Cpu::tSSE42, "sse42" },
{ Cpu::tSSE4a, "sse4a" },
{ Cpu::tPOPCNT, "popcnt" },
{ Cpu::t3DN, "3dn" },
{ Cpu::tE3DN, "e3dn" },
@ -87,6 +88,7 @@ void putCPUinfo(bool onlyCpuidFeature)
{ Cpu::tWAITPKG, "waitpkg" },
{ Cpu::tCLFLUSHOPT, "clflushopt" },
{ Cpu::tCLDEMOTE, "cldemote" },
{ Cpu::tCLWB, "clwb" },
{ Cpu::tMOVDIRI, "movdiri" },
{ Cpu::tMOVDIR64B, "movdir64b" },
{ Cpu::tUINTR, "uintr" },
@ -105,6 +107,10 @@ void putCPUinfo(bool onlyCpuidFeature)
{ Cpu::tAVX_VNNI_INT16, "avx_vnni_int16" },
{ Cpu::tAPX_F, "apx_f" },
{ Cpu::tAVX10, "avx10" },
{ Cpu::tAESKLE, "aeskle" },
{ Cpu::tWIDE_KL, "wide_kl" },
{ Cpu::tKEYLOCKER, "keylocker" },
{ Cpu::tKEYLOCKER_WIDE, "keylocker_wide" },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);

View file

@ -1775,3 +1775,98 @@ CYBOZU_TEST_AUTO(amx)
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(aeskl)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
aesdec128kl(xmm15, ptr[rax+rcx*4+0x12]);
aesdec128kl(xmm15, ptr[r30+r29*8+0x34]);
aesdec256kl(xmm15, ptr[rax+rcx*4+0x12]);
aesdec256kl(xmm15, ptr[r30+r29*8+0x34]);
aesdecwide128kl(ptr[rax+rcx*4+0x12]);
aesdecwide128kl(ptr[r30+r29*8+0x34]);
aesdecwide256kl(ptr[rax+rcx*4+0x12]);
aesdecwide256kl(ptr[r30+r29*8+0x34]);
aesenc128kl(xmm15, ptr[rax+rcx*4+0x12]);
aesenc128kl(xmm15, ptr[r30+r29*8+0x34]);
aesenc256kl(xmm15, ptr[rax+rcx*4+0x12]);
aesenc256kl(xmm15, ptr[r30+r29*8+0x34]);
aesencwide128kl(ptr[rax+rcx*4+0x12]);
aesencwide128kl(ptr[r30+r29*8+0x34]);
aesencwide256kl(ptr[rax+rcx*4+0x12]);
aesencwide256kl(ptr[r30+r29*8+0x34]);
}
} c;
const uint8_t tbl[] = {
// aesdec128kl
0xf3, 0x44, 0x0f, 0x38, 0xdd, 0x7c, 0x88, 0x12,
0x62, 0x1c, 0x7a, 0x08, 0xdd, 0x7c, 0xee, 0x34,
// aesdec256kl
0xf3, 0x44, 0x0f, 0x38, 0xdf, 0x7c, 0x88, 0x12,
0x62, 0x1c, 0x7a, 0x08, 0xdf, 0x7c, 0xee, 0x34,
// aesdecwide128kl
0xf3, 0x0f, 0x38, 0xd8, 0x4c, 0x88, 0x12,
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x4c, 0xee, 0x34, 0xf3,
// aesdecwide256kl
0x0f, 0x38, 0xd8, 0x5c, 0x88, 0x12,
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x5c, 0xee, 0x34,
// aesenc128kl
0xf3, 0x44, 0x0f, 0x38, 0xdc, 0x7c, 0x88, 0x12,
0x62, 0x1c, 0x7a, 0x08, 0xdc, 0x7c, 0xee, 0x34,
// aesenc256kl
0xf3, 0x44, 0x0f, 0x38, 0xde, 0x7c, 0x88, 0x12,
0x62, 0x1c, 0x7a, 0x08, 0xde, 0x7c, 0xee, 0x34,
// aesencwide128kl
0xf3, 0x0f, 0x38, 0xd8, 0x44, 0x88, 0x12,
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x44, 0xee, 0x34,
// aesencwide256kl
0xf3, 0x0f, 0x38, 0xd8, 0x54, 0x88, 0x12,
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x54, 0xee, 0x34,
};
const size_t n = sizeof(tbl);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
CYBOZU_TEST_AUTO(encodekey)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
encodekey128(eax, ebx);
encodekey128(eax, r8d);
encodekey128(r8d, ebx);
encodekey128(r30d, r29d);
encodekey256(eax, ebx);
encodekey256(eax, r8d);
encodekey256(r8d, ebx);
encodekey256(r30d, r29d);
}
} c;
const uint8_t tbl[] = {
// encodekey128
0xf3, 0x0f, 0x38, 0xfa, 0xc3,
0x62, 0xd4, 0x7e, 0x08, 0xda, 0xc0,
0x62, 0x74, 0x7e, 0x08, 0xda, 0xc3,
0x62, 0x4c, 0x7e, 0x08, 0xda, 0xf5,
// encodekey256
0xf3, 0x0f, 0x38, 0xfb, 0xc3,
0x62, 0xd4, 0x7e, 0x08, 0xdb, 0xc0,
0x62, 0x74, 0x7e, 0x08, 0xdb, 0xc3,
0x62, 0x4c, 0x7e, 0x08, 0xdb, 0xf5,
};
const size_t n = sizeof(tbl);
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}

View file

@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7000 /* 0xABCD = A.BC(.D) */
VERSION = 0x7010 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -231,6 +231,7 @@ enum {
ERR_INVALID_ZU,
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
ERR_INTERNAL // Put it at last.
};
@ -288,6 +289,7 @@ inline const char *ConvertErrorToString(int err)
"invalid ZU",
"can't use rex2",
"invalid dfv",
"invalid reg index",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@ -2712,6 +2714,47 @@ private:
opVex(t1, &tmm0, addr2, type, code);
}
#endif
// (reg32e/mem, k) if rev else (k, k/mem/reg32e)
// size = 8, 16, 32, 64
void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
{
int code = 0;
bool isReg = op.isREG(size < 64 ? 32 : 64);
if (rev) {
code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
} else {
code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
}
if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
uint64_t type = 0;
switch (size) {
case 8: type = T_W0|T_66; break;
case 16: type = T_W0; break;
case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
}
const Operand *p1 = &k, *p2 = &op;
if (code == 0x93) { std::swap(p1, p2); }
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
}
void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code)
{
if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX)
if (addr.hasRex2()) {
opROO(Reg(), addr, *x, type2, code);
return;
}
opRO(*x, addr, type1, code);
}
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
{
if (r1.getIdx() < 8 && r2.getIdx() < 8) {
db(0xF3); db(0x0F); db(0x38); db(code1); setModRM(3, r1.getIdx(), r2.getIdx());
return;
}
opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@ -3096,30 +3139,6 @@ public:
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
// (reg32e/mem, k) if rev else (k, k/mem/reg32e)
// size = 8, 16, 32, 64
void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
{
int code = 0;
bool isReg = op.isREG(size < 64 ? 32 : 64);
if (rev) {
code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
} else {
code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
}
if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
uint64_t type = 0;
switch (size) {
case 8: type = T_W0|T_66; break;
case 16: type = T_W0; break;
case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
}
const Operand *p1 = &k, *p2 = &op;
if (code == 0x93) { std::swap(p1, p2); }
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
}
/*
use single byte nop if useMultiByteNop = false
*/

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "7.00"; }
const char *getVersionString() const { return "7.01"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@ -1926,6 +1926,16 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); }
void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); }
void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }

View file

@ -473,6 +473,12 @@ public:
XBYAK_DEFINE_TYPE(81, tAVX_VNNI_INT16);
XBYAK_DEFINE_TYPE(82, tAPX_F);
XBYAK_DEFINE_TYPE(83, tAVX10);
XBYAK_DEFINE_TYPE(84, tAESKLE);
XBYAK_DEFINE_TYPE(85, tWIDE_KL);
XBYAK_DEFINE_TYPE(86, tKEYLOCKER);
XBYAK_DEFINE_TYPE(87, tKEYLOCKER_WIDE);
XBYAK_DEFINE_TYPE(88, tSSE4a);
XBYAK_DEFINE_TYPE(89, tCLWB);
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@ -519,13 +525,14 @@ public:
if (maxExtendedNum >= 0x80000001) {
getCpuid(0x80000001, data);
if (EDX & (1U << 31)) type_ |= t3DN;
if (EDX & (1U << 30)) type_ |= tE3DN;
if (EDX & (1U << 27)) type_ |= tRDTSCP;
if (EDX & (1U << 22)) type_ |= tMMX2;
if (EDX & (1U << 15)) type_ |= tCMOV;
if (ECX & (1U << 5)) type_ |= tLZCNT;
if (ECX & (1U << 6)) type_ |= tSSE4a;
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
if (EDX & (1U << 15)) type_ |= tCMOV;
if (EDX & (1U << 22)) type_ |= tMMX2;
if (EDX & (1U << 27)) type_ |= tRDTSCP;
if (EDX & (1U << 30)) type_ |= tE3DN;
if (EDX & (1U << 31)) type_ |= t3DN;
}
if (maxExtendedNum >= 0x80000008) {
@ -544,8 +551,8 @@ public:
if (ECX & (1U << 25)) type_ |= tAESNI;
if (ECX & (1U << 26)) type_ |= tXSAVE;
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
if (ECX & (1U << 30)) type_ |= tRDRAND;
if (ECX & (1U << 29)) type_ |= tF16C;
if (ECX & (1U << 30)) type_ |= tRDRAND;
if (EDX & (1U << 15)) type_ |= tCMOV;
if (EDX & (1U << 23)) type_ |= tMMX;
@ -556,8 +563,8 @@ public:
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64_t bv = getXfeature();
if ((bv & 6) == 6) {
if (ECX & (1U << 28)) type_ |= tAVX;
if (ECX & (1U << 12)) type_ |= tFMA;
if (ECX & (1U << 28)) type_ |= tAVX;
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
#if !defined(__APPLE__)
if (((bv >> 5) & 7) == 7)
@ -591,21 +598,23 @@ public:
const uint32_t maxNumSubLeaves = EAX;
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
if (EBX & (1U << 3)) type_ |= tBMI1;
if (EBX & (1U << 4)) type_ |= tHLE;
if (EBX & (1U << 8)) type_ |= tBMI2;
if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
if (EBX & (1U << 11)) type_ |= tRTM;
if (EBX & (1U << 14)) type_ |= tMPX;
if (EBX & (1U << 18)) type_ |= tRDSEED;
if (EBX & (1U << 19)) type_ |= tADX;
if (EBX & (1U << 20)) type_ |= tSMAP;
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
if (EBX & (1U << 4)) type_ |= tHLE;
if (EBX & (1U << 11)) type_ |= tRTM;
if (EBX & (1U << 14)) type_ |= tMPX;
if (EBX & (1U << 24)) type_ |= tCLWB;
if (EBX & (1U << 29)) type_ |= tSHA;
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
if (ECX & (1U << 5)) type_ |= tWAITPKG;
if (ECX & (1U << 8)) type_ |= tGFNI;
if (ECX & (1U << 9)) type_ |= tVAES;
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
if (ECX & (1U << 23)) type_ |= tKEYLOCKER;
if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
if (ECX & (1U << 27)) type_ |= tMOVDIRI;
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
@ -635,7 +644,13 @@ public:
if (EDX & (1U << 21)) type_ |= tAPX_F;
}
}
if (has(tAVX10) && maxNum >= 24) {
if (maxNum >= 0x19) {
getCpuidEx(0x19, 0, data);
if (EBX & (1U << 0)) type_ |= tAESKLE;
if (EBX & (1U << 2)) type_ |= tWIDE_KL;
if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE;
}
if (has(tAVX10) && maxNum >= 0x24) {
getCpuidEx(0x24, 0, data);
avx10version_ = EBX & mask(7);
}