mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
Merge branch 'dev'
This commit is contained in:
commit
c9765588f0
27 changed files with 240 additions and 59 deletions
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 7.00)
|
||||
project(xbyak LANGUAGES CXX VERSION 7.01)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# History
|
||||
|
||||
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of AVX10/APX
|
||||
* 2023/Dec/01 ver 7.00 support APX
|
||||
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
|
||||
* 2023/Aug/02 ver 6.72 add xbegin/xabort/xend
|
||||
* 2023/Jul/27 ver 6.71 Allocator supports huge page
|
||||
|
|
|
@ -2013,6 +2013,38 @@ void put64()
|
|||
printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0x%02X); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
// aes
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint64_t type1;
|
||||
uint64_t type2;
|
||||
uint8_t code;
|
||||
int idx;
|
||||
} tbl[] = {
|
||||
{ "aesdec128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDD, 8 },
|
||||
{ "aesdec256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDF, 8 },
|
||||
{ "aesdecwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 1 },
|
||||
{ "aesdecwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 3 },
|
||||
{ "aesenc128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDC, 8 },
|
||||
{ "aesenc256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xDE, 8 },
|
||||
{ "aesencwide128kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 0 },
|
||||
{ "aesencwide256kl", T_F3|T_0F38, T_MUST_EVEX|T_F3, 0xD8, 2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string s1 = type2String(p->type1);
|
||||
std::string s2 = type2String(p->type2);
|
||||
if (p->idx == 8) {
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opAESKL(&x, addr, %s, %s, 0x%02X); }\n", p->name, s1.c_str(), s2.c_str(), p->code);
|
||||
} else {
|
||||
printf("void %s(const Address& addr) { opAESKL(&xmm%d, addr, %s, %s, 0x%02X); }\n", p->name, p->idx, s1.c_str(), s2.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
}
|
||||
// encodekey
|
||||
puts("void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }");
|
||||
puts("void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }");
|
||||
}
|
||||
|
||||
void putAMX_TILE()
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '7.00',
|
||||
version: '7.01',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Xbyak 7.00 [![Badge Build]][Build Status]
|
||||
# Xbyak 7.01 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
|
@ -21,8 +21,7 @@ It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl
|
|||
- header file only
|
||||
- Intel/MASM like syntax
|
||||
- fully support AVX-512
|
||||
|
||||
- support APX
|
||||
- support APX/AVX10
|
||||
|
||||
**Note**:
|
||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
|
@ -34,6 +33,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
|||
|
||||
### News
|
||||
|
||||
- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
|
||||
- support APX except for a few instructions
|
||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||
- add movdiri, movdir64b, clwb, cldemote
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.00
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.01
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -46,6 +46,8 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
|||
-----------------------------------------------------------------------------
|
||||
◎新機能
|
||||
|
||||
APX/AVX10対応
|
||||
|
||||
例外なしモード追加
|
||||
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
|
||||
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt clwb
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt clwb
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize amx_fp16 prefetchiti
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 prefetchiti avx10
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt clwb
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt movdiri movdir64b
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt clwb movdiri movdir64b aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote clwb
|
||||
|
|
|
@ -31,6 +31,7 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tSSSE3, "ssse3" },
|
||||
{ Cpu::tSSE41, "sse41" },
|
||||
{ Cpu::tSSE42, "sse42" },
|
||||
{ Cpu::tSSE4a, "sse4a" },
|
||||
{ Cpu::tPOPCNT, "popcnt" },
|
||||
{ Cpu::t3DN, "3dn" },
|
||||
{ Cpu::tE3DN, "e3dn" },
|
||||
|
@ -87,6 +88,7 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tWAITPKG, "waitpkg" },
|
||||
{ Cpu::tCLFLUSHOPT, "clflushopt" },
|
||||
{ Cpu::tCLDEMOTE, "cldemote" },
|
||||
{ Cpu::tCLWB, "clwb" },
|
||||
{ Cpu::tMOVDIRI, "movdiri" },
|
||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||
{ Cpu::tUINTR, "uintr" },
|
||||
|
@ -105,6 +107,10 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tAVX_VNNI_INT16, "avx_vnni_int16" },
|
||||
{ Cpu::tAPX_F, "apx_f" },
|
||||
{ Cpu::tAVX10, "avx10" },
|
||||
{ Cpu::tAESKLE, "aeskle" },
|
||||
{ Cpu::tWIDE_KL, "wide_kl" },
|
||||
{ Cpu::tKEYLOCKER, "keylocker" },
|
||||
{ Cpu::tKEYLOCKER_WIDE, "keylocker_wide" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
|
|
95
test/apx.cpp
95
test/apx.cpp
|
@ -1775,3 +1775,98 @@ CYBOZU_TEST_AUTO(amx)
|
|||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(aeskl)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
aesdec128kl(xmm15, ptr[rax+rcx*4+0x12]);
|
||||
aesdec128kl(xmm15, ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesdec256kl(xmm15, ptr[rax+rcx*4+0x12]);
|
||||
aesdec256kl(xmm15, ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesdecwide128kl(ptr[rax+rcx*4+0x12]);
|
||||
aesdecwide128kl(ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesdecwide256kl(ptr[rax+rcx*4+0x12]);
|
||||
aesdecwide256kl(ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesenc128kl(xmm15, ptr[rax+rcx*4+0x12]);
|
||||
aesenc128kl(xmm15, ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesenc256kl(xmm15, ptr[rax+rcx*4+0x12]);
|
||||
aesenc256kl(xmm15, ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesencwide128kl(ptr[rax+rcx*4+0x12]);
|
||||
aesencwide128kl(ptr[r30+r29*8+0x34]);
|
||||
|
||||
aesencwide256kl(ptr[rax+rcx*4+0x12]);
|
||||
aesencwide256kl(ptr[r30+r29*8+0x34]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
// aesdec128kl
|
||||
0xf3, 0x44, 0x0f, 0x38, 0xdd, 0x7c, 0x88, 0x12,
|
||||
0x62, 0x1c, 0x7a, 0x08, 0xdd, 0x7c, 0xee, 0x34,
|
||||
// aesdec256kl
|
||||
0xf3, 0x44, 0x0f, 0x38, 0xdf, 0x7c, 0x88, 0x12,
|
||||
0x62, 0x1c, 0x7a, 0x08, 0xdf, 0x7c, 0xee, 0x34,
|
||||
// aesdecwide128kl
|
||||
0xf3, 0x0f, 0x38, 0xd8, 0x4c, 0x88, 0x12,
|
||||
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x4c, 0xee, 0x34, 0xf3,
|
||||
// aesdecwide256kl
|
||||
0x0f, 0x38, 0xd8, 0x5c, 0x88, 0x12,
|
||||
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x5c, 0xee, 0x34,
|
||||
// aesenc128kl
|
||||
0xf3, 0x44, 0x0f, 0x38, 0xdc, 0x7c, 0x88, 0x12,
|
||||
0x62, 0x1c, 0x7a, 0x08, 0xdc, 0x7c, 0xee, 0x34,
|
||||
// aesenc256kl
|
||||
0xf3, 0x44, 0x0f, 0x38, 0xde, 0x7c, 0x88, 0x12,
|
||||
0x62, 0x1c, 0x7a, 0x08, 0xde, 0x7c, 0xee, 0x34,
|
||||
// aesencwide128kl
|
||||
0xf3, 0x0f, 0x38, 0xd8, 0x44, 0x88, 0x12,
|
||||
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x44, 0xee, 0x34,
|
||||
// aesencwide256kl
|
||||
0xf3, 0x0f, 0x38, 0xd8, 0x54, 0x88, 0x12,
|
||||
0x62, 0x9c, 0x7a, 0x08, 0xd8, 0x54, 0xee, 0x34,
|
||||
};
|
||||
const size_t n = sizeof(tbl);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(encodekey)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
encodekey128(eax, ebx);
|
||||
encodekey128(eax, r8d);
|
||||
encodekey128(r8d, ebx);
|
||||
encodekey128(r30d, r29d);
|
||||
|
||||
encodekey256(eax, ebx);
|
||||
encodekey256(eax, r8d);
|
||||
encodekey256(r8d, ebx);
|
||||
encodekey256(r30d, r29d);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
// encodekey128
|
||||
0xf3, 0x0f, 0x38, 0xfa, 0xc3,
|
||||
0x62, 0xd4, 0x7e, 0x08, 0xda, 0xc0,
|
||||
0x62, 0x74, 0x7e, 0x08, 0xda, 0xc3,
|
||||
0x62, 0x4c, 0x7e, 0x08, 0xda, 0xf5,
|
||||
// encodekey256
|
||||
0xf3, 0x0f, 0x38, 0xfb, 0xc3,
|
||||
0x62, 0xd4, 0x7e, 0x08, 0xdb, 0xc0,
|
||||
0x62, 0x74, 0x7e, 0x08, 0xdb, 0xc3,
|
||||
0x62, 0x4c, 0x7e, 0x08, 0xdb, 0xf5,
|
||||
};
|
||||
const size_t n = sizeof(tbl);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
|
|
|
@ -155,7 +155,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x7000 /* 0xABCD = A.BC(.D) */
|
||||
VERSION = 0x7010 /* 0xABCD = A.BC(.D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -231,6 +231,7 @@ enum {
|
|||
ERR_INVALID_ZU,
|
||||
ERR_CANT_USE_REX2,
|
||||
ERR_INVALID_DFV,
|
||||
ERR_INVALID_REG_IDX,
|
||||
ERR_INTERNAL // Put it at last.
|
||||
};
|
||||
|
||||
|
@ -288,6 +289,7 @@ inline const char *ConvertErrorToString(int err)
|
|||
"invalid ZU",
|
||||
"can't use rex2",
|
||||
"invalid dfv",
|
||||
"invalid reg index",
|
||||
"internal error"
|
||||
};
|
||||
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
||||
|
@ -2712,6 +2714,47 @@ private:
|
|||
opVex(t1, &tmm0, addr2, type, code);
|
||||
}
|
||||
#endif
|
||||
// (reg32e/mem, k) if rev else (k, k/mem/reg32e)
|
||||
// size = 8, 16, 32, 64
|
||||
void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
|
||||
{
|
||||
int code = 0;
|
||||
bool isReg = op.isREG(size < 64 ? 32 : 64);
|
||||
if (rev) {
|
||||
code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
|
||||
} else {
|
||||
code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
|
||||
}
|
||||
if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
uint64_t type = 0;
|
||||
switch (size) {
|
||||
case 8: type = T_W0|T_66; break;
|
||||
case 16: type = T_W0; break;
|
||||
case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
|
||||
case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
|
||||
}
|
||||
const Operand *p1 = &k, *p2 = &op;
|
||||
if (code == 0x93) { std::swap(p1, p2); }
|
||||
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
|
||||
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
|
||||
}
|
||||
void opAESKL(const Xmm *x, const Address& addr, uint64_t type1, uint64_t type2, uint8_t code)
|
||||
{
|
||||
if (x && x->getIdx() >= 16) XBYAK_THROW(ERR_INVALID_REG_IDX)
|
||||
if (addr.hasRex2()) {
|
||||
opROO(Reg(), addr, *x, type2, code);
|
||||
return;
|
||||
}
|
||||
opRO(*x, addr, type1, code);
|
||||
}
|
||||
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
|
||||
{
|
||||
if (r1.getIdx() < 8 && r2.getIdx() < 8) {
|
||||
db(0xF3); db(0x0F); db(0x38); db(code1); setModRM(3, r1.getIdx(), r2.getIdx());
|
||||
return;
|
||||
}
|
||||
opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2);
|
||||
}
|
||||
public:
|
||||
unsigned int getVersion() const { return VERSION; }
|
||||
using CodeArray::db;
|
||||
|
@ -3096,30 +3139,6 @@ public:
|
|||
// set default encoding to select Vex or Evex
|
||||
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
||||
|
||||
// (reg32e/mem, k) if rev else (k, k/mem/reg32e)
|
||||
// size = 8, 16, 32, 64
|
||||
void opKmov(const Opmask& k, const Operand& op, bool rev, int size)
|
||||
{
|
||||
int code = 0;
|
||||
bool isReg = op.isREG(size < 64 ? 32 : 64);
|
||||
if (rev) {
|
||||
code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0;
|
||||
} else {
|
||||
code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0;
|
||||
}
|
||||
if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
uint64_t type = 0;
|
||||
switch (size) {
|
||||
case 8: type = T_W0|T_66; break;
|
||||
case 16: type = T_W0; break;
|
||||
case 32: type = isReg ? T_W0|T_F2 : T_W1|T_66; break;
|
||||
case 64: type = isReg ? T_W1|T_F2 : T_W1; break;
|
||||
}
|
||||
const Operand *p1 = &k, *p2 = &op;
|
||||
if (code == 0x93) { std::swap(p1, p2); }
|
||||
if (opROO(Reg(), *p2, *p1, T_MAP1|type, code)) return;
|
||||
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|T_0F|type, code);
|
||||
}
|
||||
/*
|
||||
use single byte nop if useMultiByteNop = false
|
||||
*/
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "7.00"; }
|
||||
const char *getVersionString() const { return "7.01"; }
|
||||
void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC); }
|
||||
void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38 | T_66, 0x0FC); }
|
||||
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
|
||||
|
@ -1926,6 +1926,16 @@ void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r
|
|||
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); }
|
||||
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); }
|
||||
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); }
|
||||
void aesdec128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDD); }
|
||||
void aesdec256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDF); }
|
||||
void aesdecwide128kl(const Address& addr) { opAESKL(&xmm1, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesdecwide256kl(const Address& addr) { opAESKL(&xmm3, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesenc128kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDC); }
|
||||
void aesenc256kl(const Xmm& x, const Address& addr) { opAESKL(&x, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xDE); }
|
||||
void aesencwide128kl(const Address& addr) { opAESKL(&xmm0, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void aesencwide256kl(const Address& addr) { opAESKL(&xmm2, addr, T_F3|T_0F38, T_F3|T_MUST_EVEX, 0xD8); }
|
||||
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
|
||||
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
|
||||
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
|
||||
void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
|
||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }
|
||||
|
|
|
@ -473,6 +473,12 @@ public:
|
|||
XBYAK_DEFINE_TYPE(81, tAVX_VNNI_INT16);
|
||||
XBYAK_DEFINE_TYPE(82, tAPX_F);
|
||||
XBYAK_DEFINE_TYPE(83, tAVX10);
|
||||
XBYAK_DEFINE_TYPE(84, tAESKLE);
|
||||
XBYAK_DEFINE_TYPE(85, tWIDE_KL);
|
||||
XBYAK_DEFINE_TYPE(86, tKEYLOCKER);
|
||||
XBYAK_DEFINE_TYPE(87, tKEYLOCKER_WIDE);
|
||||
XBYAK_DEFINE_TYPE(88, tSSE4a);
|
||||
XBYAK_DEFINE_TYPE(89, tCLWB);
|
||||
|
||||
#undef XBYAK_SPLIT_ID
|
||||
#undef XBYAK_DEFINE_TYPE
|
||||
|
@ -519,13 +525,14 @@ public:
|
|||
if (maxExtendedNum >= 0x80000001) {
|
||||
getCpuid(0x80000001, data);
|
||||
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (ECX & (1U << 5)) type_ |= tLZCNT;
|
||||
if (ECX & (1U << 6)) type_ |= tSSE4a;
|
||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
}
|
||||
|
||||
if (maxExtendedNum >= 0x80000008) {
|
||||
|
@ -544,8 +551,8 @@ public:
|
|||
if (ECX & (1U << 25)) type_ |= tAESNI;
|
||||
if (ECX & (1U << 26)) type_ |= tXSAVE;
|
||||
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
if (ECX & (1U << 29)) type_ |= tF16C;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 23)) type_ |= tMMX;
|
||||
|
@ -556,8 +563,8 @@ public:
|
|||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64_t bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
if (ECX & (1U << 12)) type_ |= tFMA;
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
|
||||
#if !defined(__APPLE__)
|
||||
if (((bv >> 5) & 7) == 7)
|
||||
|
@ -591,21 +598,23 @@ public:
|
|||
const uint32_t maxNumSubLeaves = EAX;
|
||||
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
|
||||
if (EBX & (1U << 3)) type_ |= tBMI1;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 8)) type_ |= tBMI2;
|
||||
if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||
if (EBX & (1U << 19)) type_ |= tADX;
|
||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 24)) type_ |= tCLWB;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (ECX & (1U << 5)) type_ |= tWAITPKG;
|
||||
if (ECX & (1U << 8)) type_ |= tGFNI;
|
||||
if (ECX & (1U << 9)) type_ |= tVAES;
|
||||
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
|
||||
if (ECX & (1U << 23)) type_ |= tKEYLOCKER;
|
||||
if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
|
||||
if (ECX & (1U << 27)) type_ |= tMOVDIRI;
|
||||
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
|
||||
|
@ -635,7 +644,13 @@ public:
|
|||
if (EDX & (1U << 21)) type_ |= tAPX_F;
|
||||
}
|
||||
}
|
||||
if (has(tAVX10) && maxNum >= 24) {
|
||||
if (maxNum >= 0x19) {
|
||||
getCpuidEx(0x19, 0, data);
|
||||
if (EBX & (1U << 0)) type_ |= tAESKLE;
|
||||
if (EBX & (1U << 2)) type_ |= tWIDE_KL;
|
||||
if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE;
|
||||
}
|
||||
if (has(tAVX10) && maxNum >= 0x24) {
|
||||
getCpuidEx(0x24, 0, data);
|
||||
avx10version_ = EBX & mask(7);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue