mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
Merge branch 'dev'
This commit is contained in:
commit
0924ff4aa0
22 changed files with 68 additions and 22 deletions
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 6.70)
|
||||
project(xbyak LANGUAGES CXX VERSION 6.71)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# History
|
||||
|
||||
* 2023/Jul/27 ver 6.71 Allocator supports huge page
|
||||
* 2023/Jul/05 ver 6.70 add alias of vpclmulqdq, correct alias of pclmulqdq
|
||||
* 2023/Jun/27 ver 6.69.2 add constexpr to `TypeT operator|` (thanks to Wunkolo)
|
||||
* 2023/Mar/23 ver 6.69.1 add detection of xsave (thanks to Wunkolo)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '6.70',
|
||||
version: '6.71',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Xbyak 6.70 [![Badge Build]][Build Status]
|
||||
# Xbyak 6.71 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
|
@ -26,6 +26,10 @@ It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl
|
|||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### Derived Projects
|
||||
- [Xbyak_aarch64](https://github.com/fujitsu/xbyak_aarch64/) : for AArch64
|
||||
- [Xbyak_riscv](https://github.com/herumi/xbyak_riscv) : for RISC-V
|
||||
|
||||
### News
|
||||
|
||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.70
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.71
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -402,6 +402,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
|
||||
2023/07/05 ver 6.70 vpclmulqdqのalias追加
|
||||
2023/06/27 ver 6.69.2 `TypeT operator|`にconstexpr追加(thanks to Wunkolo)
|
||||
2023/03/23 ver 6.69.1 xsave判定追加(thanks to Wunkolo)
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr)
|
||||
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
|
||||
|
|
2
sample/cpuid/gnr.txt
Normal file
2
sample/cpuid/gnr.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize amx_fp16 prefetchiti
|
2
sample/cpuid/grr.txt
Normal file
2
sample/cpuid/grr.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd
|
2
sample/cpuid/mtl.txt
Normal file
2
sample/cpuid/mtl.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
|
2
sample/cpuid/rpl.txt
Normal file
2
sample/cpuid/rpl.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize
|
2
sample/cpuid/srf.txt
Normal file
2
sample/cpuid/srf.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd
|
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr)
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt
|
||||
|
|
|
@ -13,6 +13,9 @@ ifeq ($(UNAME_S),Darwin)
|
|||
# 32-bit binary is not supported
|
||||
ONLY_64BIT=1
|
||||
endif
|
||||
ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
|
||||
ONLY_64BIT=1
|
||||
endif
|
||||
ifeq ($(ONLY_64BIT),0)
|
||||
TARGET += jmp address
|
||||
endif
|
||||
|
@ -25,7 +28,7 @@ all: $(TARGET)
|
|||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
|
||||
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
make_nm:
|
||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||
|
|
|
@ -1050,6 +1050,10 @@ class Test {
|
|||
"nle",
|
||||
"g",
|
||||
};
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wformat-truncation" // wrong detection
|
||||
#endif
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
|
||||
|
@ -1059,6 +1063,9 @@ class Test {
|
|||
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
|
||||
put(buf, REG8|REG8_3|MEM);
|
||||
}
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
void putReg1() const
|
||||
{
|
||||
|
|
|
@ -23,7 +23,7 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame > x.lst
|
||||
diff ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -48,4 +48,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
|
@ -35,4 +35,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
|
@ -61,4 +61,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
|
@ -155,7 +155,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x6700 /* 0xABCD = A.BC(.D) */
|
||||
VERSION = 0x6710 /* 0xABCD = A.BC(.D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -371,7 +371,29 @@ inline const To CastTo(From p) XBYAK_NOEXCEPT
|
|||
}
|
||||
namespace inner {
|
||||
|
||||
static const size_t ALIGN_PAGE_SIZE = 4096;
|
||||
#ifdef _WIN32
|
||||
// Windows-only holder for a SYSTEM_INFO record.
// The constructor fills `info` by calling GetSystemInfo(), so a constructed
// object always carries the values reported by that call.
struct SystemInfo {
|
||||
// Record filled in by the constructor.
SYSTEM_INFO info;
|
||||
SystemInfo()
|
||||
{
|
||||
// Populate `info` once, at construction time.
GetSystemInfo(&info);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
//static const size_t ALIGN_PAGE_SIZE = 4096;
|
||||
// Returns the page size in bytes.
// - _WIN32: dwPageSize from a function-local static SystemInfo.
// - __GNUC__ (non-Windows): sysconf(_SC_PAGESIZE), kept in a function-local
//   static; used only when the result is positive.
// - Any other case, or a non-positive sysconf() result: 4096.
inline size_t getPageSize()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// Function-local static: the SystemInfo constructor runs on first use only.
static const SystemInfo si;
|
||||
return si.info.dwPageSize;
|
||||
#elif defined(__GNUC__)
|
||||
// Function-local static: sysconf() is queried on first use only.
static const long pageSize = sysconf(_SC_PAGESIZE);
|
||||
// A non-positive value falls through to the 4096 default below.
if (pageSize > 0) {
|
||||
return (size_t)pageSize;
|
||||
}
|
||||
#endif
|
||||
// Default when no platform query is available or the query failed.
return 4096;
|
||||
}
|
||||
|
||||
// True when x is at most 0x7F or at least 0xFFFFFF80, i.e. the 32-bit pattern
// is the sign-extension of an 8-bit value.
inline bool IsInDisp8(uint32_t x)
{
	return x <= 0x7F || x >= 0xFFFFFF80;
}
|
||||
// True when x is at most 0x7FFFFFFF or at least ~0x7FFFFFFF (as 64-bit), i.e.
// the 64-bit pattern is the sign-extension of a 32-bit value.
inline bool IsInInt32(uint64_t x)
{
	const uint64_t maxPositive = 0x7FFFFFFFU;
	return x <= maxPositive || x >= ~maxPositive;
}
|
||||
|
@ -397,7 +419,7 @@ enum LabelMode {
|
|||
*/
|
||||
struct Allocator {
|
||||
explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator
|
||||
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
|
||||
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::getPageSize())); }
|
||||
virtual void free(uint8_t *p) { AlignedFree(p); }
|
||||
virtual ~Allocator() {}
|
||||
/* override to return false if you call protect() manually */
|
||||
|
@ -445,7 +467,7 @@ public:
|
|||
explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
|
||||
uint8_t *alloc(size_t size)
|
||||
{
|
||||
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
|
||||
const size_t alignedSizeM1 = inner::getPageSize() - 1;
|
||||
size = (size + alignedSizeM1) & ~alignedSizeM1;
|
||||
#if defined(MAP_ANONYMOUS)
|
||||
int mode = MAP_PRIVATE | MAP_ANONYMOUS;
|
||||
|
@ -1213,9 +1235,6 @@ public:
|
|||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||
#ifndef NDEBUG
|
||||
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
|
||||
#endif
|
||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||
#else
|
||||
return true;
|
||||
|
@ -2884,7 +2903,7 @@ public:
|
|||
{
|
||||
if (x == 1) return;
|
||||
if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
|
||||
if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
|
||||
if (isAutoGrow()) XBYAK_THROW(ERR_BAD_ALIGN)
|
||||
size_t remain = size_t(getCurr()) % x;
|
||||
if (remain) {
|
||||
nop(x - remain, useMultiByteNop);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "6.70"; }
|
||||
const char *getVersionString() const { return "6.71"; }
|
||||
// aadd mem, reg: forwards to opModM with opcode bytes 0F 38 FC.
// The second parameter is `const Reg32e &reg`; the page scrape had decoded "&reg" as the "®" entity.
void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
// aand mem, reg: emits a 0x66 byte via db(), then forwards to opModM with opcode bytes 0F 38 FC.
// The second parameter is `const Reg32e &reg`; the page scrape had decoded "&reg" as the "®" entity.
void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
|
|
|
@ -527,13 +527,13 @@ public:
|
|||
|
||||
getCpuid(1, data);
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
if (ECX & (1U << 19)) type_ |= tSSE41;
|
||||
if (ECX & (1U << 20)) type_ |= tSSE42;
|
||||
if (ECX & (1U << 22)) type_ |= tMOVBE;
|
||||
if (ECX & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (ECX & (1U << 25)) type_ |= tAESNI;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 26)) type_ |= tXSAVE;
|
||||
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
|
|
Loading…
Reference in a new issue