Merge branch 'dev'

This commit is contained in:
MITSUNARI Shigeo 2023-07-27 14:01:03 +09:00
commit 0924ff4aa0
22 changed files with 68 additions and 22 deletions

View file

@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 2.6...3.0.2)
project(xbyak LANGUAGES CXX VERSION 6.70)
project(xbyak LANGUAGES CXX VERSION 6.71)
file(GLOB headers xbyak/*.h)

View file

@ -1,5 +1,6 @@
# History
* 2023/Jul/27 ver 6.71 Allocator supports huge page
* 2023/Jul/05 ver 6.70 add alias of vpclmulqdq, correct alias of pclmulqdq
* 2023/Jun/27 ver 6.69.2 add constexpr to `TypeT operator|` (thanks to Wunkolo)
* 2023/Mar/23 ver 6.69.1 add detection of xsave (thanks to Wunkolo)

View file

@ -5,7 +5,7 @@
project(
'xbyak',
'cpp',
version: '6.70',
version: '6.71',
license: 'BSD-3-Clause',
default_options: 'b_ndebug=if-release'
)

View file

@ -1,5 +1,5 @@
# Xbyak 6.70 [![Badge Build]][Build Status]
# Xbyak 6.71 [![Badge Build]][Build Status]
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
@ -26,6 +26,10 @@ It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
### Derived Projects
- [Xbyak_aarch64](https://github.com/fujitsu/xbyak_aarch64/) : for AArch64
- [Xbyak_riscv](https://github.com/herumi/xbyak_riscv) : for RISC-V
### News
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.70
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.71
-----------------------------------------------------------------------------
◎概要
@ -402,6 +402,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
-----------------------------------------------------------------------------
◎履歴
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
2023/07/05 ver 6.70 vpclmulqdqのalias追加
2023/06/27 ver 6.69.2 `TypeT operator|`にconstexpr追加(thanks to Wunkolo)
2023/03/23 ver 6.69.1 xsave判定追加(thanks to Wunkolo)

View file

@ -2,7 +2,8 @@
make -C ../ test_util64
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr)
for cpu in ${cpus[@]} ; do
echo $cpu
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt

2
sample/cpuid/gnr.txt Normal file
View file

@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b uintr serialize amx_fp16 prefetchiti

2
sample/cpuid/grr.txt Normal file
View file

@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd

2
sample/cpuid/mtl.txt Normal file
View file

@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize

2
sample/cpuid/rpl.txt Normal file
View file

@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize

2
sample/cpuid/srf.txt Normal file
View file

@ -0,0 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote movdiri movdir64b serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd

View file

@ -1,2 +1,2 @@
vendor intel
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote

View file

@ -2,7 +2,7 @@
make -C ../ test_util64
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr)
for cpu in ${cpus[@]} ; do
echo $cpu
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt

View file

@ -13,6 +13,9 @@ ifeq ($(UNAME_S),Darwin)
# 32-bit binary is not supported
ONLY_64BIT=1
endif
ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
ONLY_64BIT=1
endif
ifeq ($(ONLY_64BIT),0)
TARGET += jmp address
endif
@ -25,7 +28,7 @@ all: $(TARGET)
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
make_nm:
$(CXX) $(CFLAGS) make_nm.cpp -o $@
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h

View file

@ -1050,6 +1050,10 @@ class Test {
"nle",
"g",
};
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-truncation" // wrong detection
#endif
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
char buf[32];
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
@ -1059,6 +1063,9 @@ class Test {
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
put(buf, REG8|REG8_3|MEM);
}
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
void putReg1() const
{

View file

@ -23,7 +23,7 @@ echo "xbyak"
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame > x.lst
diff ok.lst x.lst && echo "ok"
diff -bB ok.lst x.lst && echo "ok"
}

View file

@ -48,4 +48,4 @@ echo "xbyak"
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
diff -B ok.lst x.lst && echo "ok"
diff -bB ok.lst x.lst && echo "ok"

View file

@ -35,4 +35,4 @@ echo "xbyak"
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
./nm_frame | $FILTER > x.lst
diff -B ok.lst x.lst && echo "ok"
diff -bB ok.lst x.lst && echo "ok"

View file

@ -61,4 +61,4 @@ echo "xbyak"
echo "compile nm_frame.cpp"
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
diff -B ok.lst x.lst && echo "ok"
diff -bB ok.lst x.lst && echo "ok"

View file

@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x6700 /* 0xABCD = A.BC(.D) */
VERSION = 0x6710 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@ -371,7 +371,29 @@ inline const To CastTo(From p) XBYAK_NOEXCEPT
}
namespace inner {
static const size_t ALIGN_PAGE_SIZE = 4096;
#ifdef _WIN32
/*
	Windows-only holder for a SYSTEM_INFO record.
	The constructor fills `info` by calling GetSystemInfo(), so a
	constructed object always carries the queried values.
*/
struct SystemInfo {
	SYSTEM_INFO info;
	SystemInfo() { GetSystemInfo(&info); }
};
#endif
//static const size_t ALIGN_PAGE_SIZE = 4096;
/*
	Return the page size used for aligning code buffers.
	- _WIN32   : dwPageSize taken from a function-local static SystemInfo.
	- __GNUC__ : sysconf(_SC_PAGESIZE), queried once and kept in a static;
	             a non-positive result falls through to the default.
	- otherwise: the default of 4096.
*/
inline size_t getPageSize()
{
#ifdef _WIN32
	static const SystemInfo sysInfo;
	return sysInfo.info.dwPageSize;
#else
	size_t result = 4096; // default when the size cannot be queried
#if defined(__GNUC__)
	static const long queried = sysconf(_SC_PAGESIZE);
	if (queried > 0) {
		result = static_cast<size_t>(queried);
	}
#endif
	return result;
#endif
}
inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; }
inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; }
@ -397,7 +419,7 @@ enum LabelMode {
*/
struct Allocator {
explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::getPageSize())); }
virtual void free(uint8_t *p) { AlignedFree(p); }
virtual ~Allocator() {}
/* override to return false if you call protect() manually */
@ -445,7 +467,7 @@ public:
explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
uint8_t *alloc(size_t size)
{
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
const size_t alignedSizeM1 = inner::getPageSize() - 1;
size = (size + alignedSizeM1) & ~alignedSizeM1;
#if defined(MAP_ANONYMOUS)
int mode = MAP_PRIVATE | MAP_ANONYMOUS;
@ -1213,9 +1235,6 @@ public:
size_t pageSize = sysconf(_SC_PAGESIZE);
size_t iaddr = reinterpret_cast<size_t>(addr);
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
#ifndef NDEBUG
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
#endif
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
#else
return true;
@ -2884,7 +2903,7 @@ public:
{
if (x == 1) return;
if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN)
if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x);
if (isAutoGrow()) XBYAK_THROW(ERR_BAD_ALIGN)
size_t remain = size_t(getCurr()) % x;
if (remain) {
nop(x - remain, useMultiByteNop);

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "6.70"; }
const char *getVersionString() const { return "6.71"; }
void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }

View file

@ -527,13 +527,13 @@ public:
getCpuid(1, data);
if (ECX & (1U << 0)) type_ |= tSSE3;
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
if (ECX & (1U << 9)) type_ |= tSSSE3;
if (ECX & (1U << 19)) type_ |= tSSE41;
if (ECX & (1U << 20)) type_ |= tSSE42;
if (ECX & (1U << 22)) type_ |= tMOVBE;
if (ECX & (1U << 23)) type_ |= tPOPCNT;
if (ECX & (1U << 25)) type_ |= tAESNI;
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
if (ECX & (1U << 26)) type_ |= tXSAVE;
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
if (ECX & (1U << 30)) type_ |= tRDRAND;