mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
This commit is contained in:
commit
cf209c915b
4 changed files with 47 additions and 45 deletions
|
@ -1,6 +1,6 @@
|
||||||
# History
|
# History
|
||||||
|
|
||||||
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10./
|
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
|
||||||
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
|
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
|
||||||
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
|
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
|
||||||
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
|
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
|
||||||
|
|
|
@ -148,6 +148,8 @@ feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2
|
||||||
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
|
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
|
||||||
|
|
||||||
- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.
|
- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.
|
||||||
|
At first, I attempted to use EvexEncoding (resp. VexEncoding) instead of AVX10v2Encoding (resp. EvexEncoding) for `setDefaultEncodingAVX10`.
|
||||||
|
But I abandoned this idea when I found that `vmovd` and `vmovw` had different EVEX encodings in AVX512 and AVX10.2.
|
||||||
|
|
||||||
### Remark
|
### Remark
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
|
|
40
readme.md
40
readme.md
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
# Xbyak 7.20 [![Badge Build]][Build Status]
|
# Xbyak 7.20 [![Badge Build]][Build Status]
|
||||||
|
|
||||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
|
||||||
|
|
||||||
## Menu
|
## Menu
|
||||||
|
|
||||||
|
@ -11,15 +11,15 @@
|
||||||
|
|
||||||
## Abstract
|
## Abstract
|
||||||
|
|
||||||
Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic.
|
Xbyak is a C++ header-only library that enables dynamic assembly of x86/x64 instructions using mnemonics.
|
||||||
|
|
||||||
The pronunciation of Xbyak is `kəi-bja-k`.
|
The pronunciation of Xbyak is `/kʌɪbjæk/` (kai-byak).
|
||||||
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
|
||||||
|
|
||||||
## Feature
|
The name is derived from the Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate) (kaibyaku), which means "the beginning of the world" or "creation".
|
||||||
|
|
||||||
- header file only
|
## Features
|
||||||
- Intel/MASM like syntax
|
- Header-only library
|
||||||
|
- Intel/MASM-like syntax
|
||||||
- Full support for AVX-512, APX, and AVX10.2
|
- Full support for AVX-512, APX, and AVX10.2
|
||||||
|
|
||||||
**Note**:
|
**Note**:
|
||||||
|
@ -32,22 +32,22 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||||
|
|
||||||
### News
|
### News
|
||||||
|
|
||||||
- support AVX10.2
|
- Support AVX10.2
|
||||||
- support xresldtrk/xsusldtrk
|
- Support xresldtrk/xsusldtrk
|
||||||
- support RAO-INT for APX
|
- Support RAO-INT for APX
|
||||||
- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
|
- Support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
|
||||||
- support APX except for a few instructions
|
- Support APX except for a few instructions
|
||||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
- Add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||||
- add movdiri, movdir64b, clwb, cldemote
|
- Add movdiri, movdir64b, clwb, cldemote
|
||||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||||
- strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error.
|
- Strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error.
|
||||||
- define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be removed.
|
- Define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be removed.
|
||||||
- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()` for far absolute indirect jump.
|
- Add `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()` for far absolute indirect jump.
|
||||||
- vnni instructions such as vpdpbusd support VEX encoding.
|
- VNNI instructions such as vpdpbusd support VEX encoding.
|
||||||
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
- (Break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
||||||
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
||||||
- support exception-less mode. See [Exception-less mode](#exception-less-mode).
|
- Support exception-less mode. See [Exception-less mode](#exception-less-mode).
|
||||||
- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined.
|
- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined.
|
||||||
|
|
||||||
### Supported OS
|
### Supported OS
|
||||||
|
|
|
@ -2794,6 +2794,30 @@ private:
|
||||||
}
|
}
|
||||||
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
|
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
|
||||||
}
|
}
|
||||||
|
// AVX10 zero-extending for vmovd, vmovw
// Shared operand dispatcher: normalizes (op1, op2) so that p2 is the XMM operand and
// p1 is the other operand, then selects one of the four typeTbl/codeTbl entries and
// emits the instruction through opAVX_X_X_XM.
//   index 0/1: getEncoding(enc, 1) is not AVX10v2Encoding; p1 must satisfy isREG(bit) or be memory
//   index 2/3: getEncoding(enc, 1) is AVX10v2Encoding; p1 must be XMM or memory
//   the odd index (rev == true) is selected when op1 is the XMM operand
// Any other operand combination is reported as ERR_BAD_COMBINATION via XBYAK_THROW.
// NOTE(review): `bit` is presumably the general-purpose register width accepted by the
// non-AVX10.2 form -- confirm against the vmovd/vmovw callers.
|
||||||
|
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
|
||||||
|
{
|
||||||
|
const Operand *p1 = &op1;
|
||||||
|
const Operand *p2 = &op2;
|
||||||
|
// rev tracks whether p1/p2 are currently swapped relative to op1/op2
|
||||||
|
bool rev = false;
|
||||||
|
// step 1: if op1 is memory, move it to p2 so that p1 holds the other operand
|
||||||
|
if (p1->isMEM()) {
|
||||||
|
std::swap(p1, p2);
|
||||||
|
rev = true;
|
||||||
|
}
|
||||||
|
// p1 can only still be memory here if both op1 and op2 are memory operands
|
||||||
|
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
|
// step 2: if p1 is XMM, swap again so that the XMM operand ends up in p2
|
||||||
|
if (p1->isXMM()) {
|
||||||
|
std::swap(p1, p2);
|
||||||
|
rev = !rev;
|
||||||
|
}
|
||||||
|
// table index; -1 means no valid form matched
|
||||||
|
int sel = -1;
|
||||||
|
if (getEncoding(enc, 1) == AVX10v2Encoding) {
|
||||||
|
// AVX10v2Encoding form: xmm/mem <-> xmm, entries 2 and 3
|
||||||
|
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
|
||||||
|
} else {
|
||||||
|
// other encodings: reg(bit)/mem <-> xmm, entries 0 and 1
|
||||||
|
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
|
||||||
|
}
|
||||||
|
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
|
// p2 was verified to be XMM above, so the downcast matches the checked operand kind; xm0 is passed as the middle operand
|
||||||
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
// Returns the VERSION constant.
unsigned int getVersion() const { return VERSION; }
|
// Returns the VERSION constant.
unsigned int getVersion() const { return VERSION; }
|
||||||
using CodeArray::db;
|
using CodeArray::db;
|
||||||
|
@ -3203,30 +3227,6 @@ public:
|
||||||
}
|
}
|
||||||
db(0xC8 + (idx & 7));
|
db(0xC8 + (idx & 7));
|
||||||
}
|
}
|
||||||
// AVX10 zero-extending for vmovd, vmovw
// Shared operand dispatcher: normalizes (op1, op2) so that p2 is the XMM operand and
// p1 is the other operand, then selects one of the four typeTbl/codeTbl entries and
// emits the instruction through opAVX_X_X_XM.
//   index 0/1: getEncoding(enc, 1) is not AVX10v2Encoding; p1 must satisfy isREG(bit) or be memory
//   index 2/3: getEncoding(enc, 1) is AVX10v2Encoding; p1 must be XMM or memory
//   the odd index (rev == true) is selected when op1 is the XMM operand
// Any other operand combination is reported as ERR_BAD_COMBINATION via XBYAK_THROW.
// NOTE(review): `bit` is presumably the general-purpose register width accepted by the
// non-AVX10.2 form -- confirm against the vmovd/vmovw callers.
|
|
||||||
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
|
|
||||||
{
|
|
||||||
const Operand *p1 = &op1;
|
|
||||||
const Operand *p2 = &op2;
|
|
||||||
// rev tracks whether p1/p2 are currently swapped relative to op1/op2
bool rev = false;
|
|
||||||
// step 1: if op1 is memory, move it to p2 so that p1 holds the other operand
if (p1->isMEM()) {
|
|
||||||
std::swap(p1, p2);
|
|
||||||
rev = true;
|
|
||||||
}
|
|
||||||
// p1 can only still be memory here if both op1 and op2 are memory operands
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
||||||
// step 2: if p1 is XMM, swap again so that the XMM operand ends up in p2
if (p1->isXMM()) {
|
|
||||||
std::swap(p1, p2);
|
|
||||||
rev = !rev;
|
|
||||||
}
|
|
||||||
// table index; -1 means no valid form matched
int sel = -1;
|
|
||||||
if (getEncoding(enc, 1) == AVX10v2Encoding) {
|
|
||||||
// AVX10v2Encoding form: xmm/mem <-> xmm, entries 2 and 3
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
|
|
||||||
} else {
|
|
||||||
// other encodings: reg(bit)/mem <-> xmm, entries 0 and 1
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
|
|
||||||
}
|
|
||||||
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
||||||
// p2 was verified to be XMM above, so the downcast matches the checked operand kind; xm0 is passed as the middle operand
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
|
||||||
}
|
|
||||||
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
|
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
|
||||||
{
|
{
|
||||||
const uint64_t typeTbl[] = {
|
const uint64_t typeTbl[] = {
|
||||||
|
|
Loading…
Reference in a new issue