mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
[doc] update doc
This commit is contained in:
parent
ae76be35ac
commit
c3a5c4ba3d
3 changed files with 59 additions and 46 deletions
|
@ -1,5 +1,6 @@
|
|||
# History
|
||||
|
||||
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
|
||||
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
|
||||
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
|
||||
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
|
||||
|
|
103
doc/usage.md
103
doc/usage.md
|
@ -1,7 +1,7 @@
|
|||
# Usage
|
||||
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
```cpp
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
|
@ -13,7 +13,7 @@ struct Code : Xbyak::CodeGenerator {
|
|||
};
|
||||
```
|
||||
Or you can pass the instance of CodeGenerator without inheriting.
|
||||
```
|
||||
```cpp
|
||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||
using namespace Xbyak::util;
|
||||
code.mov(eax, x);
|
||||
|
@ -23,7 +23,7 @@ void genCode(Xbyak::CodeGenerator& code, int x) {
|
|||
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
```cpp
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
|
@ -32,7 +32,7 @@ printf("ret=%d\n", f()); // ret = 5
|
|||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
```cpp
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx inc(ecx);
|
||||
|
@ -43,7 +43,7 @@ ret --> ret();
|
|||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
```
|
||||
```cpp
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
|
@ -53,19 +53,21 @@ mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
|||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
|
||||
**Note**: `qword`, ... are member variables, so don't use `dword` as unsigned int type.
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
```cpp
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
## AVX
|
||||
|
||||
```
|
||||
```cpp
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
|
@ -74,13 +76,13 @@ vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
|||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
```cpp
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
```cpp
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
|
@ -108,37 +110,44 @@ vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64],
|
|||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
```
|
||||
|
||||
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc.
|
||||
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2.
|
||||
Some mnemonics have multiple encodings: VEX, EVEX, AVX10.2.
|
||||
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
|
||||
The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first),
|
||||
and can be specified using setDefaultEncoding.
|
||||
|
||||
```
|
||||
```cpp
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
|
||||
vpdpbusd(xm0, xm1, xm2, AVX10v2Encoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, PreAVXv2Encoding); // VEX (AVX-VNNI)
|
||||
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
|
||||
setDefaultEncoding(VexEncoding); // change default encoding
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX
|
||||
|
||||
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
|
||||
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
|
||||
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
|
||||
setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument.
|
||||
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2)
|
||||
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: AVX
|
||||
vmpsadbw(xm1, xm3, xm15, 3, PreAVX10v2Encoding); // same as the above
|
||||
vmpsadbw(xm1, xm3, xm15, 3, AVX10v2Encoding); // AVX10.2
|
||||
setDefaultEncodingAVX10(AVX10v2Encoding); // change default encoding
|
||||
vmpsadbw(xm1, xm3, xm15, 3); // AVX10.2
|
||||
```
|
||||
|
||||
- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVXv2Encoding)`
|
||||
Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.
|
||||
- `setDefaultEncoding(PreferredEncoding enc = EvexEncoding)`
|
||||
- Configure encoding for AVX512-VNNI or AVX-VNNI instructions.
|
||||
- `setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)`
|
||||
- Configure encoding for pre-AVX10.2 and AVX10.2 instructions.
|
||||
|
||||
param|vnniEnc|avx10Enc
|
||||
`setDefaultEncoding`|EvexEncoding (default)|VexEncoding
|
||||
-|-|-
|
||||
VexEncoding|AVX-VNNI|-
|
||||
EvexEncoding|AVX512-VNNI|-
|
||||
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16
|
||||
AVX10v2Encoding|-|AVX10.2
|
||||
default|EvexEncoding|PreAVXv2Encoding
|
||||
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw
|
||||
feature|AVX512-VNNI|AVX-VNNI
|
||||
|
||||
- Target functions: vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds
|
||||
|
||||
`setDefaultEncodingAVX10`|PreAVX10v2Encoding (default)|AVX10v2Encoding
|
||||
-|-|-
|
||||
feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2
|
||||
|
||||
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
|
||||
|
||||
- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.
|
||||
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
|
@ -181,7 +190,7 @@ mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds,
|
|||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
### String literal
|
||||
```
|
||||
```cpp
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
|
@ -203,7 +212,7 @@ L("L3");
|
|||
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
```cpp
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
|
@ -219,7 +228,7 @@ Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabe
|
|||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
```cpp
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
|
@ -242,7 +251,7 @@ void func1()
|
|||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||
So if the size between jmp and label is larger than 127 bytes, then xbyak will raise an error.
|
||||
|
||||
```
|
||||
```cpp
|
||||
jmp("short-jmp"); // short jmp
|
||||
// small code
|
||||
L("short-jmp");
|
||||
|
@ -251,14 +260,16 @@ jmp("long-jmp");
|
|||
// long code
|
||||
L("long-jmp"); // throw exception
|
||||
```
|
||||
|
||||
Then specify T_NEAR for jmp.
|
||||
```
|
||||
```cpp
|
||||
jmp("long-jmp", T_NEAR); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
|
||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||
```
|
||||
```cpp
|
||||
jmp("long-jmp"); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
|
@ -268,7 +279,7 @@ L("long-jmp");
|
|||
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
```
|
||||
```cpp
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
|
@ -280,7 +291,7 @@ L(label2);
|
|||
```
|
||||
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
```cpp
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
|
@ -297,7 +308,7 @@ L(L1);
|
|||
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
```cpp
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
|
@ -312,7 +323,7 @@ The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
|||
* dstLabel must not be used in `L()`.
|
||||
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
```cpp
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
|
@ -321,7 +332,7 @@ assert(label.getAddress() == getCurr());
|
|||
```
|
||||
|
||||
### RIP-relative addressing
|
||||
```
|
||||
```cpp
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
...
|
||||
|
@ -329,7 +340,7 @@ mov(eax, ptr [rip + label]); // eax = 4
|
|||
L(label);
|
||||
dd(4);
|
||||
```
|
||||
```
|
||||
```cpp
|
||||
int x;
|
||||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
|
@ -340,13 +351,13 @@ int x;
|
|||
Use `word|dword|qword` instead of `ptr` to specify the address size.
|
||||
|
||||
### 32 bit mode
|
||||
```
|
||||
```cpp
|
||||
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||
```
|
||||
|
||||
### 64 bit mode
|
||||
```
|
||||
```cpp
|
||||
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||
|
@ -357,7 +368,7 @@ The same applies to `call`.
|
|||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
```cpp
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
|
@ -374,7 +385,7 @@ You can make jit code on prepared memory.
|
|||
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
```cpp
|
||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
|
@ -400,7 +411,7 @@ int main()
|
|||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
```cpp
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
|
@ -421,7 +432,7 @@ Xbyak set Read/Write/Exec mode to memory to run jit code.
|
|||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
```cpp
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
|
|
|
@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
|
||||
2024/10/15 ver 7.11 AVX10.2完全サポート
|
||||
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
|
||||
2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正
|
||||
|
|
Loading…
Reference in a new issue