[doc] update doc

This commit is contained in:
MITSUNARI Shigeo 2024-10-15 09:54:47 +09:00
parent ae76be35ac
commit c3a5c4ba3d
3 changed files with 59 additions and 46 deletions

View file

@ -1,5 +1,6 @@
# History # History
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2 * 2024/Oct/15 ver 7.11 Added full support for AVX10.2
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended. * 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw * 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw

View file

@ -1,7 +1,7 @@
# Usage # Usage
Inherit `Xbyak::CodeGenerator` class and make the class method. Inherit `Xbyak::CodeGenerator` class and make the class method.
``` ```cpp
#include <xbyak/xbyak.h> #include <xbyak/xbyak.h>
struct Code : Xbyak::CodeGenerator { struct Code : Xbyak::CodeGenerator {
@ -13,7 +13,7 @@ struct Code : Xbyak::CodeGenerator {
}; };
``` ```
Or you can pass the instance of CodeGenerator without inheriting. Or you can pass the instance of CodeGenerator without inheriting.
``` ```cpp
void genCode(Xbyak::CodeGenerator& code, int x) { void genCode(Xbyak::CodeGenerator& code, int x) {
using namespace Xbyak::util; using namespace Xbyak::util;
code.mov(eax, x); code.mov(eax, x);
@ -23,7 +23,7 @@ void genCode(Xbyak::CodeGenerator& code, int x) {
Make an instance of the class and get the function Make an instance of the class and get the function
pointer by calling `getCode()` and call it. pointer by calling `getCode()` and call it.
``` ```cpp
Code c(5); Code c(5);
int (*f)() = c.getCode<int (*)()>(); int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5 printf("ret=%d\n", f()); // ret = 5
@ -32,7 +32,7 @@ printf("ret=%d\n", f()); // ret = 5
## Syntax ## Syntax
Similar to MASM/NASM syntax with parentheses. Similar to MASM/NASM syntax with parentheses.
``` ```cpp
NASM Xbyak NASM Xbyak
mov eax, ebx --> mov(eax, ebx); mov eax, ebx --> mov(eax, ebx);
inc ecx inc(ecx); inc ecx inc(ecx);
@ -43,7 +43,7 @@ ret --> ret();
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory, Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
otherwise use `ptr`. otherwise use `ptr`.
``` ```cpp
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement] (ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only [rip + 32bit disp] ; x64 only
@ -53,19 +53,21 @@ mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4); test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax] --> inc(qword [rax]); inc qword [rax] --> inc(qword [rax]);
``` ```
**Note**: `qword`, ... are member variables, so don't use `dword` as an unsigned int type. **Note**: `qword`, ... are member variables, so don't use `dword` as an unsigned int type.
### How to use Selector (Segment Register) ### How to use Selector (Segment Register)
``` ```cpp
mov eax, [fs:eax] --> putSeg(fs); mov eax, [fs:eax] --> putSeg(fs);
mov(eax, ptr [eax]); mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs); mov ax, cs --> mov(ax, cs);
``` ```
**Note**: Segment class is not derived from `Operand`. **Note**: Segment class is not derived from `Operand`.
## AVX ## AVX
``` ```cpp
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
@ -74,13 +76,13 @@ vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
**Note**: **Note**:
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility. If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
But the newer version will not support it. But the newer version will not support it.
``` ```cpp
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
``` ```
## AVX-512 ## AVX-512
``` ```cpp
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]); vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]); vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
@ -108,37 +110,44 @@ vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64],
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
``` ```
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc. ## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2.
Some mnemonics have some types of encodings: VEX, EVEX, AVX10.2. Some mnemonics have some types of encodings: VEX, EVEX, AVX10.2.
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding. The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first), The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first),
and can be specified using setDefaultEncoding. and can be specified using setDefaultEncoding.
``` ```cpp
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI) vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
vpdpbusd(xm0, xm1, xm2, AVX10v2Encoding); // same as the above vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, PreAVXv2Encoding); // VEX (AVX-VNNI) vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
setDefaultEncoding(VexEncoding); // default encoding is VEX setDefaultEncoding(VexEncoding); // change default encoding
vpdpbusd(xm0, xm1, xm2); // VEX vpdpbusd(xm0, xm1, xm2); // VEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI) vmpsadbw(xm1, xm3, xm15, 3); // default encoding: AVX
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above vmpsadbw(xm1, xm3, xm15, 3, PreAVX10v2Encoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2) vmpsadbw(xm1, xm3, xm15, 3, AVX10v2Encoding); // AVX10.2
setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument. setDefaultEncodingAVX10(AVX10v2Encoding); // change default encoding
vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2) vmpsadbw(xm1, xm3, xm15, 3); // AVX10.2
``` ```
- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVXv2Encoding)` - `setDefaultEncoding(PreferredEncoding enc = EvexEncoding)`
Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param. - Configure encoding for AVX512-VNNI or AVX-VNNI instructions.
- `setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)`
- Configure encoding for pre-AVX10.2 and AVX10.2 instructions.
param|vnniEnc|avx10Enc `setDefaultEncoding`|EvexEncoding (default)|VexEncoding
-|-|- -|-|-
VexEncoding|AVX-VNNI|- feature|AVX512-VNNI|AVX-VNNI
EvexEncoding|AVX512-VNNI|-
PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16 - Target functions: vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds
AVX10v2Encoding|-|AVX10.2
default|EvexEncoding|PreAVXv2Encoding `setDefaultEncodingAVX10`|PreAVX10v2Encoding (default)|AVX10v2Encoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw -|-|-
feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw
- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.
### Remark ### Remark
* `k1`, ..., `k7` are opmask registers. * `k1`, ..., `k7` are opmask registers.
@ -181,7 +190,7 @@ mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds,
Two kinds of Label are supported. (String literal and Label class). Two kinds of Label are supported. (String literal and Label class).
### String literal ### String literal
``` ```cpp
L("L1"); L("L1");
jmp("L1"); jmp("L1");
@ -203,7 +212,7 @@ L("L3");
### Support `@@`, `@f`, `@b` like MASM ### Support `@@`, `@f`, `@b` like MASM
``` ```cpp
L("@@"); // <A> L("@@"); // <A>
jmp("@b"); // jmp to <A> jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B> jmp("@f"); // jmp to <B>
@ -219,7 +228,7 @@ Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabe
are treated as a local label. are treated as a local label.
`inLocalLabel()` and `outLocalLabel()` can be nested. `inLocalLabel()` and `outLocalLabel()` can be nested.
``` ```cpp
void func1() void func1()
{ {
inLocalLabel(); inLocalLabel();
@ -242,7 +251,7 @@ void func1()
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified. Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
So if the size between jmp and label is larger than 127 bytes, then xbyak will cause an error. So if the size between jmp and label is larger than 127 bytes, then xbyak will cause an error.
``` ```cpp
jmp("short-jmp"); // short jmp jmp("short-jmp"); // short jmp
// small code // small code
L("short-jmp"); L("short-jmp");
@ -251,14 +260,16 @@ jmp("long-jmp");
// long code // long code
L("long-jmp"); // throw exception L("long-jmp"); // throw exception
``` ```
Then specify T_NEAR for jmp. Then specify T_NEAR for jmp.
``` ```cpp
jmp("long-jmp", T_NEAR); // long jmp jmp("long-jmp", T_NEAR); // long jmp
// long code // long code
L("long-jmp"); L("long-jmp");
``` ```
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
``` ```cpp
jmp("long-jmp"); // long jmp jmp("long-jmp"); // long jmp
// long code // long code
L("long-jmp"); L("long-jmp");
@ -268,7 +279,7 @@ L("long-jmp");
`L()` and `jxx()` support Label class. `L()` and `jxx()` support Label class.
``` ```cpp
Xbyak::Label label1, label2; Xbyak::Label label1, label2;
L(label1); L(label1);
... ...
@ -280,7 +291,7 @@ L(label2);
``` ```
Use `putL` for jmp table Use `putL` for jmp table
``` ```cpp
Label labelTbl, L0, L1, L2; Label labelTbl, L0, L1, L2;
mov(rax, labelTbl); mov(rax, labelTbl);
// rdx is an index of jump table // rdx is an index of jump table
@ -297,7 +308,7 @@ L(L1);
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel. `assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
``` ```cpp
Label label2; Label label2;
Label label1 = L(); // make label1 ; same to Label label1; L(label1); Label label1 = L(); // make label1 ; same to Label label1; L(label1);
... ...
@ -312,7 +323,7 @@ The `jmp` in the above code jumps to label1 assigned by `assignL`.
* dstLabel must not be used in `L()`. * dstLabel must not be used in `L()`.
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified. `Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
``` ```cpp
// not AutoGrow mode // not AutoGrow mode
Label label; Label label;
assert(label.getAddress() == 0); assert(label.getAddress() == 0);
@ -321,7 +332,7 @@ assert(label.getAddress() == getCurr());
``` ```
### Rip ; relative addressing ### Rip ; relative addressing
``` ```cpp
Label label; Label label;
mov(eax, ptr [rip + label]); // eax = 4 mov(eax, ptr [rip + label]); // eax = 4
... ...
@ -329,7 +340,7 @@ mov(eax, ptr [rip + label]); // eax = 4
L(label); L(label);
dd(4); dd(4);
``` ```
``` ```cpp
int x; int x;
... ...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
@ -340,13 +351,13 @@ int x;
Use `word|dword|qword` instead of `ptr` to specify the address size. Use `word|dword|qword` instead of `ptr` to specify the address size.
### 32 bit mode ### 32 bit mode
``` ```cpp
jmp(word[eax], T_FAR); // jmp m16:16(FF /5) jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5) jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
``` ```
### 64 bit mode ### 64 bit mode
``` ```cpp
jmp(word[rax], T_FAR); // jmp m16:16(FF /5) jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5) jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5) jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
@ -357,7 +368,7 @@ The same applies to `call`.
The default max code size is 4096 bytes. The default max code size is 4096 bytes.
Specify the size in constructor of `CodeGenerator()` if necessary. Specify the size in constructor of `CodeGenerator()` if necessary.
``` ```cpp
class Quantize : public Xbyak::CodeGenerator { class Quantize : public Xbyak::CodeGenerator {
public: public:
Quantize() Quantize()
@ -374,7 +385,7 @@ You can make jit code on prepared memory.
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory. Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
``` ```cpp
uint8_t alignas(4096) buf[8192]; // C++11 or later uint8_t alignas(4096) buf[8192]; // C++11 or later
struct Code : Xbyak::CodeGenerator { struct Code : Xbyak::CodeGenerator {
@ -400,7 +411,7 @@ int main()
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`. The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address. Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
``` ```cpp
struct Code : Xbyak::CodeGenerator { struct Code : Xbyak::CodeGenerator {
Code() Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow) : Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
@ -421,7 +432,7 @@ Xbyak set Read/Write/Exec mode to memory to run jit code.
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
call `setProtectModeRE()` after generating jit code. call `setProtectModeRE()` after generating jit code.
``` ```cpp
struct Code : Xbyak::CodeGenerator { struct Code : Xbyak::CodeGenerator {
Code() Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)

View file

@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
◎履歴 ◎履歴
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
2024/10/15 ver 7.11 AVX10.2完全サポート 2024/10/15 ver 7.11 AVX10.2完全サポート
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張. 2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正 2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正