diff --git a/doc/changelog.md b/doc/changelog.md index 1d39ae6..1461f6e 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -1,5 +1,6 @@ # History +* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10. * 2024/Oct/15 ver 7.11 Added full support for AVX10.2 * 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended. * 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw diff --git a/doc/usage.md b/doc/usage.md index 9015bff..dcb3e10 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -1,7 +1,7 @@ # Usage Inherit `Xbyak::CodeGenerator` class and make the class method. -``` +```cpp #include <xbyak/xbyak.h> struct Code : Xbyak::CodeGenerator { @@ -13,7 +13,7 @@ struct Code : Xbyak::CodeGenerator { }; ``` Or you can pass the instance of CodeGenerator without inheriting. -``` +```cpp void genCode(Xbyak::CodeGenerator& code, int x) { using namespace Xbyak::util; code.mov(eax, x); @@ -23,7 +23,7 @@ void genCode(Xbyak::CodeGenerator& code, int x) { Make an instance of the class and get the function pointer by calling `getCode()` and call it. -``` +```cpp Code c(5); int (*f)() = c.getCode(); printf("ret=%d\n", f()); // ret = 5 @@ -32,7 +32,7 @@ printf("ret=%d\n", f()); // ret = 5 ## Syntax Similar to MASM/NASM syntax with parentheses. -``` +```cpp NASM Xbyak mov eax, ebx --> mov(eax, ebx); inc ecx inc(ecx); @@ -43,7 +43,7 @@ ret --> ret(); Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory, otherwise use `ptr`. -``` +```cpp (ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement] [rip + 32bit disp] ; x64 only @@ -53,19 +53,21 @@ mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]); test byte [esp], 4 --> test(byte [esp], 4); inc qword [rax] --> inc(qword [rax]); ``` + **Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type. 
### How to use Selector (Segment Register) -``` +```cpp mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]); mov ax, cs --> mov(ax, cs); ``` + **Note**: Segment class is not derived from `Operand`. ## AVX -``` +```cpp vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); @@ -74,13 +76,13 @@ vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); **Note**: If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility. But the newer version will not support it. -``` +```cpp vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 ``` ## AVX-512 -``` +```cpp vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]); vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]); @@ -108,37 +110,44 @@ vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit ``` -## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc. +## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2. Some mnemonics have some types of encodings: VEX, EVEX, AVX10.2. The functions for these mnemonics include an optional parameter as the last argument to specify the encoding. The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first), and can be specified using setDefaultEncoding. 
-``` +```cpp vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI) -vpdpbusd(xm0, xm1, xm2, AVX10v2Encoding); // same as the above -vpdpbusd(xm0, xm1, xm2, PreAVXv2Encoding); // VEX (AVX-VNNI) -setDefaultEncoding(VexEncoding); // default encoding is VEX +vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above +vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI) +setDefaultEncoding(VexEncoding); // change default encoding vpdpbusd(xm0, xm1, xm2); // VEX -vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI) -vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above -vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2) -setDefaultEncoding(VexEncoding, AVX10v2Encoding); // use 2nd argument. -vmpsadbw(xm1, xm3, xm15, 3); // EVEX (AVX10.2) +vmpsadbw(xm1, xm3, xm15, 3); // default encoding: AVX +vmpsadbw(xm1, xm3, xm15, 3, PreAVX10v2Encoding); // same as the above +vmpsadbw(xm1, xm3, xm15, 3, AVX10v2Encoding); // AVX10.2 +setDefaultEncodingAVX10(AVX10v2Encoding); // change default encoding +vmpsadbw(xm1, xm3, xm15, 3); // AVX10.2 ``` -- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = PreAVXv2Encoding)` -Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param. +- `setDefaultEncoding(PreferredEncoding enc = EvexEncoding)` + - Configure encoding for AVX512-VNNI or AVX-VNNI instructions. +- `setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)` + - Configure encoding for pre-AVX10.2 and AVX10.2 instructions. 
-param|vnniEnc|avx10Enc +`setDefaultEncoding`|EvexEncoding (default)|VexEncoding -|-|- -VexEncoding|AVX-VNNI|- -EvexEncoding|AVX512-VNNI|- -PreAVX10v2Encoding|-|AVX-VNNI-INT8, AVX512-FP16 -AVX10v2Encoding|-|AVX10.2 -default|EvexEncoding|PreAVXv2Encoding -mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw +feature|AVX512-VNNI|AVX-VNNI + +- Target functions: vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds + +`setDefaultEncodingAVX10`|PreAVX10v2Encoding (default)|AVX10v2Encoding +-|-|- +feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2 + +- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw + +- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2. ### Remark * `k1`, ..., `k7` are opmask registers. @@ -181,7 +190,7 @@ mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, Two kinds of Label are supported. (String literal and Label class). ### String literal -``` +```cpp L("L1"); jmp("L1"); @@ -203,7 +212,7 @@ L("L3"); ### Support `@@`, `@f`, `@b` like MASM -``` +```cpp L("@@"); // <A> jmp("@b"); // jmp to <A> jmp("@f"); // jmp to <B> @@ -219,7 +228,7 @@ Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabe are treated as a local label. `inLocalLabel()` and `outLocalLabel()` can be nested. -``` +```cpp void func1() { inLocalLabel(); @@ -242,7 +251,7 @@ void func1() Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified. So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error. -``` +```cpp jmp("short-jmp"); // short jmp // small code L("short-jmp"); @@ -251,14 +260,16 @@ jmp("long-jmp"); // long code L("long-jmp"); // throw exception ``` + Then specify T_NEAR for jmp. 
-``` +```cpp jmp("long-jmp", T_NEAR); // long jmp // long code L("long-jmp"); ``` + Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. -``` +```cpp jmp("long-jmp"); // long jmp // long code L("long-jmp"); @@ -268,7 +279,7 @@ L("long-jmp"); `L()` and `jxx()` support Label class. -``` +```cpp Xbyak::Label label1, label2; L(label1); ... @@ -280,7 +291,7 @@ L(label2); ``` Use `putL` for jmp table -``` +```cpp Label labelTbl, L0, L1, L2; mov(rax, labelTbl); // rdx is an index of jump table @@ -297,7 +308,7 @@ L(L1); `assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel. -``` +```cpp Label label2; Label label1 = L(); // make label1 ; same to Label label1; L(label1); ... @@ -312,7 +323,7 @@ The `jmp` in the above code jumps to label1 assigned by `assignL`. * dstLabel must not be used in `L()`. `Label::getAddress()` returns the address specified by the label instance and 0 if not specified. -``` +```cpp // not AutoGrow mode Label label; assert(label.getAddress() == 0); @@ -321,7 +332,7 @@ assert(label.getAddress() == getCurr()); ``` ### Rip ; relative addressing -``` +```cpp Label label; mov(eax, ptr [rip + label]); // eax = 4 ... @@ -329,7 +340,7 @@ mov(eax, ptr [rip + label]); // eax = 4 L(label); dd(4); ``` -``` +```cpp int x; ... mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB @@ -340,13 +351,13 @@ int x; Use `word|dword|qword` instead of `ptr` to specify the address size. ### 32 bit mode -``` +```cpp jmp(word[eax], T_FAR); // jmp m16:16(FF /5) jmp(dword[eax], T_FAR); // jmp m16:32(FF /5) ``` ### 64 bit mode -``` +```cpp jmp(word[rax], T_FAR); // jmp m16:16(FF /5) jmp(dword[rax], T_FAR); // jmp m16:32(FF /5) jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5) @@ -357,7 +368,7 @@ The same applies to `call`. The default max code size is 4096 bytes. Specify the size in constructor of `CodeGenerator()` if necessary. 
-``` +```cpp class Quantize : public Xbyak::CodeGenerator { public: Quantize() @@ -374,7 +385,7 @@ You can make jit code on prepared memory. Call `setProtectModeRE` yourself to change memory mode if using the prepared memory. -``` +```cpp uint8_t alignas(4096) buf[8192]; // C++11 or later struct Code : Xbyak::CodeGenerator { @@ -400,7 +411,7 @@ int main() The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`. Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address. -``` +```cpp struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow) @@ -421,7 +432,7 @@ Xbyak set Read/Write/Exec mode to memory to run jit code. If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and call `setProtectModeRE()` after generating jit code. -``` +```cpp struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) diff --git a/readme.txt b/readme.txt index 2fb242b..44a7937 100644 --- a/readme.txt +++ b/readme.txt @@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定 2024/10/15 ver 7.11 AVX10.2完全サポート 2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張. 2024/10/10 ver 7.09.1 vpcompressbとvpcompresswの名前修正