mirror of
https://github.com/herumi/xbyak
synced 2024-11-20 16:06:14 -07:00
This commit is contained in:
commit
97b66116ff
15 changed files with 222 additions and 63 deletions
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required(VERSION 3.5)
|
cmake_minimum_required(VERSION 3.5)
|
||||||
|
|
||||||
project(xbyak LANGUAGES CXX VERSION 7.20)
|
project(xbyak LANGUAGES CXX VERSION 7.20.1)
|
||||||
|
|
||||||
file(GLOB headers xbyak/*.h)
|
file(GLOB headers xbyak/*.h)
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# History
|
# History
|
||||||
|
|
||||||
|
* 2024/Oct/17 ver 7.20.1 Updated to comply with AVX10.2 specification rev 2.0
|
||||||
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
|
* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
|
||||||
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
|
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
|
||||||
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
|
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
|
||||||
|
|
30
doc/usage.md
30
doc/usage.md
|
@ -110,6 +110,15 @@ vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64],
|
||||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Remark
|
||||||
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
|
- `k0` is treated as no mask.
|
||||||
|
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are the same as `vmovaps(zmm0, ptr[rax]);`.
|
||||||
|
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||||
|
* `k4 | k3` is different from `k3 | k4`.
|
||||||
|
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||||
|
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||||
|
|
||||||
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2.
|
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2.
|
||||||
Some mnemonics have several possible encodings: VEX, EVEX, AVX10.2.
|
Some mnemonics have several possible encodings: VEX, EVEX, AVX10.2.
|
||||||
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
|
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
|
||||||
|
@ -145,20 +154,17 @@ feature|AVX512-VNNI|AVX-VNNI
|
||||||
-|-|-
|
-|-|-
|
||||||
feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2
|
feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2
|
||||||
|
|
||||||
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds, vmovd, vmovw
|
- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, and vmovd/vmovw with a memory operand (XMM-to-MEM or MEM-to-XMM).
|
||||||
|
|
||||||
- Remark: vmovd and vmovw several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.
|
|
||||||
At first, I attempted to use EvexEncoding (resp. VexEncoding) instead of AVX10v2Encoding (resp. EvexEncoding) for `setDefaultEncodingAVX10`.
|
|
||||||
But I abandoned this idea when I found that `vmovd` and `vmovw` had different EVEX encodings in AVX512 and AVX10.2
|
|
||||||
|
|
||||||
### Remark
|
### Remark
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
|
||||||
- `k0` is dealt as no mask.
|
1. `vmovd` and `vmovw` instructions with REG-to-XMM or XMM-to-REG operands are always encoded using AVX10.1.
|
||||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
When used with XMM-to-XMM operands, these instructions are always encoded using AVX10.2.
|
||||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
|
||||||
* `k4 | k3` is different from `k3 | k4`.
|
2. `vmovd` and `vmovw` instructions with XMM-to-MEM or MEM-to-XMM operands support multiple encoding formats, including AVX, AVX512F, AVX512-FP16, and AVX10.2.
|
||||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
|
||||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
Initially, I tried implementing `setDefaultEncodingAVX10` using `EvexEncoding` (resp. `VexEncoding`) instead of `AVX10v2Encoding` (resp. `EvexEncoding`).
|
||||||
|
However, I abandoned this approach after discovering the complexity of the encoding requirements of `vmovd` and `vmovw`.
|
||||||
|
|
||||||
## APX
|
## APX
|
||||||
[Advanced Performance Extensions (APX) Architecture Specification](https://www.intel.com/content/www/us/en/content-details/786223/intel-advanced-performance-extensions-intel-apx-architecture-specification.html)
|
[Advanced Performance Extensions (APX) Architecture Specification](https://www.intel.com/content/www/us/en/content-details/786223/intel-advanced-performance-extensions-intel-apx-architecture-specification.html)
|
||||||
|
|
|
@ -202,13 +202,13 @@ void putX_XM()
|
||||||
{ 0x2F, "vcomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
{ 0x2F, "vcomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||||
{ 0x2E, "vucomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
{ 0x2E, "vucomish", T_MUST_EVEX | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||||
|
|
||||||
{ 0x2F, "vcomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
{ 0x2F, "vcomxsd", T_MUST_EVEX | T_F2 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
||||||
{ 0x2F, "vcomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
{ 0x2F, "vcomxsh", T_MUST_EVEX | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||||
{ 0x2F, "vcomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
{ 0x2F, "vcomxss", T_MUST_EVEX | T_F3 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
||||||
|
|
||||||
{ 0x2E, "vucomxsd", T_MUST_EVEX | T_F3 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
{ 0x2E, "vucomxsd", T_MUST_EVEX | T_F2 | T_0F | T_EW1 | T_SAE_X | T_N8 },
|
||||||
{ 0x2E, "vucomxsh", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
{ 0x2E, "vucomxsh", T_MUST_EVEX | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_N2 },
|
||||||
{ 0x2E, "vucomxss", T_MUST_EVEX | T_F2 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
{ 0x2E, "vucomxss", T_MUST_EVEX | T_F3 | T_0F | T_EW0 | T_SAE_X | T_N4 },
|
||||||
|
|
||||||
// 13.1
|
// 13.1
|
||||||
{ 0x69, "vcvtnebf162ibs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 },
|
{ 0x69, "vcvtnebf162ibs", T_MUST_EVEX | T_YMM | T_F2 | T_MAP5 | T_EW0 | T_B16 },
|
||||||
|
@ -893,7 +893,7 @@ void putX_XM_IMM()
|
||||||
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
||||||
|
|
||||||
{ 0x2F, "vcomsbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_N2, false },
|
{ 0x2F, "vcomsbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_N2, false },
|
||||||
{ 0x42, "vgetexppbf16", T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16, false },
|
{ 0x42, "vgetexppbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||||
{ 0x26, "vgetmantpbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
{ 0x26, "vgetmantpbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
||||||
{ 0x4C, "vrcppbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
{ 0x4C, "vrcppbf16", T_MUST_EVEX | T_MAP6 | T_EW0 | T_YMM | T_B16, false },
|
||||||
{ 0x56, "vreducenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
{ 0x56, "vreducenepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
project(
|
project(
|
||||||
'xbyak',
|
'xbyak',
|
||||||
'cpp',
|
'cpp',
|
||||||
version: '7.20',
|
version: '7.20.1',
|
||||||
license: 'BSD-3-Clause',
|
license: 'BSD-3-Clause',
|
||||||
default_options: 'b_ndebug=if-release'
|
default_options: 'b_ndebug=if-release'
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
# Xbyak 7.20 [![Badge Build]][Build Status]
|
# Xbyak 7.20.1 [![Badge Build]][Build Status]
|
||||||
|
|
||||||
*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
|
*A JIT assembler for x86/x64 architectures supporting advanced instruction sets up to AVX10.2*
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.20
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.20.1
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -404,6 +404,7 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2024/10/17 ver 7.20.1 AVX10.2 rev 2.0仕様書の変更に追従
|
||||||
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
|
2024/10/15 ver 7.20 setDefaultEncoding/setDefaultEncodingAVX10の仕様確定
|
||||||
2024/10/15 ver 7.11 AVX10.2完全サポート
|
2024/10/15 ver 7.11 AVX10.2完全サポート
|
||||||
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
|
2024/10/13 ver 7.10 AVX10 integer and fp16 vnni, mediaの新命令対応. setDefaultEncodingの拡張.
|
||||||
|
|
|
@ -60,7 +60,8 @@ apx: apx.cpp $(XBYAK_INC)
|
||||||
avx10_test: avx10_test.cpp $(XBYAK_INC)
|
avx10_test: avx10_test.cpp $(XBYAK_INC)
|
||||||
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
|
$(CXX) $(CFLAGS) avx10_test.cpp -o $@ -DXBYAK64
|
||||||
|
|
||||||
TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
|
#TEST_FILES=old.txt new-ymm.txt bf16.txt comp.txt misc.txt convert.txt minmax.txt saturation.txt
|
||||||
|
TEST_FILES=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
|
||||||
xed_test:
|
xed_test:
|
||||||
@set -e; \
|
@set -e; \
|
||||||
for target in $(addprefix avx10/, $(TEST_FILES)); do \
|
for target in $(addprefix avx10/, $(TEST_FILES)); do \
|
||||||
|
|
|
@ -113,17 +113,17 @@ vfpclasspbf16(k7|k5, zword_b[rax+128], 13);
|
||||||
vcomsbf16(xm2, xm3);
|
vcomsbf16(xm2, xm3);
|
||||||
vcomsbf16(xm2, ptr[rax+128]);
|
vcomsbf16(xm2, ptr[rax+128]);
|
||||||
|
|
||||||
vgetexppbf16(xm1|k3, xmm2);
|
//vgetexppbf16(xm1|k3, xmm2);
|
||||||
vgetexppbf16(xm1|k3, ptr[rax+128]);
|
//vgetexppbf16(xm1|k3, ptr[rax+128]);
|
||||||
vgetexppbf16(xm1|k3, ptr_b[rax+128]);
|
//vgetexppbf16(xm1|k3, ptr_b[rax+128]);
|
||||||
|
|
||||||
vgetexppbf16(ym1|k3, ymm2);
|
//vgetexppbf16(ym1|k3, ymm2);
|
||||||
vgetexppbf16(ym1|k3, ptr[rax+128]);
|
//vgetexppbf16(ym1|k3, ptr[rax+128]);
|
||||||
vgetexppbf16(ym1|k3, ptr_b[rax+128]);
|
//vgetexppbf16(ym1|k3, ptr_b[rax+128]);
|
||||||
|
|
||||||
vgetexppbf16(zm1|k3, zmm2);
|
//vgetexppbf16(zm1|k3, zmm2);
|
||||||
vgetexppbf16(zm1|k3, ptr[rax+128]);
|
//vgetexppbf16(zm1|k3, ptr[rax+128]);
|
||||||
vgetexppbf16(zm1|k3, ptr_b[rax+128]);
|
//vgetexppbf16(zm1|k3, ptr_b[rax+128]);
|
||||||
|
|
||||||
vgetmantpbf16(xm1|k3, xmm2, 3);
|
vgetmantpbf16(xm1|k3, xmm2, 3);
|
||||||
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
|
vgetmantpbf16(xm1|k3, ptr[rax+128], 5);
|
||||||
|
|
|
@ -2284,4 +2284,100 @@ CYBOZU_TEST_AUTO(avx_vnni_int)
|
||||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(vmovd)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
setDefaultEncodingAVX10(PreAVX10v2Encoding);
|
||||||
|
vmovd(eax, xm1); // always AVX10.1
|
||||||
|
vmovd(xm1, eax); // always AVX10.1
|
||||||
|
vmovd(xm3, xm1); // always AVX10.2
|
||||||
|
// AVX-512 (AVX10.1)
|
||||||
|
vmovd(ptr[rax+128], xm1);
|
||||||
|
vmovd(xm1, ptr[rax+128]);
|
||||||
|
vmovd(ptr[rax+128], xm30);
|
||||||
|
vmovd(xm30, ptr[rax+128]);
|
||||||
|
|
||||||
|
setDefaultEncodingAVX10(AVX10v2Encoding);
|
||||||
|
vmovd(eax, xm1); // always AVX10.1
|
||||||
|
vmovd(xm1, eax); // always AVX10.1
|
||||||
|
vmovd(xm3, xm1); // always AVX10.2
|
||||||
|
// AVX10.2
|
||||||
|
vmovd(ptr[rax+128], xm1);
|
||||||
|
vmovd(xm1, ptr[rax+128]);
|
||||||
|
vmovd(ptr[rax+128], xm30);
|
||||||
|
vmovd(xm30, ptr[rax+128]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0xc5, 0xf9, 0x7e, 0xc8, // avx10.1
|
||||||
|
0xc5, 0xf9, 0x6e, 0xc8, // avx10.1
|
||||||
|
0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2
|
||||||
|
0xc5, 0xf9, 0x7e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx
|
||||||
|
0xc5, 0xf9, 0x6e, 0x88, 0x80, 0x00, 0x00, 0x00, // avx
|
||||||
|
0x62, 0x61, 0x7d, 0x08, 0x7e, 0x70, 0x20, // avx10.1
|
||||||
|
0x62, 0x61, 0x7d, 0x08, 0x6e, 0x70, 0x20, // avx10.1
|
||||||
|
|
||||||
|
0xc5, 0xf9, 0x7e, 0xc8, // avx10.1
|
||||||
|
0xc5, 0xf9, 0x6e, 0xc8, // avx10.1
|
||||||
|
0x62, 0xf1, 0x7e, 0x08, 0x7e, 0xd9, // avx10.2
|
||||||
|
0x62, 0xf1, 0x7d, 0x08, 0xd6, 0x48, 0x20, // avx10.2
|
||||||
|
0x62, 0xf1, 0x7e, 0x08, 0x7e, 0x48, 0x20, // avx10.2
|
||||||
|
0x62, 0x61, 0x7d, 0x08, 0xd6, 0x70, 0x20, // avx10.2
|
||||||
|
0x62, 0x61, 0x7e, 0x08, 0x7e, 0x70, 0x20, // avx10.2
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(vmovw)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
setDefaultEncodingAVX10(PreAVX10v2Encoding);
|
||||||
|
vmovw(eax, xm1); // always avx10.1
|
||||||
|
vmovw(xm1, eax); // always avx10.1
|
||||||
|
vmovw(xm3, xm1); // always avx10.2
|
||||||
|
// AVX10.1
|
||||||
|
vmovw(ptr[rax+128], xm1);
|
||||||
|
vmovw(xm1, ptr[rax+128]);
|
||||||
|
vmovw(ptr[rax+128], xm30);
|
||||||
|
vmovw(xm30, ptr[rax+128]);
|
||||||
|
|
||||||
|
setDefaultEncodingAVX10(AVX10v2Encoding);
|
||||||
|
vmovw(eax, xm1); // always avx10.1
|
||||||
|
vmovw(xm1, eax); // always avx10.1
|
||||||
|
vmovw(xm3, xm1); // always avx10.2
|
||||||
|
// AVX10.2
|
||||||
|
vmovw(ptr[rax+128], xm1);
|
||||||
|
vmovw(xm1, ptr[rax+128]);
|
||||||
|
vmovw(ptr[rax+128], xm30);
|
||||||
|
vmovw(xm30, ptr[rax+128]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8,
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8,
|
||||||
|
0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9,
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x48, 0x40,
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x48, 0x40,
|
||||||
|
0x62, 0x65, 0x7d, 0x08, 0x7e, 0x70, 0x40,
|
||||||
|
0x62, 0x65, 0x7d, 0x08, 0x6e, 0x70, 0x40,
|
||||||
|
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8,
|
||||||
|
0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8,
|
||||||
|
0x62, 0xf5, 0x7e, 0x08, 0x6e, 0xd9,
|
||||||
|
0x62, 0xf5, 0x7e, 0x08, 0x7e, 0x48, 0x40,
|
||||||
|
0x62, 0xf5, 0x7e, 0x08, 0x6e, 0x48, 0x40,
|
||||||
|
0x62, 0x65, 0x7e, 0x08, 0x7e, 0x70, 0x40,
|
||||||
|
0x62, 0x65, 0x7e, 0x08, 0x6e, 0x70, 0x40,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
6
test/test_by_xed.bat
Normal file
6
test/test_by_xed.bat
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
@echo off
|
||||||
|
set CFLAGS=-I ../ /EHsc /nologo
|
||||||
|
copy %1% tmp.cpp
|
||||||
|
cl %CFLAGS% test_by_xed.cpp && test_by_xed.exe
|
||||||
|
%XED% -64 -ir bin > out.txt
|
||||||
|
python3 test_by_xed.py %1% out.txt
|
|
@ -76,7 +76,7 @@ def newReg(s):
|
||||||
return s
|
return s
|
||||||
|
|
||||||
class Memory:
|
class Memory:
|
||||||
def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=False):
|
def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=0):
|
||||||
self.size = size
|
self.size = size
|
||||||
self.base = newReg(base)
|
self.base = newReg(base)
|
||||||
self.index = newReg(index)
|
self.index = newReg(index)
|
||||||
|
@ -85,8 +85,12 @@ class Memory:
|
||||||
self.broadcast = broadcast
|
self.broadcast = broadcast
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))]
|
if self.size == 0:
|
||||||
if self.broadcast:
|
s = 'ptr'
|
||||||
|
else:
|
||||||
|
idx = self.size * max(self.broadcast, 1)
|
||||||
|
s = g_sizeTbl[int(math.log2(idx))]
|
||||||
|
if self.broadcast > 0:
|
||||||
s += '_b'
|
s += '_b'
|
||||||
s += ' ['
|
s += ' ['
|
||||||
needPlus = False
|
needPlus = False
|
||||||
|
@ -107,23 +111,36 @@ class Memory:
|
||||||
s += ']'
|
s += ']'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
# Xbyak uses 'ptr' when it can be automatically detected, so we should consider this in the comparison.
|
||||||
def __eq__(self, rhs):
|
def __eq__(self, rhs):
|
||||||
# xbyak uses ptr if it is automatically detected, so xword == ptr is true
|
if self.broadcast > rhs.broadcast:
|
||||||
if self.broadcast != rhs.broadcast: return False
|
return rhs == self
|
||||||
# if not self.broadcast and 0 < self.size <= 8 and 0 < rhs.size <= 8 and self.size != rhs.size: return False
|
assert(self.broadcast <= rhs.broadcast)
|
||||||
if not self.broadcast and self.size > 0 and rhs.size > 0 and self.size != rhs.size: return False
|
if self.broadcast == 0:
|
||||||
|
if rhs.broadcast > 0: return False
|
||||||
|
# Xbyak uses 'ptr' when it is automatically detected.
|
||||||
|
# Therefore, the comparison is true if 'ptr' (i.e., size = 0) is used.
|
||||||
|
if 0 < self.size and 0 < rhs.size and self.size != rhs.size: return False
|
||||||
|
if self.broadcast == 1: # _b
|
||||||
|
if rhs.broadcast == 1: # compare ptr_b with ptr_b
|
||||||
|
if self.size != rhs.size:
|
||||||
|
return False
|
||||||
|
if self.size > 0 and (self.size != rhs.size * rhs.broadcast): # compare ptr_b with {1toX}
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
if self.broadcast != rhs.broadcast: return False
|
||||||
r = self.base == rhs.base and self.index == rhs.index and self.scale == rhs.scale and self.disp == rhs.disp
|
r = self.base == rhs.base and self.index == rhs.index and self.scale == rhs.scale and self.disp == rhs.disp
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def parseBroadcast(s):
|
def parseBroadcast(s):
|
||||||
if '_b' in s:
|
if '_b' in s:
|
||||||
return (s.replace('_b', ''), True)
|
return (s.replace('_b', ''), 1)
|
||||||
r = re.search(r'({1to\d+})', s)
|
r = re.search(r'({1to(\d+)})', s)
|
||||||
if not r:
|
if not r:
|
||||||
return (s, False)
|
return (s, 0)
|
||||||
return (s.replace(r.group(1), ''), True)
|
return (s.replace(r.group(1), ''), int(r.group(2)))
|
||||||
|
|
||||||
def parseMemory(s, broadcast=False):
|
def parseMemory(s, broadcast=0):
|
||||||
org_s = s
|
org_s = s
|
||||||
|
|
||||||
s = s.replace(' ', '').lower()
|
s = s.replace(' ', '').lower()
|
||||||
|
@ -133,7 +150,7 @@ def parseMemory(s, broadcast=False):
|
||||||
scale = 0
|
scale = 0
|
||||||
disp = 0
|
disp = 0
|
||||||
|
|
||||||
if not broadcast:
|
if broadcast == 0:
|
||||||
(s, broadcast) = parseBroadcast(s)
|
(s, broadcast) = parseBroadcast(s)
|
||||||
|
|
||||||
# Parse size
|
# Parse size
|
||||||
|
@ -157,7 +174,7 @@ def parseMemory(s, broadcast=False):
|
||||||
s = s[3:]
|
s = s[3:]
|
||||||
|
|
||||||
if s.startswith('_b'):
|
if s.startswith('_b'):
|
||||||
broadcast = True
|
broadcast = 1
|
||||||
s = s[2:]
|
s = s[2:]
|
||||||
|
|
||||||
# Extract the content inside brackets
|
# Extract the content inside brackets
|
||||||
|
@ -335,7 +352,7 @@ def parseMemoryTest():
|
||||||
('[]', Memory()),
|
('[]', Memory()),
|
||||||
('[rax]', Memory(0, rax)),
|
('[rax]', Memory(0, rax)),
|
||||||
('ptr[rax]', Memory(0, rax)),
|
('ptr[rax]', Memory(0, rax)),
|
||||||
('ptr_b[rax]', Memory(0, rax, broadcast=True)),
|
('ptr_b[rax]', Memory(0, rax, broadcast=1)),
|
||||||
('dword[rbx]', Memory(4, rbx)),
|
('dword[rbx]', Memory(4, rbx)),
|
||||||
('xword ptr[rcx]', Memory(16, rcx)),
|
('xword ptr[rcx]', Memory(16, rcx)),
|
||||||
('xmmword ptr[rcx]', Memory(16, rcx)),
|
('xmmword ptr[rcx]', Memory(16, rcx)),
|
||||||
|
@ -344,11 +361,36 @@ def parseMemoryTest():
|
||||||
('[0x12345]', Memory(0, None, None, 0, 0x12345)),
|
('[0x12345]', Memory(0, None, None, 0, 0x12345)),
|
||||||
('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
|
('yword [rax+rdx*4]', Memory(32, rax, rdx, 4)),
|
||||||
('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
|
('zword [rax+rdx*4+123]', Memory(64, rax, rdx, 4, 123)),
|
||||||
|
('xword_b [rax]', Memory(16, rax, None, 0, 0, 1)),
|
||||||
|
('dword [rax]{1to4}', Memory(16, rax, None, 0, 0, 1)),
|
||||||
|
('yword_b [rax]', Memory(32, rax, None, 0, 0, 1)),
|
||||||
|
('dword [rax]{1to8}', Memory(32, rax, None, 0, 0, 1)),
|
||||||
]
|
]
|
||||||
for (s, expected) in tbl:
|
for (s, expected) in tbl:
|
||||||
my = parseMemory(s)
|
my = parseMemory(s)
|
||||||
assertEqualStr(my, expected)
|
assertEqualStr(my, expected)
|
||||||
|
|
||||||
|
print('compare test')
|
||||||
|
tbl = [
|
||||||
|
('ptr[rax]', 'dword[rax]', True),
|
||||||
|
('byte[rax]', 'dword[rax]', False),
|
||||||
|
('yword_b[rax]', 'dword [rax]{1to8}', True),
|
||||||
|
('yword_b[rax]', 'word [rax]{1to16}', True),
|
||||||
|
('zword_b[rax]', 'word [rax]{1to32}', True),
|
||||||
|
('zword_b[rax]', 'word [rax]{1to16}', False),
|
||||||
|
('dword [rax]{1to2}', 'dword [rax] {1to4}', False),
|
||||||
|
('zword_b[rax]', 'xword_b [rax]', False),
|
||||||
|
('ptr_b[rax]', 'word [rax]{1to32}', True), # ignore size
|
||||||
|
]
|
||||||
|
for (lhs, rhs, eq) in tbl:
|
||||||
|
a = parseMemory(lhs)
|
||||||
|
b = parseMemory(rhs)
|
||||||
|
if eq:
|
||||||
|
assertEqual(a, b)
|
||||||
|
assertEqual(b, a)
|
||||||
|
else:
|
||||||
|
assert(parseMemory(lhs) != parseMemory(rhs))
|
||||||
|
|
||||||
def parseNmemonicTest():
|
def parseNmemonicTest():
|
||||||
print('parseNmemonicTest')
|
print('parseNmemonicTest')
|
||||||
tbl = [
|
tbl = [
|
||||||
|
@ -364,8 +406,8 @@ def parseNmemonicTest():
|
||||||
('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
|
('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
|
||||||
('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
|
('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
|
||||||
('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
|
('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
|
||||||
('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
|
('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 1), 5], [k3, T_z])),
|
||||||
('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
|
('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, 4), 5], [k3, T_z])),
|
||||||
('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])),
|
('vcmpph(k1, xmm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xmm15, Memory(0, rax, None, 0, 64), 1])),
|
||||||
]
|
]
|
||||||
for (s, expected) in tbl:
|
for (s, expected) in tbl:
|
||||||
|
|
5
test/test_by_xed_all.bat
Normal file
5
test/test_by_xed_all.bat
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
set TARGETS=old.txt new-ymm.txt bf16.txt misc.txt convert.txt minmax.txt saturation.txt
|
||||||
|
for %%f in (%TARGETS%) do (
|
||||||
|
echo %%f
|
||||||
|
call test_by_xed.bat avx10\%%f
|
||||||
|
)
|
|
@ -155,7 +155,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x7200 /* 0xABCD = A.BC(.D) */
|
VERSION = 0x7201 /* 0xABCD = A.BC(.D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -2809,11 +2809,12 @@ private:
|
||||||
std::swap(p1, p2);
|
std::swap(p1, p2);
|
||||||
rev = !rev;
|
rev = !rev;
|
||||||
}
|
}
|
||||||
|
enc = getEncoding(enc, 1);
|
||||||
int sel = -1;
|
int sel = -1;
|
||||||
if (getEncoding(enc, 1) == AVX10v2Encoding) {
|
if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) {
|
||||||
if ((p1->isXMM() || p1->isMEM()) && p2->isXMM()) sel = 2 + int(rev);
|
sel = 2 + int(rev);
|
||||||
} else {
|
} else if (p1->isREG(bit) || p1->isMEM()) {
|
||||||
if ((p1->isREG(bit) || p1->isMEM()) && p2->isXMM()) sel = int(rev);
|
sel = int(rev);
|
||||||
}
|
}
|
||||||
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "7.20"; }
|
const char *getVersionString() const { return "7.20.1"; }
|
||||||
void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
|
void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
|
||||||
void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
|
void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
|
||||||
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
|
||||||
|
@ -2186,9 +2186,9 @@ void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP
|
||||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
|
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
|
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
|
||||||
void vcomsbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_MAP5|T_EW0|T_MUST_EVEX, 0x2F); }
|
void vcomsbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_MAP5|T_EW0|T_MUST_EVEX, 0x2F); }
|
||||||
void vcomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F3|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
void vcomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||||
void vcomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
void vcomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||||
void vcomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F2|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
void vcomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
|
||||||
void vcvt2ps2phx(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x67); }
|
void vcvt2ps2phx(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x67); }
|
||||||
void vcvtbiasph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
void vcvtbiasph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||||
void vcvtbiasph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
void vcvtbiasph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
|
||||||
|
@ -2372,7 +2372,7 @@ void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0
|
||||||
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
|
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
|
||||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); }
|
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); }
|
||||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); }
|
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); }
|
||||||
void vgetexppbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x42); }
|
void vgetexppbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x42); }
|
||||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); }
|
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); }
|
||||||
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); }
|
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); }
|
||||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); }
|
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); }
|
||||||
|
@ -2656,9 +2656,9 @@ void vsubnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM
|
||||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||||
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
||||||
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||||
void vucomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F3|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
void vucomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||||
void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||||
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F2|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
|
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
|
||||||
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
|
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
|
||||||
|
|
Loading…
Reference in a new issue