start to test by xed

This commit is contained in:
MITSUNARI Shigeo 2024-10-11 09:55:14 +09:00
parent b597cc450b
commit 64d5779bb1
4 changed files with 801 additions and 47 deletions

657
test/target/misc.txt Normal file
View file

@ -0,0 +1,657 @@
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
vpcompressb(ptr[rax + 64], xmm1);
vpcompressb(xmm30 | k5, xmm1);
vpcompressb(ptr[rax + 64], ymm1);
vpcompressb(ymm30 | k3 |T_z, ymm1);
vpcompressb(ptr[rax + 64], zmm1);
vpcompressb(zmm30 | k2 |T_z, zmm1);
vpcompressw(ptr[rax + 64], xmm1);
vpcompressw(xmm30 | k5, xmm1);
vpcompressw(ptr[rax + 64], ymm1);
vpcompressw(ymm30 | k3 |T_z, ymm1);
vpcompressw(ptr[rax + 64], zmm1);
vpcompressw(zmm30 | k2 |T_z, zmm1);
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
vpexpandb(xmm5|k3|T_z, xmm30);
vpexpandb(ymm5|k3|T_z, ymm30);
vpexpandb(zmm5|k3|T_z, zmm30);
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(xmm5|k3|T_z, xmm30);
vpexpandw(ymm5|k3|T_z, ymm30);
vpexpandw(zmm5|k3|T_z, zmm30);
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
gf2p8affineinvqb(xmm1, xmm2, 3);
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
gf2p8affineqb(xmm1, xmm2, 3);
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
gf2p8mulb(xmm1, xmm2);
gf2p8mulb(xmm1, ptr [rax + 0x40]);
vgf2p8mulb(xmm1, xmm5, xmm2);
vgf2p8mulb(ymm1, ymm5, ymm2);
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(xmm30, xmm31, xmm4);
vgf2p8mulb(ymm30, ymm31, ymm4);
vgf2p8mulb(zmm30, zmm31, zmm4);
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]);
vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]);
vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]);
vcvtneps2bf16(xmm0, xword [rax + 64]);
vcvtneps2bf16(xmm0 | k1, yword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, zword [rax + 64]);
vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]);
vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]);
vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]);
vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]);
ldtilecfg(ptr[rax + rcx * 4 + 64]);
sttilecfg(ptr[rsp + rax * 8 + 128]);
tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]);
tileloaddt1(tmm4, ptr[r8 + r9 + 32]);
tilerelease();
tilestored(ptr[r10 + r11 * 2 + 32], tmm2);
tilezero(tmm7);
tdpbssd(tmm1, tmm2, tmm3);
tdpbsud(tmm2, tmm3, tmm4);
tdpbusd(tmm3, tmm4, tmm5);
tdpbuud(tmm4, tmm5, tmm6);
tdpbf16ps(tmm5, tmm6, tmm7);
tileloadd(tmm1, ptr[r8+r8]);
tileloadd(tmm1, ptr[rax+rcx*4]);
tileloadd(tmm1, ptr[r8+r9*1+0x40]);
vaddph(zmm0, zmm1, ptr[rax+64]);
vaddph(ymm0, ymm1, ptr[rax+64]);
vaddph(xmm0, xmm1, ptr[rax+64]);
vaddph(zmm0, zmm1, ptr_b[rax+64]);
vaddph(ymm0, ymm1, ptr_b[rax+64]);
vaddph(xmm0, xmm1, ptr_b[rax+64]);
vaddsh(xmm0, xmm15, ptr[rax+64]);
vaddsh(xmm0|k5|T_z|T_rd_sae, xmm15, xmm3);
vcmpph(k1, xm15, ptr[rax+64], 1);
vcmpph(k2, ym15, ptr[rax+64], 2);
vcmpph(k3, zm15, ptr[rax+64], 3);
vcmpph(k1, xm15, ptr_b[rax+64], 1);
vcmpph(k2, ym15, ptr_b[rax+64], 2);
vcmpph(k3, zm15, ptr_b[rax+64], 3);
vcmpsh(k1, xm15, ptr[rax+64], 1);
vcmpsh(k3|k5, xmm1, xmm25|T_sae, 4);
vcomish(xmm1, ptr[rax+64]);
vcomish(xmm1|T_sae, xmm15);
vucomish(xmm1, ptr [rax+0x40]);
vucomish(xmm1|T_sae, xmm15);
vfmaddsub213ph(xmm1, xmm2, ptr [rax+0x40]);
vfmaddsub213ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmaddsub213ph(xmm1|k3, xmm2, xmm5);
vfmaddsub213ph(ymm1, ymm2, ptr [rax+0x40]);
vfmaddsub213ph(ymm1, ymm2, ptr_b[rax+0x40]);
vfmaddsub213ph(ymm1|k3, ymm2, ymm5);
vfmaddsub213ph(zmm1, zmm2, ptr [rax+0x40]);
vfmaddsub213ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmaddsub213ph(zmm1|T_ru_sae, zmm2, zmm5);
vfmsubadd132ph(xmm1, xmm2, ptr [rax+0x40]);
vfmsubadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmsubadd132ph(ymm1, ymm2, ptr [rax+0x40]);
vfmsubadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmsubadd132ph(zmm1, zmm2, ptr [rax+0x40]);
vfmsubadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmsubadd132ph(zmm1|T_ru_sae, zmm2, zmm5);
vfmadd132ph(xmm1, xmm2, ptr [rax+0x40]);
vfmadd132ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmadd132ph(ymm1, ymm2, ptr [rax+0x40]);
vfmadd132ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmadd132ph(zmm1, zmm2, ptr [rax+0x40]);
vfmadd132ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmadd132ph(zmm1|T_rd_sae, zmm2, zmm5);
vfmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
vfmsub231ph(xmm1, xmm2, ptr_b [rax+0x40]);
vfmsub231ph(ymm1, ymm2, ptr [rax+0x40]);
vfmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfmsub231ph(zmm1, zmm2, ptr [rax+0x40]);
vfmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
vfnmsub231ph(xmm1, xmm2, ptr [rax+0x40]);
vfnmsub231ph(ymm1, ymm2, ptr_b [rax+0x40]);
vfnmsub231ph(zmm1, zmm2, ptr_b [rax+0x40]);
vfnmsub231ph(zmm1|T_rd_sae, zmm2, zmm5);
vfmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
vfnmadd132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfnmadd132sh(xmm1, xmm2, ptr [rax+0x40]);
vfmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
vfnmsub132sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vfnmsub132sh(xmm1, xmm2, ptr [rax+0x40]);
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr [rax+0x40]);
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr [rax+0x40]);
vfcmaddcph(zmm1|k1, zmm2, ptr [rax+0x40]);
vfcmaddcph(zmm1|k1|T_rd_sae, zmm2, zmm5);
vfcmaddcph(xmm1|k1|T_z, xmm2, ptr_b [rax+0x40]);
vfcmaddcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfcmaddcph(zmm1|k1|T_z, zmm2, ptr_b [rax+0x40]);
vfmaddcph(xm1, xm2, ptr[rax+0x40]);
vfmaddcph(ym1|k1|T_z, ym2, ptr_b[rax+0x40]);
vfmaddcph(zm1, zm2, ptr_b[rax+0x40]);
vfcmulcph(xmm1, xmm2, ptr [rax+0x40]);
vfcmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfcmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
vfmulcph(xmm1, xmm2, ptr [rax+0x40]);
vfmulcph(ymm1|k1|T_z, ymm2, ptr_b [rax+0x40]);
vfmulcph(zmm1, zmm2, ptr_b [rax+0x40]);
vrcpph(xmm1, ptr [rax+0x40]);
vrcpph(xmm1, ptr_b [rax+0x40]);
vrcpph(ymm1, ptr [rax+0x40]);
vrcpph(ymm1, ptr_b [rax+0x40]);
vrcpph(zmm1, ptr [rax+0x40]);
vrcpph(zmm1, ptr_b [rax+0x40]);
vrcpsh(xmm1, xmm3, ptr [rax+0x40]);
vrsqrtph(xmm1, ptr [rax+0x40]);
vrsqrtph(xmm1, ptr_b [rax+0x40]);
vrsqrtph(ymm2, ptr [rax+0x40]);
vrsqrtph(ymm2, ptr_b [rax+0x40]);
vrsqrtph(zmm2, ptr [rax+0x40]);
vrsqrtph(zmm2, ptr_b [rax+0x40]);
vrsqrtsh(xmm1|k5|T_z, xmm7, ptr [rax+0x40]);
vsqrtph(xmm1|k4|T_z, ptr [rax+0x40]);
vsqrtph(xmm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtph(ymm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtph(zmm1|k4|T_z, ptr [rax+0x40]);
vsqrtph(zmm1|k4|T_z, ptr_b [rax+0x40]);
vsqrtsh(xmm1|k4|T_z, xmm5, ptr [rax+0x40]);
vsqrtsh(xmm1|k4|T_z|T_rd_sae, xmm5, xmm7);
vscalefph(xmm1, xmm5, ptr [rax+0x40]);
vscalefph(xmm1, xmm5, ptr_b [rax+0x40]);
vscalefph(ymm1, ymm5, ptr [rax+0x40]);
vscalefph(ymm1, ymm5, ptr_b [rax+0x40]);
vscalefph(zmm1, zmm5, ptr [rax+0x40]);
vscalefph(zmm1, zmm5, ptr_b [rax+0x40]);
vscalefph(zmm1|k1|T_z|T_rd_sae, zmm5, zmm7);
vscalefsh(xmm1, xmm5, ptr [rax+0x40]);
vscalefsh(xmm1|k1|T_z|T_rd_sae, xmm5, xmm7);
vreduceph(xmm1, ptr [rax+0x40], 0x1);
vreduceph(xmm1, ptr_b [rax+0x40], 0x2);
vreduceph(ymm1, ptr [rax+0x40], 0x3);
vreduceph(ymm1, ptr_b [rax+0x40], 0x4);
vreduceph(zmm1, ptr [rax+0x40], 0x5);
vreduceph(zmm1, ptr_b [rax+0x40], 0x6);
vreduceph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
vreducesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
vreducesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
vrndscaleph(xmm1, ptr [rax+0x40], 0x1);
vrndscaleph(xmm1, ptr_b [rax+0x40], 0x2);
vrndscaleph(ymm1, ptr [rax+0x40], 0x3);
vrndscaleph(ymm1, ptr_b [rax+0x40], 0x4);
vrndscaleph(zmm1, ptr [rax+0x40], 0x5);
vrndscaleph(zmm1, ptr_b [rax+0x40], 0x6);
vrndscaleph(zmm1|k1|T_z|T_sae, zmm5, 0x7);
vrndscalesh(xmm1, xmm3, ptr [rax+0x40], 0x1);
vrndscalesh(xmm1|k1|T_z|T_sae, xmm5, xmm4, 0x2);
vfpclassph(k1, xword [rax+0x40], 0x1);
vfpclassph(k1, xword_b[rax+0x40], 0x2);
vfpclassph(k1, yword [rax+0x40], 0x3);
vfpclassph(k1, yword_b[rax+0x40], 0x4);
vfpclassph(k1, zword [rax+0x40], 0x5);
vfpclassph(k1, zword_b[rax+0x40], 0x6);
vfpclasssh(k1|k2, xmm3, 0x5);
vfpclasssh(k1|k2, ptr [rax+0x40], 0x5);
vgetexpph(xmm1, ptr [rax+0x40]);
vgetexpph(ymm1, ptr_b [rax+0x40]);
vgetexpph(zmm1, ptr [rax+0x40]);
vgetexpph(zmm1|k1|T_z|T_sae, zmm5);
vgetexpsh(xmm1, xmm5, ptr [rax+0x40]);
vgetexpsh(xmm1|k1|T_z|T_sae, xmm3, xmm5);
vgetmantph(xmm1, ptr [rax+0x40], 0x1);
vgetmantph(ymm1, ptr_b [rax+0x40], 0x2);
vgetmantph(zmm1, ptr [rax+0x40], 0x3);
vgetmantph(zmm1|k1|T_z|T_sae, zmm5, 0x4);
vgetmantsh(xmm1, xmm5, ptr [rax+0x40], 0x5);
vgetmantsh(xmm1|k1|T_z|T_sae, xmm3, xmm5, 0x6);
vmovsh(xmm1|k1|T_z, ptr [rax+0x40]);
vmovsh(ptr [rax+0x40]|k1, xmm1);
vmovsh(xmm1|k2|T_z, xmm3, xmm5);
vmovw(xmm1, r13d);
vmovw(xmm3, ptr [rax+0x40]);
vmovw(r9d, xmm1);
vmovw(ptr [rax+0x40], xmm7);
vcvtsd2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtsd2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2sd(xmm1|k1|T_z|T_sae, xmm2, xmm3);
vcvtsh2sd(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2ss(xmm1|k1|T_z|T_sae, xmm2, xmm3);
vcvtsh2ss(xmm1, xmm2, ptr [rax+0x40]);
vcvtss2sh(xmm1|k1|T_z|T_rd_sae, xmm2, xmm3);
vcvtss2sh(xmm1, xmm2, ptr [rax+0x40]);
vcvtsh2si(edx|T_rd_sae, xmm1);
vcvtsh2si(edx, ptr [rax+0x40]);
vcvtsh2si(rdx|T_rd_sae, xmm1);
vcvtsh2si(r8, ptr [rax+0x40]);
vcvtph2dq(xmm1, xmm5);
vcvtph2dq(xmm1, ptr [rax+0x40]);
vcvtph2dq(xmm1, ptr_b [rax+0x40]);
vcvtph2dq(ymm1|k2|T_z, xmm5);
vcvtph2dq(ymm1, ptr [rax+0x40]);
vcvtph2dq(ymm1, ptr_b [rax+0x40]);
vcvtph2dq(zmm1|k5|T_z|T_rd_sae, ymm3);
vcvtph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2psx(xmm1, xmm5);
vcvtph2psx(xmm1, ptr [rax+0x40]);
vcvtph2psx(xmm1, ptr_b [rax+0x40]);
vcvtph2psx(ymm1|k2|T_z, xmm5);
vcvtph2psx(ymm1, ptr [rax+0x40]);
vcvtph2psx(ymm1, ptr_b [rax+0x40]);
vcvtph2psx(zmm1|k5|T_z|T_sae, ymm3);
vcvtph2psx(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2psx(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2udq(xmm1, xmm5);
vcvtph2udq(xmm1, ptr [rax+0x40]);
vcvtph2udq(xmm1, ptr_b [rax+0x40]);
vcvtph2udq(ymm1|k2|T_z, xmm5);
vcvtph2udq(ymm1, ptr [rax+0x40]);
vcvtph2udq(ymm1, ptr_b [rax+0x40]);
vcvtph2udq(zmm1|k5|T_z|T_rd_sae, ymm3);
vcvtph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2dq(xmm1, xmm5);
vcvttph2dq(xmm1, ptr [rax+0x40]);
vcvttph2dq(xmm1, ptr_b [rax+0x40]);
vcvttph2dq(ymm1|k2|T_z, xmm5);
vcvttph2dq(ymm1, ptr [rax+0x40]);
vcvttph2dq(ymm1, ptr_b [rax+0x40]);
vcvttph2dq(zmm1|k5|T_z|T_sae, ymm3);
vcvttph2dq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2dq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2udq(xmm1, xmm5);
vcvttph2udq(xmm1, ptr [rax+0x40]);
vcvttph2udq(xmm1, ptr_b [rax+0x40]);
vcvttph2udq(ymm1|k2|T_z, xmm5);
vcvttph2udq(ymm1, ptr [rax+0x40]);
vcvttph2udq(ymm1, ptr_b [rax+0x40]);
vcvttph2udq(zmm1|k5|T_z|T_sae, ymm3);
vcvttph2udq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2udq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2pd(xmm1, xmm5);
vcvtph2pd(xmm1, ptr [rax+0x40]);
vcvtph2pd(xmm1, ptr_b [rax+0x40]);
vcvtph2pd(ymm1|k2|T_z, xmm5);
vcvtph2pd(ymm1, ptr [rax+0x40]);
vcvtph2pd(ymm1, ptr_b [rax+0x40]);
vcvtph2pd(zmm1|k5|T_z|T_sae, xmm3);
vcvtph2pd(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2pd(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2qq(xmm1, xmm5);
vcvtph2qq(xmm1, ptr [rax+0x40]);
vcvtph2qq(xmm1, ptr_b [rax+0x40]);
vcvtph2qq(ymm1|k2|T_z, xmm5);
vcvtph2qq(ymm1, ptr [rax+0x40]);
vcvtph2qq(ymm1, ptr_b [rax+0x40]);
vcvtph2qq(zmm1|k5|T_z|T_rd_sae, xmm3);
vcvtph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtph2uqq(xmm1, xmm5);
vcvtph2uqq(xmm1, ptr [rax+0x40]);
vcvtph2uqq(xmm1, ptr_b [rax+0x40]);
vcvtph2uqq(ymm1|k2|T_z, xmm5);
vcvtph2uqq(ymm1, ptr [rax+0x40]);
vcvtph2uqq(ymm1, ptr_b [rax+0x40]);
vcvtph2uqq(zmm1|k5|T_z|T_rd_sae, xmm3);
vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvttph2uqq(xmm1, xmm5);
vcvttph2uqq(xmm1, ptr [rax+0x40]);
vcvttph2uqq(xmm1, ptr_b [rax+0x40]);
vcvttph2uqq(ymm1|k2|T_z, xmm5);
vcvttph2uqq(ymm1, ptr [rax+0x40]);
vcvttph2uqq(ymm1, ptr_b [rax+0x40]);
vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3);
vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtdq2ph(xmm1, xmm5);
vcvtdq2ph(xmm1, xword [rax+0x40]);
vcvtdq2ph(xmm1, xword_b [rax+0x40]);
vcvtdq2ph(xmm1, yword [rax+0x40]);
vcvtdq2ph(xmm1, yword_b [rax+0x40]);
vcvtdq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtdq2ph(ymm1, ptr [rax+0x40]);
vcvtdq2ph(ymm1, ptr_b [rax+0x40]);
vcvtps2phx(xmm1, xmm5);
vcvtps2phx(xmm1, xword [rax+0x40]);
vcvtps2phx(xmm1, xword_b [rax+0x40]);
vcvtps2phx(xmm1, yword [rax+0x40]);
vcvtps2phx(xmm1, yword_b [rax+0x40]);
vcvtps2phx(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtps2phx(ymm1, ptr [rax+0x40]);
vcvtps2phx(ymm1, ptr_b [rax+0x40]);
vcvtudq2ph(xmm1, xmm5);
vcvtudq2ph(xmm1, xword [rax+0x40]);
vcvtudq2ph(xmm1, xword_b [rax+0x40]);
vcvtudq2ph(xmm1, yword [rax+0x40]);
vcvtudq2ph(xmm1, yword_b [rax+0x40]);
vcvtudq2ph(ymm1|k2|T_z|T_rd_sae, zmm5);
vcvtudq2ph(ymm1, ptr [rax+0x40]);
vcvtudq2ph(ymm1, ptr_b [rax+0x40]);
vcvtpd2ph(xmm1, xmm5);
vcvtpd2ph(xmm1, ymm5);
vcvtpd2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtpd2ph(xmm1, xword [rax+0x40]);
vcvtpd2ph(xmm1, xword_b [rax+0x40]);
vcvtpd2ph(xmm1, yword [rax+0x40]);
vcvtpd2ph(xmm1, yword_b [rax+0x40]);
vcvtpd2ph(xmm1, zword [rax+0x40]);
vcvtpd2ph(xmm1, zword_b [rax+0x40]);
vcvtqq2ph(xmm1, xmm5);
vcvtqq2ph(xmm1, ymm5);
vcvtqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtqq2ph(xmm1, xword [rax+0x40]);
vcvtqq2ph(xmm1, xword_b [rax+0x40]);
vcvtqq2ph(xmm1, yword [rax+0x40]);
vcvtqq2ph(xmm1, yword_b [rax+0x40]);
vcvtqq2ph(xmm1, zword [rax+0x40]);
vcvtqq2ph(xmm1, zword_b [rax+0x40]);
vcvtuqq2ph(xmm1, xmm5);
vcvtuqq2ph(xmm1, ymm5);
vcvtuqq2ph(xmm1|k2|T_z|T_rd_sae, zmm5);
vcvtuqq2ph(xmm1, xword [rax+0x40]);
vcvtuqq2ph(xmm1, xword_b [rax+0x40]);
vcvtuqq2ph(xmm1, yword [rax+0x40]);
vcvtuqq2ph(xmm1, yword_b [rax+0x40]);
vcvtuqq2ph(xmm1, zword [rax+0x40]);
vcvtuqq2ph(xmm1, zword_b [rax+0x40]);
vcvtph2uw(xmm1, xmm5);
vcvtph2uw(xmm1, ptr [rax+0x40]);
vcvtph2uw(xmm1, ptr_b [rax+0x40]);
vcvtph2uw(ymm1, ptr [rax+0x40]);
vcvtph2uw(ymm1, ptr_b [rax+0x40]);
vcvtph2uw(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtph2uw(zmm1, ptr [rax+0x40]);
vcvtph2uw(zmm1, ptr_b [rax+0x40]);
vcvtph2w(xmm1, xmm5);
vcvtph2w(xmm1, ptr [rax+0x40]);
vcvtph2w(xmm1, ptr_b [rax+0x40]);
vcvtph2w(ymm1, ptr [rax+0x40]);
vcvtph2w(ymm1, ptr_b [rax+0x40]);
vcvtph2w(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtph2w(zmm1, ptr [rax+0x40]);
vcvtph2w(zmm1, ptr_b [rax+0x40]);
vcvttph2uw(xmm1, xmm5);
vcvttph2uw(xmm1, ptr [rax+0x40]);
vcvttph2uw(xmm1, ptr_b [rax+0x40]);
vcvttph2uw(ymm1, ptr [rax+0x40]);
vcvttph2uw(ymm1, ptr_b [rax+0x40]);
vcvttph2uw(zmm1|k2|T_z|T_sae, zmm5);
vcvttph2uw(zmm1, ptr [rax+0x40]);
vcvttph2uw(zmm1, ptr_b [rax+0x40]);
vcvttph2w(xmm1, xmm5);
vcvttph2w(xmm1, ptr [rax+0x40]);
vcvttph2w(xmm1, ptr_b [rax+0x40]);
vcvttph2w(ymm1, ptr [rax+0x40]);
vcvttph2w(ymm1, ptr_b [rax+0x40]);
vcvttph2w(zmm1|k2|T_z|T_sae, zmm5);
vcvttph2w(zmm1, ptr [rax+0x40]);
vcvttph2w(zmm1, ptr_b [rax+0x40]);
vcvtuw2ph(xmm1, xmm5);
vcvtuw2ph(xmm1, ptr [rax+0x40]);
vcvtuw2ph(xmm1, ptr_b [rax+0x40]);
vcvtuw2ph(ymm1, ptr [rax+0x40]);
vcvtuw2ph(ymm1, ptr_b [rax+0x40]);
vcvtuw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtuw2ph(zmm1, ptr [rax+0x40]);
vcvtuw2ph(zmm1, ptr_b [rax+0x40]);
vcvtw2ph(xmm1, xmm5);
vcvtw2ph(xmm1, ptr [rax+0x40]);
vcvtw2ph(xmm1, ptr_b [rax+0x40]);
vcvtw2ph(ymm1, ptr [rax+0x40]);
vcvtw2ph(ymm1, ptr_b [rax+0x40]);
vcvtw2ph(zmm1|k2|T_z|T_rd_sae, zmm5);
vcvtw2ph(zmm1, ptr [rax+0x40]);
vcvtw2ph(zmm1, ptr_b [rax+0x40]);
vcvtps2ph(xmm1, xmm2, 0x1);
vcvtps2ph(ptr [rax+0x40], xmm2, 0x2);
vcvtps2ph(xmm1, ymm2, 0x3);
vcvtps2ph(ptr [rax+0x40], ymm2, 0x4);
vcvtps2ph(xmm1|k1|T_z, xmm2, 0x5);
vcvtps2ph(ptr [rax+0x40]|k1, xmm3, 0x6);
vcvtps2ph(xmm1|k2, ymm4, 0x7);
vcvtps2ph(ptr [rax+0x40]|k2, ymm5, 0x8);
vcvtps2ph(ymm1|k2|T_sae, zmm5, 0x9);
vcvtps2ph(ptr [rax+0x40]|k5, zmm4, 0xa);
vcvtsh2usi(ecx|T_rd_sae, xmm1);
vcvtsh2usi(eax, ptr [rax+0x40]);
vcvtsh2usi(r9|T_rd_sae, xmm1);
vcvtsh2usi(r13, ptr [rax+0x40]);
vcvttsh2si(ecx|T_sae, xmm1);
vcvttsh2si(eax, ptr [rax+0x40]);
vcvttsh2si(r9|T_sae, xmm1);
vcvttsh2si(r13, ptr [rax+0x40]);
vcvttsh2usi(ecx|T_sae, xmm1);
vcvttsh2usi(eax, ptr [rax+0x40]);
vcvttsh2usi(r9|T_sae, xmm1);
vcvttsh2usi(r13, ptr [rax+0x40]);
vcvttph2qq(xmm1, xmm5);
vcvttph2qq(xmm1, ptr [rax+0x40]);
vcvttph2qq(xmm1, ptr_b [rax+0x40]);
vcvttph2qq(ymm1|k2|T_z, xmm5);
vcvttph2qq(ymm1, ptr [rax+0x40]);
vcvttph2qq(ymm1, ptr_b [rax+0x40]);
vcvttph2qq(zmm1|k5|T_z|T_sae, xmm3);
vcvttph2qq(zmm1|k5|T_z, ptr [rax+0x40]);
vcvttph2qq(zmm1|k5|T_z, ptr_b [rax+0x40]);
vcvtsi2sh(xmm1|T_rd_sae, xmm2, eax);
vcvtsi2sh(xmm1, xmm2, dword [rax+0x40]);
vcvtsi2sh(xmm1|T_rd_sae, xmm2, r9);
vcvtsi2sh(xmm1, xmm2, qword [rax+0x40]);
vcvtusi2sh(xmm1|T_rd_sae, xmm2, eax);
vcvtusi2sh(xmm1, xmm2, dword [rax+0x40]);
vcvtusi2sh(xmm1|T_rd_sae, xmm2, r9);
vcvtusi2sh(xmm1, xmm2, qword [rax+0x40]);
aadd(ptr[rax], ecx);
aadd(ptr[eax], ecx);
aadd(ptr[rax], r10);
aand(ptr[rax], ecx);
aand(ptr[eax], ecx);
aand(ptr[rax], r10);
aor(ptr[rax], ecx);
aor(ptr[eax], ecx);
aor(ptr[rax], r10);
axor(ptr[rax], ecx);
axor(ptr[eax], ecx);
axor(ptr[rax], r10);
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
cmplexadd(ptr[rax+r10*4], rcx, rdx);
cmplxadd(ptr[rax+r10*4], rcx, rdx);
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
cmppxadd(ptr[rax+r10*4], rcx, rdx);
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
vsha512msg1(ymm3, xmm5);
vsha512msg2(ymm9, ymm10);
vsha512rnds2(ymm1, ymm3, xmm2);
vsm3msg1(xmm1, xmm2, xmm3);
vsm3msg1(xmm1, xmm2, ptr [rax]);
vsm3msg2(xmm5, xmm7, xmm3);
vsm3msg2(xmm5, xmm6, ptr [rax]);
vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);
vsm4key4(xmm1, xmm2, xmm3);
vsm4key4(xmm1, xmm2, ptr [rdx]);
vsm4rnds4(xmm1, xmm2, xmm3);
vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
vpdpbssd(xmm1, xmm2, xmm3);
vpdpbssd(ymm1, ymm2, ptr [rax]);
vpdpbssds(xmm1, xmm2, xmm3);
vpdpbssds(ymm1, ymm2, ptr [rax]);
vpdpbsud(xmm1, xmm2, xmm3);
vpdpbsud(ymm1, ymm2, ptr [rax]);
vpdpbsuds(xmm1, xmm2, xmm3);
vpdpbsuds(ymm1, ymm2, ptr [rax]);
vpdpbuud(xmm1, xmm2, xmm3);
vpdpbuud(ymm1, ymm2, ptr [rax]);
vpdpbuuds(xmm1, xmm2, xmm3);
vpdpbuuds(ymm1, ymm2, ptr [rax]);
vpdpwsud(xmm1, xmm2, xmm3);
vpdpwsud(ymm1, ymm2, ptr [rax]);
vpdpwsuds(xmm1, xmm2, xmm3);
vpdpwsuds(ymm1, ymm2, ptr [rax]);
vpdpwusd(xmm1, xmm2, xmm3);
vpdpwusd(ymm1, ymm2, ptr [rax]);
vpdpwusds(xmm1, xmm2, xmm3);
vpdpwusds(ymm1, ymm2, ptr [rax]);
vpdpwuud(xmm1, xmm2, xmm3);
vpdpwuud(ymm1, ymm2, ptr [rax]);
vpdpwuuds(xmm1, xmm2, xmm3);
vpdpwuuds(ymm1, ymm2, ptr [rax]);

View file

@ -3,8 +3,9 @@
// Code generator whose constructor body consists of the instruction list
// under test: the files below are textually included inside the constructor,
// so every line in them is compiled as a statement of Code::Code().
struct Code : Xbyak::CodeGenerator {
    Code()
        // 4096*8 is forwarded to the CodeGenerator base constructor
        // (presumably the code buffer size in bytes -- TODO confirm).
        : Xbyak::CodeGenerator(4096*8)
    {
// NOTE(review): both cpp.txt and tmp.cpp are included here; this view seems
// to interleave the pre- and post-change lines, so confirm that only one of
// them is meant to remain.
#include "cpp.txt"
#include "tmp.cpp"
    }
};

View file

@ -7,6 +7,25 @@ class Reg:
self.name = s
def __str__(self):
    """Return the register name exactly as it was given to the constructor."""
    return self.name
def __eq__(self, rhs):
    """Return True when ``rhs`` carries the same ``name`` as this register.

    Operands that have no ``name`` attribute (``None``, integers, memory
    operands, ...) can never equal a register.  For those the comparison is
    deferred with ``NotImplemented`` so that ``==`` evaluates to False
    instead of raising AttributeError.
    """
    if not hasattr(rhs, 'name'):
        return NotImplemented
    return self.name == rhs.name
def __lt__(self, rhs):
    """Order by name (plain string comparison) so that lists can be sorted.

    Only ``rhs.name`` is read, so any object exposing a ``name`` attribute
    can be compared against.
    """
    return self.name < rhs.name
# Vector register names in table order: xmm0..xmm31, ymm0..ymm31, zmm0..zmm31.
g_xmmTbl = [f'{prefix}mm{idx}' for prefix in 'xyz' for idx in range(32)]
g_regTbl = '''
eax ecx edx ebx esp ebp esi edi
@ -22,49 +41,53 @@ r16w r17w r18w r19w r20w r21w r22w r23w r24w r25w r26w r27w r28w r29w r30w r31w
r8b r9b r10b r11b r12b r13b r14b r15b
r16b r17b r18b r19b r20b r21b r22b r23b r24b r25b r26b r27b r28b r29b r30b r31b
spl bpl sil dil
xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
xmm16 xmm17 xmm18 xmm19 xmm20 xmm21 xmm22 xmm23
xmm24 xmm25 xmm26 xmm27 xmm28 xmm29 xmm30 xmm31
ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7
ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 ymm15
ymm16 ymm17 ymm18 ymm19 ymm20 ymm21 ymm22 ymm23
ymm24 ymm25 ymm26 ymm27 ymm28 ymm29 ymm30 ymm31
zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7
zmm8 zmm9 zmm10 zmm11 zmm12 zmm13 zmm14 zmm15
zmm16 zmm17 zmm18 zmm19 zmm20 zmm21 zmm22 zmm23
zmm24 zmm25 zmm26 zmm27 zmm28 zmm29 zmm30 zmm31
'''.split()
tmm0 tmm1 tmm2 tmm3 tmm4 tmm5 tmm6 tmm7
'''.split()+g_xmmTbl
# Define one module-level Reg constant per name listed in g_regTbl, so that
# registers can be written as bare identifiers (e.g. ``rax``) in the tests.
for e in g_regTbl:
    globals()[e] = Reg(e)

# Opmask registers indexed by mask number minus one (g_maskTbl[0] is k1).
g_maskTbl = [k1, k2, k3, k4, k5, k6, k7]
# Punctuation that is blanked out (via str.translate) before a line is split
# into tokens.
g_replaceCharTbl = '{}();|,'
g_replaceChar = str.maketrans(g_replaceCharTbl, ' '*len(g_replaceCharTbl))

# xbyak size keywords; the keyword at index i stands for 1 << i bytes.
g_sizeTbl = ['byte', 'word', 'dword', 'qword', 'xword', 'yword', 'zword']
# XED size keywords; the keyword at index i stands for 1 << (i + 4) bytes.
g_xedSizeTbl = ['xmmword', 'ymmword', 'zmmword']

# xbyak attribute names and, at the same index, their XED spelling.
# The tables used to be assigned twice (first without T_z / z); only the final
# assignment had any effect, so the shadowed pair has been removed.
g_attrTbl = ['T_sae', 'T_rn_sae', 'T_rd_sae', 'T_ru_sae', 'T_rz_sae', 'T_z']
g_attrXedTbl = ['sae', 'rne-sae', 'rd-sae', 'ru-sae', 'rz-sae', 'z']
class Attr:
    """Named instruction attribute such as ``T_sae`` or ``T_z``.

    Instances compare and sort by ``name``.  Comparison is duck-typed on the
    ``name`` attribute, so an Attr can be ordered against any other object
    that exposes ``name``.
    """

    def __init__(self, s):
        self.name = s

    def __str__(self):
        return self.name

    def __repr__(self):
        return f'{type(self).__name__}({self.name!r})'

    def __eq__(self, rhs):
        # Objects without a name (ints, None, ...) are simply "not equal";
        # defer to Python's fallback instead of raising AttributeError.
        if not hasattr(rhs, 'name'):
            return NotImplemented
        return self.name == rhs.name

    def __hash__(self):
        # Keep hashing consistent with __eq__ (defining __eq__ alone would
        # make instances unhashable).
        return hash(self.name)

    def __lt__(self, rhs):
        return self.name < rhs.name
# Expose every name in g_attrTbl as a module-level Attr constant, so that
# attributes can be written as bare identifiers (e.g. ``T_sae``).
for e in g_attrTbl:
    globals()[e] = Attr(e)
def newReg(s):
    """Return a Reg for a register name; pass anything else through unchanged.

    A ``str`` (including str subclasses) is wrapped in ``Reg``.  Every other
    value -- an existing Reg, ``None``, ... -- is returned as is.
    """
    return Reg(s) if isinstance(s, str) else s
class Memory:
def __init__(self, size=0, base=None, index=None, scale=0, disp=0, broadcast=False):
    """Describe a memory operand.

    size      -- operand size in bytes; 0 means no explicit size (``ptr``)
    base      -- base register, as a Reg or a register name, or None
    index     -- index register, as a Reg or a register name, or None
    scale     -- multiplier applied to the index register
    disp      -- displacement
    broadcast -- True for a broadcast operand (rendered with a ``_b`` suffix)
    """
    self.size = size
    # Register names given as plain strings are converted by newReg; the
    # earlier direct assignments of base/index were redundant and are gone.
    self.base = newReg(base)
    self.index = newReg(index)
    self.scale = scale
    self.disp = disp
    self.broadcast = broadcast
def __str__(self):
s = 'ptr' if self.size == 0 else g_sizeTbl[int(math.log2(self.size))]
if self.broadcast:
s += '_b'
s += ' ['
needPlus = False
if self.base:
@ -84,47 +107,72 @@ class Memory:
s += ']'
return s
def __eq__(self, rhs):
    """Return True when both operands describe the same memory reference.

    xbyak writes a plain ``ptr`` whenever the operand size can be detected
    automatically, so a size of 0 matches any explicit size (``xword`` equals
    ``ptr``).  Sizes are ignored altogether for broadcast operands.
    """
    # Only memory operands are comparable; let Python fall back to "not
    # equal" for registers, immediates, etc. instead of raising.
    if not isinstance(rhs, type(self)):
        return NotImplemented
    if self.broadcast != rhs.broadcast:
        return False
    # Two explicit, different sizes on non-broadcast operands are a mismatch.
    if not self.broadcast and self.size > 0 and rhs.size > 0 and self.size != rhs.size:
        return False
    return (self.base == rhs.base and self.index == rhs.index
            and self.scale == rhs.scale and self.disp == rhs.disp)
def parseMemory(s):
sizeTbl = {
'byte': 1, 'word': 2, 'dword': 4, 'qword': 8,
'xword': 16, 'yword': 32, 'zword': 64
}
def parseBroadcast(s):
    """Strip a broadcast marker from ``s``.

    Returns ``(text, flag)``.  If ``s`` contains ``_b``, every ``_b`` is
    removed and the flag is True.  Otherwise the first ``{1toN}`` marker (and
    all identical repetitions of it) is removed and the flag is True.  With
    neither marker present, ``s`` is returned unchanged with a False flag.
    """
    without_suffix = s.replace('_b', '')
    if without_suffix != s:
        return (without_suffix, True)
    found = re.search(r'({1to\d+})', s)
    if found:
        return (s.replace(found.group(1), ''), True)
    return (s, False)
def parseMemory(s, broadcast=False):
org_s = s
s = s.replace(' ', '').lower()
# Parse size
size = 0
base = index = None
scale = 0
disp = 0
if not broadcast:
(s, broadcast) = parseBroadcast(s)
# Parse size
for i in range(len(g_sizeTbl)):
w = g_sizeTbl[i]
if s.startswith(w):
size = 1<<i
s = s[len(w):]
break
if size == 0:
for i in range(len(g_xedSizeTbl)):
w = g_xedSizeTbl[i]
if s.startswith(w):
size = 1<<(i+4)
s = s[len(w):]
break
# Remove 'ptr' if present
if s.startswith('ptr'):
s = s[3:]
if s.startswith('_b'):
broadcast = True
s = s[2:]
# Extract the content inside brackets
r = re.match(r'\[(.*)\]', s)
if not r:
raise ValueError(f'bad format {s=}')
raise ValueError(f'bad format {org_s=}')
# Parse components
elems = re.findall(r'([a-z0-9]+)(?:\*([0-9]+))?|([+-])', r.group(1))
base = index = None
scale = 0
disp = 0
for i, e in enumerate(elems):
if e[2]: # This is a '+' or '-' sign
continue
if e[0].isalpha():
if e[0] in g_regTbl:
if base is None and (not e[1] or int(e[1]) == 1):
base = e[0]
elif index is None:
@ -137,25 +185,53 @@ def parseMemory(s):
b = 16 if e[0].startswith('0x') else 10
disp += sign * int(e[0], b)
return Memory(size, base, index, scale, disp)
return Memory(size, base, index, scale, disp, broadcast)
class Nmemonic:
    """One parsed instruction: mnemonic name, operands and attributes.

    name  -- mnemonic, e.g. ``'vaddps'``
    args  -- operands in source order
    attrs -- opmask registers / attributes attached to the instruction; kept
             sorted by their string form so that two instructions compare
             equal regardless of the order the attributes were written in
    """

    def __init__(self, name, args=None, attrs=None):
        self.name = name
        # Copy the inputs: no list is shared between instances (the former
        # ``[]`` defaults were) and the caller's lists are left untouched.
        self.args = list(args) if args is not None else []
        # list.sort() returns None, which used to wipe the attributes out;
        # sorted() returns the ordered copy that is actually wanted.
        self.attrs = sorted(attrs, key=str) if attrs is not None else []

    def __str__(self):
        # Attributes are appended to the first operand: ``op(dst|k1|T_z, src);``
        parts = []
        for i, arg in enumerate(self.args):
            text = str(arg)
            if i == 0:
                text += ''.join(f'|{e}' for e in self.attrs)
            parts.append(text)
        return f'{self.name}(' + ', '.join(parts) + ');'

    def __eq__(self, rhs):
        if not isinstance(rhs, Nmemonic):
            return NotImplemented
        return self.name == rhs.name and self.args == rhs.args and self.attrs == rhs.attrs
def parseNmemonic(s):
args = []
attrs = []
(s, broadcast) = parseBroadcast(s)
# replace xm0 with xmm0
while True:
r = re.search(r'([xyz])m(\d\d?)', s)
if not r:
break
s = s.replace(r.group(0), r.group(1) + 'mm' + r.group(2))
# check 'zmm0{k7}'
r = re.search(r'({k[1-7]})', s)
if r:
idx = int(r.group(1)[2])
attrs.append(g_maskTbl[idx-1])
s = s.replace(r.group(1), '')
# check 'zmm0|k7'
r = re.search(r'(\|\s*k[1-7])', s)
if r:
idx = int(r.group(1)[-1])
attrs.append(g_maskTbl[idx-1])
s = s.replace(r.group(1), '')
s = s.translate(g_replaceChar)
# reconstruct memory string
@ -168,13 +244,12 @@ def parseNmemonic(s):
inMemory = False
else:
v.append(e)
if e in g_sizeTbl or e == 'ptr':
if e in g_sizeTbl or e in g_xedSizeTbl or e.startswith('ptr'):
v[-1] += ' ' # to avoid 'byteptr'
inMemory = True
if ']' not in v[-1]:
inMemory = True
name = v[0]
args = []
attrs = []
for e in v[1:]:
if e.startswith('0x'):
args.append(int(e, 16))
@ -185,9 +260,12 @@ def parseNmemonic(s):
elif e in g_attrXedTbl:
attrs.append(Attr(g_attrTbl[g_attrXedTbl.index(e)]))
elif e in g_regTbl:
args.append(e)
args.append(Reg(e))
# xed special format : xmm8+3
elif e[:-2] in g_xmmTbl and e.endswith('+3'):
args.append(Reg(e[:-2]))
else:
args.append(parseMemory(e))
args.append(parseMemory(e, broadcast))
return Nmemonic(name, args, attrs)
def loadFile(name):
@ -215,13 +293,17 @@ def run(cppText, xedText):
m1 = parseNmemonic(line1)
m2 = parseNmemonic(line2)
assertEqualStr(m1, m2, f'{i}')
assertEqual(m1, m2, f'{i+1}')
print('run ok')
def assertEqualStr(a, b, msg=None):
    """Raise Exception unless ``str(a)`` and ``str(b)`` are identical.

    ``msg`` is embedded in the failure text; both string forms are attached
    to the exception as extra arguments.
    """
    lhs, rhs = str(a), str(b)
    if lhs == rhs:
        return
    raise Exception(f'assert fail {msg}:', lhs, rhs)
def assertEqual(a, b, msg=None):
    """Raise Exception when ``a != b`` holds.

    ``msg`` is embedded in the failure text; the string forms of both values
    are attached to the exception as extra arguments.
    """
    differs = a != b
    if differs:
        raise Exception(f'assert fail {msg}:', str(a), str(b))
def MemoryTest():
tbl = [
(Memory(0, rax), 'ptr [rax]'),
@ -231,18 +313,23 @@ def MemoryTest():
(Memory(8, None, rcx, 4), 'qword [rcx*4]'),
(Memory(8, rax, None, 0, 5), 'qword [rax+0x5]'),
(Memory(8, None, None, 0, 255), 'qword [0xff]'),
(Memory(0, r8, r9, 1, 32), 'ptr [r8+r9+0x20]'),
]
for (m, expected) in tbl:
assertEqualStr(m, expected)
assertEqual(Memory(16, rax), Memory(0, rax))
def parseMemoryTest():
print('parseMemoryTest')
tbl = [
('[]', Memory()),
('[rax]', Memory(0, rax)),
('ptr[rax]', Memory(0, rax)),
('ptr_b[rax]', Memory(0, rax, broadcast=True)),
('dword[rbx]', Memory(4, rbx)),
('xword ptr[rcx]', Memory(16, rcx)),
('xmmword ptr[rcx]', Memory(16, rcx)),
('xword ptr[rdx*8]', Memory(16, None, rdx, 8)),
('[12345]', Memory(0, None, None, 0, 12345)),
('[0x12345]', Memory(0, None, None, 0, 0x12345)),
@ -262,10 +349,19 @@ def parseNmemonicTest():
('mov(rax, ptr [rcx + rdx * 8 ] );', Nmemonic('mov', [rax, Memory(0, rcx, rdx, 8)])),
('vcmppd(k1, ymm2, ymm3 |T_sae, 3);', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
('vcmppd k1{sae}, ymm2, ymm3, 0x3', Nmemonic('vcmppd', [k1, ymm2, ymm3, 3], [T_sae])),
('v4fmaddps zmm1, zmm8+3, xmmword ptr [rdx+0x40]', Nmemonic('v4fmaddps', [zmm1, zmm8, Memory(16, rdx, None, 0, 0x40)])),
('vp4dpwssd zmm23{k7}{z}, zmm1+3, xmmword ptr [rax+0x40]', Nmemonic('vp4dpwssd', [zmm23, zmm1, Memory(16, rax, None, 0, 0x40)], [k7, T_z])),
('v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);', Nmemonic('v4fnmaddps', [zmm5, zmm2, Memory(0, rcx, None, 0, 0x80)], [k5])),
('vpcompressw(zmm30 | k2 |T_z, zmm1);', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
('vpcompressw zmm30{k2}{z}, zmm1', Nmemonic('vpcompressw', [zmm30, zmm1], [k2, T_z])),
('vpshldw(xmm9|k3|T_z, xmm2, ptr [rax + 0x40], 5);', Nmemonic('vpshldw', [xmm9, xmm2, Memory(0, rax, None, 0, 0x40), 5], [k3, T_z])),
('vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
('vpshrdd xmm5{k3}{z}, xmm2, dword ptr [rax+0x40]{1to4}, 0x5', Nmemonic('vpshrdd', [xmm5, xmm2, Memory(0, rax, None, 0, 0x40, True), 5], [k3, T_z])),
('vcmpph(k1, xm15, ptr[rax+64], 1);', Nmemonic('vcmpph', [k1, xm15, Memory(0, rax, None, 0, 64), 1])),
]
for (s, expected) in tbl:
e = parseNmemonic(s)
assertEqualStr(e, expected)
assertEqual(e, expected)
def test():
print('test start')

View file

@ -15,9 +15,9 @@ TARGET=$1
# Compiler flags: enable warnings and add the parent directory to the include path.
CFLAGS="-Wall -Wextra -I ../"
echo "test:" $TARGET
# Copy the target instruction list to the file names the C++ test includes.
# NOTE(review): both the cpp.txt and tmp.cpp copies appear here; this view
# seems to interleave the pre- and post-change lines -- confirm which remains.
cp $TARGET cpp.txt
cp $TARGET tmp.cpp
# Build and run the generator program.
$CXX $CFLAGS test_by_xed.cpp -o test_by_xed
./test_by_xed
# Run XED on the file "bin" and capture its output
# (presumably "bin" is written by ./test_by_xed -- TODO confirm).
$XED -64 -ir bin > out.txt
# Compare the instruction list against the XED output.
# NOTE(review): two invocations are shown (cpp.txt and $TARGET); likely only
# one is intended -- confirm.
$PYTHON test_by_xed.py cpp.txt out.txt
$PYTHON test_by_xed.py $TARGET out.txt