change format and add getter for data_cache_size

This commit is contained in:
MITSUNARI Shigeo 2018-02-13 12:03:43 +09:00
parent 80b3c7b933
commit fd587b55ca
6 changed files with 59 additions and 44 deletions

View file

@ -1,5 +1,5 @@
Xbyak 5.61 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
Xbyak 5.62 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
=============
Abstract
@ -333,6 +333,7 @@ The header files under xbyak/ are independent of cybozulib.
History
-------------
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)

View file

@ -1,5 +1,5 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.610
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.62
-----------------------------------------------------------------------------
◎概要
@ -343,6 +343,7 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク
-----------------------------------------------------------------------------
◎履歴
2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた)
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)

View file

@ -104,6 +104,9 @@ void putCPUinfo()
Core i7-3930K 6 2D
*/
cpu.putFamily();
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
}
}
int main()

View file

@ -105,7 +105,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x5610 /* 0xABCD = A.BC(D) */
VERSION = 0x5620 /* 0xABCD = A.BC(D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED

View file

@ -1,4 +1,4 @@
const char *getVersionString() const { return "5.61"; }
const char *getVersionString() const { return "5.62"; }
// adc op, imm : forwards to opRM_I(op, imm, 0x10, 2).
// NOTE(review): 0x10 / 2 look like the ADC opcode base and the ModRM /2 extension -- confirm against opRM_I.
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
// adc op1, op2 : forwards to opRM_RM(op1, op2, 0x10).
// NOTE(review): 0x10 is presumably the ADC r/m, r opcode base -- confirm against opRM_RM.
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
// adcx reg, op : forwards to opGen with code 0xF6, prefix 0x66 and 0x38, using isREG32_REG32orMEM as the operand check.
// NOTE(review): this presumably yields the 66 0F 38 F6 /r encoding of ADCX -- confirm against opGen.
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }

View file

@ -84,52 +84,54 @@ class Cpu {
displayModel = model;
}
}
unsigned int value_from_bits(unsigned int val, unsigned int base, unsigned int end)
/*
	Extract the bit field [base, end] from val and return it right-aligned.
	Both bounds are inclusive and bit 0 is the least significant bit,
	e.g. extractBit(x, 22, 31) returns the top 10 bits of a 32-bit x.
	precondition: base <= end < (number of bits in unsigned int)
*/
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
{
	const unsigned int bitLen = static_cast<unsigned int>(sizeof(val) * 8);
	// the range is inclusive, so the field is (end - base + 1) bits wide
	const unsigned int width = end - base + 1;
	const unsigned int shifted = val >> base;
	// a full-width field needs no mask; (1u << bitLen) would be undefined
	if (width >= bitLen) return shifted;
	return shifted & ((1u << width) - 1);
}
void setCacheHierarchy()
{
unsigned int cache_type = 42;
if ((type_ & tINTEL) == 0) return;
const unsigned int NO_CACHE = 0;
const unsigned int DATA_CACHE = 1;
// const unsigned int INSTRUCTION_CACHE = 2;
const unsigned int UNIFIED_CACHE = 3;
unsigned int smt_width = 0;
unsigned int n_cores;
unsigned int n_cores = 0;
unsigned int data[4];
if ((type_ & tINTEL) == 0) {
fprintf(stderr, "ERR cache hierarchy querying is not supported\n");
throw Error(ERR_INTERNAL);
}
// if leaf 11 exists, we use it to get the number of smt cores and cores on socket
// If x2APIC is supported, these are the only correct numbers.
/*
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
If x2APIC is supported, these are the only correct numbers.
*/
getCpuidEx(0x0, 0, data);
if(data[0] >= 11){
if (data[0] >= 11) {
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
smt_width = (data[1] & 0x7FFF);
smt_width = data[1] & 0x7FFF;
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
n_cores = (data[1] & 0x7FFF);
n_cores = data[1] & 0x7FFF;
}
/* Assumptions:
* - the first level of data cache is not shared (which is the
* case for every existing architecture) and use this to
* determine the SMT width for arch not supporting leaf 11
* - when leaf 4 reports a number of core less than n_cores
* on socket reported by leaf 11, then it is a correct number
* of cores not an upperbound */
for (int i = 0; ((cache_type != NO_CACHE) && (data_cache_levels < max_number_cache_levels)); i++) {
/*
Assumptions:
the first level of data cache is not shared (which is the
case for every existing architecture) and use this to
determine the SMT width for arch not supporting leaf 11.
when leaf 4 reports a number of core less than n_cores
on socket reported by leaf 11, then it is a correct number
of cores not an upperbound.
*/
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
getCpuidEx(0x4, i, data);
cache_type = value_from_bits(data[0], 0, 4);
if ((cache_type == DATA_CACHE) || (cache_type == UNIFIED_CACHE)) {
int nb_logical_cores = (std::min)(value_from_bits(data[0], 14, 25) + 1,
n_cores);
unsigned int cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
unsigned int nb_logical_cores = (std::min)(extractBit(data[0], 14, 25) + 1, n_cores);
data_cache_size[data_cache_levels] =
(value_from_bits(data[1], 22, 31) + 1)
* (value_from_bits(data[1], 12, 21) + 1)
* (value_from_bits(data[1], 0, 11) + 1)
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
if ((cache_type == DATA_CACHE) && (smt_width == 0)) smt_width = nb_logical_cores;
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
assert(smt_width != 0);
cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
data_cache_levels++;
@ -146,11 +148,24 @@ public:
int displayFamily; // family + extFamily
int displayModel; // model + extModel
static const unsigned int max_number_cache_levels = 10;
unsigned int data_cache_size[max_number_cache_levels];
unsigned int cores_sharing_data_cache[max_number_cache_levels];
// may I move these members into private?
static const unsigned int maxNumberCacheLevels = 10;
unsigned int data_cache_size[maxNumberCacheLevels];
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
// return data_cache_levels, the number of entries setCacheHierarchy() recorded in
// data_cache_size[] / cores_sharing_data_cache[]
unsigned int getDataCacheLevels() const { return data_cache_levels; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return cores_sharing_data_cache[i];
}
unsigned int getDataCacheSize(unsigned int i) const
{
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
return data_cache_size[i];
}
/*
data[] = { eax, ebx, ecx, edx }
*/
@ -183,10 +198,6 @@ public:
#endif
}
typedef uint64 Type;
static const Type NO_CACHE = 0;
static const Type DATA_CACHE = 1;
static const Type INSTRUCTION_CACHE = 2;
static const Type UNIFIED_CACHE = 3;
static const Type NONE = 0;
static const Type tMMX = 1 << 0;
@ -346,8 +357,7 @@ public:
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
if ((type_ & tINTEL) == tINTEL)
setCacheHierarchy();
setCacheHierarchy();
}
void putFamily() const
{