#19921 [Dup] LLC Core count calculations updated (#20171)

### Description
<!-- Describe your changes. -->

See #19921. This PR only addresses one review comment:
https://github.com/microsoft/onnxruntime/pull/19921#discussion_r1543398640

Since #19921 comes from an external branch, a separate pull request had
to be opened for this.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

---------

Co-authored-by: Sai Kishan Pampana <sai.kishan.pampana@intel.com>
Co-authored-by: rachguo <rachguo@rachguos-Mini.attlocal.net>
Co-authored-by: Jian Chen <cjian@microsoft.com>
This commit is contained in:
Rachel Guo 2024-04-02 16:53:47 -07:00 коммит произвёл GitHub
Родитель 12e2538065
Коммит 19793de1b3
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 18 добавлений и 16 удалений

Просмотреть файл

@ -32,7 +32,7 @@ limitations under the License.
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
#if defined(_M_X64) && !defined(_M_ARM64EC) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC)
#include "core/platform/windows/hardware_core_enumerator.h"
#endif
#include <unsupported/Eigen/CXX11/ThreadPool>
@ -252,7 +252,7 @@ void WindowsEnv::SleepForMicroseconds(int64_t micros) const {
}
// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)
static constexpr std::array<int, 3> kVendorID_Intel = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
#endif
int WindowsEnv::DefaultNumCores() {
@ -261,7 +261,7 @@ int WindowsEnv::DefaultNumCores() {
int WindowsEnv::GetNumPhysicalCpuCores() const {
// EIGEN_NO_CPUID is not defined in any C/C++ source code. It is a compile option.
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID) && defined(ONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH)
#if defined(_M_X64) && !defined(_M_ARM64EC) && !defined(EIGEN_NO_CPUID)
// The following code is a temporary fix for a perf problem on Intel's Meteor Lake CPUs. The Intel compute platform has
// a hybrid architecture in which some CPU cores run significantly slower than the others. If we distribute our compute work
// evenly across all CPU cores, the slowest CPU core will drag the performance down. So, instead, we reduce the total number

Просмотреть файл

@ -15,7 +15,7 @@ struct LogicalProcessorInformation {
// Aggregate of the core counts filled in by the core-enumeration routine in this file.
struct CoreCounter {
// Count of physical cores. NOTE(review): the statement that updates this field is not visible in this hunk - confirm.
uint32_t PhysicalCores = 0;
// Assigned CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask): bits present in the level-2 cache mask
// but absent from the level-3 cache mask.
uint32_t SocDieCores = 0;
// Assigned PhysicalCores minus CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask).
uint32_t LLCCores = 0;
};
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
@ -42,7 +42,7 @@ uint32_t CountSetBits(DWORD input) {
return c;
}
static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);
CoreCounter cores;
@ -73,17 +73,18 @@ static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
read += currentProcessorInfo->Size;
}
// Cores with L2 and LLC cache levels = # Physical Cores - # logical cores without LLC
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
cores.SocDieCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
return cores;
}
uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
// We want to use the number of physical cores, but exclude soc cores
return cores.PhysicalCores - cores.SocDieCores;
auto cores = GetCoreInfo();
return cores.LLCCores;
}
} // namespace onnxruntime

Просмотреть файл

@ -14,7 +14,7 @@ struct LogicalProcessorInformation {
// Aggregate of the core counts filled in by the core-enumeration routine in this file.
struct CoreCounter {
// Incremented while walking the processor-information records
// (NOTE(review): the case label guarding the increment is outside this hunk - confirm which relationship it is).
uint32_t PhysicalCores = 0;
// Assigned CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask): bits present in the level-2 cache mask
// but absent from the level-3 cache mask.
uint32_t Num2CacheCores = 0;
// Assigned PhysicalCores minus CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask).
uint32_t LLCCores = 0;
};
static LogicalProcessorInformation GetLogicalProcessorInfos(LOGICAL_PROCESSOR_RELATIONSHIP relationship) {
@ -42,7 +42,7 @@ uint32_t CountSetBits(DWORD input) {
return c;
}
static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
static CoreCounter GetCoreInfo() {
auto logicalProcessorInformation = GetLogicalProcessorInfos(RelationAll);
CoreCounter cores;
@ -64,6 +64,7 @@ static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
cores.PhysicalCores++;
break;
case RelationCache:
// Cache level masks count logical processors
if (currentProcessorInfo->Cache.Level == 2) {
dwLevel2GroupMask |= currentProcessorInfo->Cache.GroupMask.Mask;
} else if (currentProcessorInfo->Cache.Level == 3) {
@ -75,14 +76,15 @@ static CoreCounter GetNumberOPhysicalAndEngineeringCores() {
read += currentProcessorInfo->Size;
}
cores.Num2CacheCores = CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
cores.LLCCores = cores.PhysicalCores - CountSetBits(dwLevel2GroupMask & ~dwLevel3GroupMask);
return cores;
}
uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
auto cores = GetNumberOPhysicalAndEngineeringCores();
auto cores = GetCoreInfo();
#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
@ -97,9 +99,8 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
auto isHybrid = (regs_leaf7[3] & (1 << 15));
if (isIntel && isHybrid) {
// We want to use the number of physical cores, but exclude soc cores
// On Intel Hybrid processors, numSocCores == cores.Num2CacheCores
return cores.PhysicalCores - cores.Num2CacheCores;
// We want to use the number of physical cores, but exclude cores without an LLC
return cores.LLCCores;
}
#endif