Adds Several Performance Related Fixes (#405)

This commit is contained in:
Nick Banks 2020-05-26 12:27:54 -07:00 коммит произвёл GitHub
Родитель 80fa7359ae
Коммит 5c6f14469e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 51 добавлений и 7 удалений

Просмотреть файл

@ -4355,6 +4355,11 @@ QuicConnRecvDatagrams(
uint32_t ReleaseChainCount = 0;
QUIC_RECEIVE_PROCESSING_STATE RecvState = { FALSE, FALSE, 0 };
RecvState.PartitionIndex = QuicPartitionIdGetIndex(Connection->PartitionID);
if (Connection->Registration &&
QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
QUIC_DBG_ASSERT(RecvState.PartitionIndex != 0);
RecvState.PartitionIndex -= QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
}
UNREFERENCED_PARAMETER(DatagramChainCount);
@ -4588,6 +4593,9 @@ QuicConnRecvDatagrams(
if (!Connection->State.UpdateWorker &&
Connection->State.Connected &&
RecvState.UpdatePartitionId) {
if (QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
RecvState.PartitionIndex += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
}
QUIC_DBG_ASSERT(RecvState.PartitionIndex != QuicPartitionIdGetIndex(Connection->PartitionID));
Connection->PartitionID = QuicPartitionIdCreate(RecvState.PartitionIndex);
QuicConnGenerateNewSourceCids(Connection, TRUE);

Просмотреть файл

@ -633,3 +633,8 @@ int64_t
QuicTimeEpochMs64(
void
);
//
// Indicates whether the registration uses split partitioning, i.e. whether
// its connections are placed on a partition offset from the partition of the
// core that receives (RSS) their datagrams. This is currently the case only
// for the QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT execution profile.
//
BOOLEAN
QuicRegistrationIsSplitPartitioning(
_In_ const QUIC_REGISTRATION* Registration
);

Просмотреть файл

@ -113,6 +113,12 @@ typedef struct QUIC_PATH QUIC_PATH;
//
#define QUIC_MAX_PARTITION_COUNT 64
//
// The number of partitions (cores) to offset from the receive (RSS) core when
// using the QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT profile. The offset is
// added to a connection's partition ID when the registration accepts the
// connection, and the receive path subtracts it again (and re-adds it) when it
// works with the raw partition index.
//
#define QUIC_MAX_THROUGHPUT_PARTITION_OFFSET 2 // Two to skip over hyper-threaded cores
//
// The fraction ((0 to UINT16_MAX) / UINT16_MAX) of memory that must be
// exhausted before enabling retry.

Просмотреть файл

@ -79,10 +79,13 @@ MsQuicRegistrationOpen(
switch (Registration->ExecProfile) {
default:
case QUIC_EXECUTION_PROFILE_LOW_LATENCY:
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC;
WorkerThreadFlags =
QUIC_THREAD_FLAG_SET_IDEAL_PROC;
break;
case QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT:
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC | QUIC_THREAD_FLAG_SET_AFFINITIZE;
WorkerThreadFlags =
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
QUIC_THREAD_FLAG_SET_AFFINITIZE;
break;
case QUIC_EXECUTION_PROFILE_TYPE_SCAVENGER:
WorkerThreadFlags = 0;
@ -91,8 +94,7 @@ MsQuicRegistrationOpen(
case QUIC_EXECUTION_PROFILE_TYPE_REAL_TIME:
WorkerThreadFlags =
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
QUIC_THREAD_FLAG_SET_AFFINITIZE |
QUIC_THREAD_FLAG_HIGH_PRIORITY;
QUIC_THREAD_FLAG_SET_AFFINITIZE;
break;
}
@ -324,16 +326,16 @@ QuicRegistrationAcceptConnection(
_In_ QUIC_CONNECTION* Connection
)
{
if (Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT) {
if (QuicRegistrationIsSplitPartitioning(Registration)) {
//
// TODO - Figure out how to check to see if hyper-threading was enabled first
// TODO - Constrain ++PartitionID to the same NUMA node.
// TODO - Constrain PartitionID to the same NUMA node.
//
// When hyper-threading is enabled, better bulk throughput can sometimes
// be gained by sharing the same physical core, but not the logical one.
// The partition used is QUIC_MAX_THROUGHPUT_PARTITION_OFFSET greater than the RSS core's.
//
Connection->PartitionID++;
Connection->PartitionID += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
}
uint8_t Index =

Просмотреть файл

@ -89,6 +89,23 @@ typedef struct QUIC_REGISTRATION {
#define QUIC_REG_VERIFY(Registration, Expr)
#endif
inline
BOOLEAN
QuicRegistrationIsSplitPartitioning(
    _In_ const QUIC_REGISTRATION* Registration
    )
{
    //
    // Split partitioning places a connection on a partition that is offset
    // (by QUIC_MAX_THROUGHPUT_PARTITION_OFFSET) from the partition of the core
    // that receives (RSS) its datagrams. With hyper-threading enabled this
    // can sometimes improve bulk throughput. For now the decision is based
    // purely on the registration's execution profile.
    //
    // TODO - Figure out how to check to see if hyper-threading is enabled
    // TODO - Constrain PartitionID to the same NUMA node.
    //
    return
        (BOOLEAN)(Registration->ExecProfile ==
            QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT);
}
//
// Tracing rundown for the registration.
//

Просмотреть файл

@ -50,6 +50,7 @@ PrintUsage()
printf(
#if _WIN32
" -comp:<####> The compartment ID to run in.\n"
" -core:<####> The CPU core to use for the main thread.\n"
#endif
" -alpn:<str> The ALPN to use. (def:%s)\n"
" -port:<####> The UDP port of the server. (def:%u)\n"
@ -109,6 +110,11 @@ ParseCommonCommands(
printf("Running in Compartment %d\n", compartmentid);
}
}
uint8_t cpuCore;
if (TryGetValue(argc, argv, "core", &cpuCore)) {
SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(1ull << cpuCore));
}
#endif
const char* alpn = DEFAULT_ALPN;