зеркало из https://github.com/microsoft/msquic.git
Adds Several Performance Related Fixes (#405)
This commit is contained in:
Родитель
80fa7359ae
Коммит
5c6f14469e
|
@ -4355,6 +4355,11 @@ QuicConnRecvDatagrams(
|
|||
uint32_t ReleaseChainCount = 0;
|
||||
QUIC_RECEIVE_PROCESSING_STATE RecvState = { FALSE, FALSE, 0 };
|
||||
RecvState.PartitionIndex = QuicPartitionIdGetIndex(Connection->PartitionID);
|
||||
if (Connection->Registration &&
|
||||
QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
|
||||
QUIC_DBG_ASSERT(RecvState.PartitionIndex != 0);
|
||||
RecvState.PartitionIndex -= QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
|
||||
}
|
||||
|
||||
UNREFERENCED_PARAMETER(DatagramChainCount);
|
||||
|
||||
|
@ -4588,6 +4593,9 @@ QuicConnRecvDatagrams(
|
|||
if (!Connection->State.UpdateWorker &&
|
||||
Connection->State.Connected &&
|
||||
RecvState.UpdatePartitionId) {
|
||||
if (QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
|
||||
RecvState.PartitionIndex += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
|
||||
}
|
||||
QUIC_DBG_ASSERT(RecvState.PartitionIndex != QuicPartitionIdGetIndex(Connection->PartitionID));
|
||||
Connection->PartitionID = QuicPartitionIdCreate(RecvState.PartitionIndex);
|
||||
QuicConnGenerateNewSourceCids(Connection, TRUE);
|
||||
|
|
|
@ -633,3 +633,8 @@ int64_t
|
|||
QuicTimeEpochMs64(
|
||||
void
|
||||
);
|
||||
|
||||
//
// Forward declaration of the helper that reports whether a registration uses
// split partitioning, i.e. whether its execution profile is
// QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT.
//
BOOLEAN
|
||||
QuicRegistrationIsSplitPartitioning(
|
||||
_In_ const QUIC_REGISTRATION* Registration
|
||||
);
|
||||
|
|
|
@ -113,6 +113,12 @@ typedef struct QUIC_PATH QUIC_PATH;
|
|||
//
|
||||
#define QUIC_MAX_PARTITION_COUNT 64
|
||||
|
||||
//
|
||||
// The number of partitions (cores) to offset from the receive (RSS) core when
|
||||
// using the QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT profile.
|
||||
//
|
||||
#define QUIC_MAX_THROUGHPUT_PARTITION_OFFSET 2 // Two to skip over hyper-threaded cores
|
||||
|
||||
//
|
||||
// The fraction ((0 to UINT16_MAX) / UINT16_MAX) of memory that must be
|
||||
// exhausted before enabling retry.
|
||||
|
|
|
@ -79,10 +79,13 @@ MsQuicRegistrationOpen(
|
|||
switch (Registration->ExecProfile) {
|
||||
default:
|
||||
case QUIC_EXECUTION_PROFILE_LOW_LATENCY:
|
||||
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC;
|
||||
WorkerThreadFlags =
|
||||
QUIC_THREAD_FLAG_SET_IDEAL_PROC;
|
||||
break;
|
||||
case QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT:
|
||||
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC | QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
||||
WorkerThreadFlags =
|
||||
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
|
||||
QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
||||
break;
|
||||
case QUIC_EXECUTION_PROFILE_TYPE_SCAVENGER:
|
||||
WorkerThreadFlags = 0;
|
||||
|
@ -91,8 +94,7 @@ MsQuicRegistrationOpen(
|
|||
case QUIC_EXECUTION_PROFILE_TYPE_REAL_TIME:
|
||||
WorkerThreadFlags =
|
||||
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
|
||||
QUIC_THREAD_FLAG_SET_AFFINITIZE |
|
||||
QUIC_THREAD_FLAG_HIGH_PRIORITY;
|
||||
QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -324,16 +326,16 @@ QuicRegistrationAcceptConnection(
|
|||
_In_ QUIC_CONNECTION* Connection
|
||||
)
|
||||
{
|
||||
if (Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT) {
|
||||
if (QuicRegistrationIsSplitPartitioning(Registration)) {
|
||||
//
|
||||
// TODO - Figure out how to check to see if hyper-threading was enabled first
|
||||
// TODO - Constrain ++PartitionID to the same NUMA node.
|
||||
// TODO - Constrain PartitionID to the same NUMA node.
|
||||
//
|
||||
// When hyper-threading is enabled, better bulk throughput can sometimes
|
||||
// be gained by sharing the same physical core, but not the logical one.
|
||||
// The shared one is always one greater than the RSS core.
|
||||
//
|
||||
Connection->PartitionID++;
|
||||
Connection->PartitionID += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
|
||||
}
|
||||
|
||||
uint8_t Index =
|
||||
|
|
|
@ -89,6 +89,23 @@ typedef struct QUIC_REGISTRATION {
|
|||
#define QUIC_REG_VERIFY(Registration, Expr)
|
||||
#endif
|
||||
|
||||
//
// Returns TRUE when the registration's execution profile is
// QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT. Callers use this to decide
// whether to shift a connection's partition away from the receive (RSS)
// partition by QUIC_MAX_THROUGHPUT_PARTITION_OFFSET.
//
inline
|
||||
BOOLEAN
|
||||
QuicRegistrationIsSplitPartitioning(
|
||||
_In_ const QUIC_REGISTRATION* Registration
|
||||
)
|
||||
{
|
||||
//
|
||||
// When hyper-threading is enabled, better bulk throughput can sometimes
|
||||
// be gained by sharing the same physical core, but not the logical one.
|
||||
// The shared core is always one greater than the RSS core.
// NOTE(review): callers offset by QUIC_MAX_THROUGHPUT_PARTITION_OFFSET (2),
// not by one - confirm the sentence above still describes the intent.
|
||||
//
|
||||
// TODO - Figure out how to check to see if hyper-threading is enabled
|
||||
// TODO - Constrain PartitionID to the same NUMA node.
|
||||
//
|
||||
return Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT;
|
||||
}
|
||||
|
||||
//
|
||||
// Tracing rundown for the registration.
|
||||
//
|
||||
|
|
|
@ -50,6 +50,7 @@ PrintUsage()
|
|||
printf(
|
||||
#if _WIN32
|
||||
" -comp:<####> The compartment ID to run in.\n"
|
||||
" -core:<####> The CPU core to use for the main thread.\n"
|
||||
#endif
|
||||
" -alpn:<str> The ALPN to use. (def:%s)\n"
|
||||
" -port:<####> The UDP port of the server. (def:%u)\n"
|
||||
|
@ -109,6 +110,11 @@ ParseCommonCommands(
|
|||
printf("Running in Compartment %d\n", compartmentid);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t cpuCore;
|
||||
if (TryGetValue(argc, argv, "core", &cpuCore)) {
|
||||
SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(1ull << cpuCore));
|
||||
}
|
||||
#endif
|
||||
|
||||
const char* alpn = DEFAULT_ALPN;
|
||||
|
|
Загрузка…
Ссылка в новой задаче