зеркало из https://github.com/microsoft/msquic.git
Adds Several Performance Related Fixes (#405)
This commit is contained in:
Родитель
80fa7359ae
Коммит
5c6f14469e
|
@ -4355,6 +4355,11 @@ QuicConnRecvDatagrams(
|
||||||
uint32_t ReleaseChainCount = 0;
|
uint32_t ReleaseChainCount = 0;
|
||||||
QUIC_RECEIVE_PROCESSING_STATE RecvState = { FALSE, FALSE, 0 };
|
QUIC_RECEIVE_PROCESSING_STATE RecvState = { FALSE, FALSE, 0 };
|
||||||
RecvState.PartitionIndex = QuicPartitionIdGetIndex(Connection->PartitionID);
|
RecvState.PartitionIndex = QuicPartitionIdGetIndex(Connection->PartitionID);
|
||||||
|
//
// When the registration uses split partitioning, the connection's partition
// has been shifted by QUIC_MAX_THROUGHPUT_PARTITION_OFFSET relative to the
// receive (RSS) partition. Undo that shift so the index can be compared
// against the partition the datagrams actually arrive on.
//
if (Connection->Registration &&
    QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
    //
    // The index must be at least as large as the offset being removed;
    // checking only for non-zero would let smaller non-zero values underflow.
    //
    QUIC_DBG_ASSERT(RecvState.PartitionIndex >= QUIC_MAX_THROUGHPUT_PARTITION_OFFSET);
    RecvState.PartitionIndex -= QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
}
|
||||||
|
|
||||||
UNREFERENCED_PARAMETER(DatagramChainCount);
|
UNREFERENCED_PARAMETER(DatagramChainCount);
|
||||||
|
|
||||||
|
@ -4588,6 +4593,9 @@ QuicConnRecvDatagrams(
|
||||||
if (!Connection->State.UpdateWorker &&
|
if (!Connection->State.UpdateWorker &&
|
||||||
Connection->State.Connected &&
|
Connection->State.Connected &&
|
||||||
RecvState.UpdatePartitionId) {
|
RecvState.UpdatePartitionId) {
|
||||||
|
if (QuicRegistrationIsSplitPartitioning(Connection->Registration)) {
|
||||||
|
RecvState.PartitionIndex += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
|
||||||
|
}
|
||||||
QUIC_DBG_ASSERT(RecvState.PartitionIndex != QuicPartitionIdGetIndex(Connection->PartitionID));
|
QUIC_DBG_ASSERT(RecvState.PartitionIndex != QuicPartitionIdGetIndex(Connection->PartitionID));
|
||||||
Connection->PartitionID = QuicPartitionIdCreate(RecvState.PartitionIndex);
|
Connection->PartitionID = QuicPartitionIdCreate(RecvState.PartitionIndex);
|
||||||
QuicConnGenerateNewSourceCids(Connection, TRUE);
|
QuicConnGenerateNewSourceCids(Connection, TRUE);
|
||||||
|
|
|
@ -633,3 +633,8 @@ int64_t
|
||||||
QuicTimeEpochMs64(
|
QuicTimeEpochMs64(
|
||||||
void
|
void
|
||||||
);
|
);
|
||||||
|
|
||||||
|
BOOLEAN
|
||||||
|
QuicRegistrationIsSplitPartitioning(
|
||||||
|
_In_ const QUIC_REGISTRATION* Registration
|
||||||
|
);
|
||||||
|
|
|
@ -113,6 +113,12 @@ typedef struct QUIC_PATH QUIC_PATH;
|
||||||
//
|
//
|
||||||
#define QUIC_MAX_PARTITION_COUNT 64
|
#define QUIC_MAX_PARTITION_COUNT 64
|
||||||
|
|
||||||
|
//
|
||||||
|
// The number of partitions (cores) to offset from the receive (RSS) core when
|
||||||
|
// using the QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT profile.
|
||||||
|
//
|
||||||
|
#define QUIC_MAX_THROUGHPUT_PARTITION_OFFSET 2 // Two to skip over hyper-threaded cores
|
||||||
|
|
||||||
//
|
//
|
||||||
// The fraction ((0 to UINT16_MAX) / UINT16_MAX) of memory that must be
|
// The fraction ((0 to UINT16_MAX) / UINT16_MAX) of memory that must be
|
||||||
// exhausted before enabling retry.
|
// exhausted before enabling retry.
|
||||||
|
|
|
@ -79,10 +79,13 @@ MsQuicRegistrationOpen(
|
||||||
switch (Registration->ExecProfile) {
|
switch (Registration->ExecProfile) {
|
||||||
default:
|
default:
|
||||||
case QUIC_EXECUTION_PROFILE_LOW_LATENCY:
|
case QUIC_EXECUTION_PROFILE_LOW_LATENCY:
|
||||||
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC;
|
WorkerThreadFlags =
|
||||||
|
QUIC_THREAD_FLAG_SET_IDEAL_PROC;
|
||||||
break;
|
break;
|
||||||
case QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT:
|
case QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT:
|
||||||
WorkerThreadFlags = QUIC_THREAD_FLAG_SET_IDEAL_PROC | QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
WorkerThreadFlags =
|
||||||
|
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
|
||||||
|
QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
||||||
break;
|
break;
|
||||||
case QUIC_EXECUTION_PROFILE_TYPE_SCAVENGER:
|
case QUIC_EXECUTION_PROFILE_TYPE_SCAVENGER:
|
||||||
WorkerThreadFlags = 0;
|
WorkerThreadFlags = 0;
|
||||||
|
@ -91,8 +94,7 @@ MsQuicRegistrationOpen(
|
||||||
case QUIC_EXECUTION_PROFILE_TYPE_REAL_TIME:
|
case QUIC_EXECUTION_PROFILE_TYPE_REAL_TIME:
|
||||||
WorkerThreadFlags =
|
WorkerThreadFlags =
|
||||||
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
|
QUIC_THREAD_FLAG_SET_IDEAL_PROC |
|
||||||
QUIC_THREAD_FLAG_SET_AFFINITIZE |
|
QUIC_THREAD_FLAG_SET_AFFINITIZE;
|
||||||
QUIC_THREAD_FLAG_HIGH_PRIORITY;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,16 +326,16 @@ QuicRegistrationAcceptConnection(
|
||||||
_In_ QUIC_CONNECTION* Connection
|
_In_ QUIC_CONNECTION* Connection
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
if (Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT) {
|
if (QuicRegistrationIsSplitPartitioning(Registration)) {
|
||||||
//
|
//
|
||||||
// TODO - Figure out how to check to see if hyper-threading was enabled first
|
// TODO - Figure out how to check to see if hyper-threading was enabled first
|
||||||
// TODO - Constrain ++PartitionID to the same NUMA node.
|
// TODO - Constrain PartitionID to the same NUMA node.
|
||||||
//
|
//
|
||||||
// When hyper-threading is enabled, better bulk throughput can sometimes
|
// When hyper-threading is enabled, better bulk throughput can sometimes
|
||||||
// be gained by sharing the same physical core, but not the logical one.
|
// be gained by sharing the same physical core, but not the logical one.
|
||||||
// The shared one is always one greater than the RSS core.
|
// The core used is QUIC_MAX_THROUGHPUT_PARTITION_OFFSET greater than the RSS core.
|
||||||
//
|
//
|
||||||
Connection->PartitionID++;
|
Connection->PartitionID += QUIC_MAX_THROUGHPUT_PARTITION_OFFSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t Index =
|
uint8_t Index =
|
||||||
|
|
|
@ -89,6 +89,23 @@ typedef struct QUIC_REGISTRATION {
|
||||||
#define QUIC_REG_VERIFY(Registration, Expr)
|
#define QUIC_REG_VERIFY(Registration, Expr)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
inline
|
||||||
|
BOOLEAN
|
||||||
|
QuicRegistrationIsSplitPartitioning(
|
||||||
|
_In_ const QUIC_REGISTRATION* Registration
|
||||||
|
)
|
||||||
|
{
|
||||||
|
//
|
||||||
|
// When hyper-threading is enabled, better bulk throughput can sometimes
|
||||||
|
// be gained by sharing the same physical core, but not the logical one.
|
||||||
|
// The shared core is always one greater than the RSS core.
|
||||||
|
//
|
||||||
|
// TODO - Figure out how to check to see if hyper-threading is enabled
|
||||||
|
// TODO - Constrain ++PartitionID to the same NUMA node.
|
||||||
|
//
|
||||||
|
return Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT;
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Tracing rundown for the registration.
|
// Tracing rundown for the registration.
|
||||||
//
|
//
|
||||||
|
|
|
@ -50,6 +50,7 @@ PrintUsage()
|
||||||
printf(
|
printf(
|
||||||
#if _WIN32
|
#if _WIN32
|
||||||
" -comp:<####> The compartment ID to run in.\n"
|
" -comp:<####> The compartment ID to run in.\n"
|
||||||
|
" -core:<####> The CPU core to use for the main thread.\n"
|
||||||
#endif
|
#endif
|
||||||
" -alpn:<str> The ALPN to use. (def:%s)\n"
|
" -alpn:<str> The ALPN to use. (def:%s)\n"
|
||||||
" -port:<####> The UDP port of the server. (def:%u)\n"
|
" -port:<####> The UDP port of the server. (def:%u)\n"
|
||||||
|
@ -109,6 +110,11 @@ ParseCommonCommands(
|
||||||
printf("Running in Compartment %d\n", compartmentid);
|
printf("Running in Compartment %d\n", compartmentid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint8_t cpuCore;
|
||||||
|
if (TryGetValue(argc, argv, "core", &cpuCore)) {
|
||||||
|
SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)(1ull << cpuCore));
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const char* alpn = DEFAULT_ALPN;
|
const char* alpn = DEFAULT_ALPN;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче