diff --git a/.azure/azure-pipelines.ci.yml b/.azure/azure-pipelines.ci.yml index b1ab5c65d..77ccc6dd2 100644 --- a/.azure/azure-pipelines.ci.yml +++ b/.azure/azure-pipelines.ci.yml @@ -121,6 +121,26 @@ stages: config: Release extraName: 'static' extraBuildArgs: -EnableTelemetryAsserts -Static -ExtraArtifactDir Static + - template: ./templates/build-config-user.yml + parameters: + image: windows-2019 + platform: windows + arch: x64 + tls: schannel + config: Release + extraName: 'xdp' + extraPrepareArgs: -Xdp + extraBuildArgs: -EnableTelemetryAsserts -UseXdp -ExtraArtifactDir xdp + - template: ./templates/build-config-user.yml + parameters: + image: windows-2019 + platform: windows + arch: x64 + tls: openssl + config: Release + extraName: 'xdp' + extraPrepareArgs: -Xdp + extraBuildArgs: -EnableTelemetryAsserts -UseXdp -ExtraArtifactDir xdp - stage: build_windows_debug displayName: Build Windows - Debug diff --git a/.azure/templates/build-config-user.yml b/.azure/templates/build-config-user.yml index e94442167..488e60a42 100644 --- a/.azure/templates/build-config-user.yml +++ b/.azure/templates/build-config-user.yml @@ -7,6 +7,7 @@ parameters: arch: '' tls: '' config: 'Debug,Release' + extraPrepareArgs: '' extraBuildArgs: '' skipArtifacts: false extraName: '' @@ -50,7 +51,7 @@ jobs: inputs: pwsh: true filePath: scripts/prepare-machine.ps1 - arguments: -Configuration Build -InitSubmodules -Tls ${{ parameters.tls }} -FailOnError -Extra '${{ parameters.extraBuildArgs }}' + arguments: -Configuration Build -InitSubmodules -Tls ${{ parameters.tls }} -FailOnError -Extra '${{ parameters.extraBuildArgs }}' ${{ parameters.extraPrepareArgs }} - task: PowerShell@2 displayName: Build Source Code (Debug) diff --git a/CMakeLists.txt b/CMakeLists.txt index 83426c2a2..c7dc5d72b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,7 @@ option(QUIC_TELEMETRY_ASSERTS "Enable telemetry asserts in release builds" OFF) option(QUIC_USE_SYSTEM_LIBCRYPTO "Use system libcrypto if 
openssl TLS" OFF) option(QUIC_HIGH_RES_TIMERS "Configure the system to use high resolution timers" OFF) option(QUIC_SHARED_EC "Use shared execution contexts between QUIC and UDP" OFF) +option(QUIC_USE_XDP "Uses XDP instead of socket APIs" OFF) option(QUIC_DISABLE_POSIX_GSO "Disable GSO for systems that say they support it but don't" OFF) set(QUIC_FOLDER_PREFIX "" CACHE STRING "Optional prefix for source group folders when using an IDE generator") set(QUIC_LIBRARY_NAME "msquic" CACHE STRING "Override the output library name") @@ -288,6 +289,10 @@ if(QUIC_SHARED_EC) list(APPEND QUIC_COMMON_DEFINES QUIC_USE_EXECUTION_CONTEXTS=1) endif() +if(QUIC_USE_XDP) + list(APPEND QUIC_COMMON_DEFINES QUIC_USE_EXECUTION_CONTEXTS=1 QUIC_USE_RAW_DATAPATH=1) +endif() + if(QUIC_TLS STREQUAL "schannel") message(STATUS "Enabling Schannel configuration tests") list(APPEND QUIC_COMMON_DEFINES QUIC_TEST_SCHANNEL_FLAGS=1) diff --git a/scripts/build.ps1 b/scripts/build.ps1 index c1de7b521..b46ce74cb 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -87,6 +87,9 @@ This script provides helpers for building msquic. .PARAMETER SharedEC Uses shared execution contexts (threads) where possible. +.PARAMETER UseXdp + Use XDP for the datapath instead of system socket APIs. 
+
 .PARAMETER ExtraArtifactDir
     Add an extra classifier to the artifact directory to allow publishing alternate builds of same base library

@@ -187,6 +190,9 @@ param (
     [Parameter(Mandatory = $false)]
     [switch]$SharedEC = $false,

+    [Parameter(Mandatory = $false)]
+    [switch]$UseXdp = $false,
+
     [Parameter(Mandatory = $false)]
     [string]$ExtraArtifactDir = "",

@@ -411,6 +417,9 @@ function CMake-Generate {
     if ($SharedEC) {
         $Arguments += " -DQUIC_SHARED_EC=on"
     }
+    if ($UseXdp) {
+        $Arguments += " -DQUIC_USE_XDP=on"
+    }
     if ($Platform -eq "android") {
         $env:PATH = "$env:ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin:$env:PATH"
         switch ($Arch) {
diff --git a/scripts/clog.inputs b/scripts/clog.inputs
index 3ed0c1f6b..17c3b9796 100644
--- a/scripts/clog.inputs
+++ b/scripts/clog.inputs
@@ -13,6 +13,10 @@
 ../src/platform/certificates_posix.c
 ../src/platform/hashtable.c
 ../src/platform/datapath_winuser.c
+../src/platform/datapath_raw_dpdk.c
+../src/platform/datapath_raw_socket.c
+../src/platform/datapath_raw_xdp.c
+../src/platform/datapath_raw.c
 ../src/platform/crypt_bcrypt.c
 ../src/platform/platform_winuser.c
 ../src/platform/toeplitz.c
diff --git a/scripts/lola-perf.ps1 b/scripts/lola-perf.ps1
new file mode 100644
index 000000000..6cdb6ed53
--- /dev/null
+++ b/scripts/lola-perf.ps1
@@ -0,0 +1,117 @@
+<#
+
+.SYNOPSIS
+This script runs performance tests for LoLa using secnetperf and generates results in a table.
+
+.PARAMETER SecNetPerfBinary
+    Specifies the secnetperf binary to use.
+
+.PARAMETER Target
+    Specifies -target parameter for secnetperf.
+
+.PARAMETER Bind
+    Specifies -bind parameter for secnetperf.
+
+.PARAMETER Responses
+    Specifies -response parameter for secnetperf.
+
+.PARAMETER NumIterations
+    Specifies the number of iterations to be run.
+#>
+
+param (
+    [Parameter(Mandatory = $true)]
+    [string]$SecNetPerfBinary,
+    [Parameter(Mandatory = $false)]
+    [string]$Target = "quic-server",
+    [Parameter(Mandatory = $false)]
+    [string]$Bind = "0.0.0.0",
+    [Parameter(Mandatory = $false)]
+    [Int32[]]$Responses = @(512, 1024, 4096, 8192, 16384, 32768, 65536),
+    [Parameter(Mandatory = $false)]
+    [Int32]$NumIterations = 3
+)
+
+# One row of the results table: per-statistic medians across iterations for one response size.
+class TestResult {
+    [string]$ResponseSize
+    [Int32]$Min
+    [Int32]$P50
+    [Int32]$P90
+    [Int32]$P99
+    [Int32]$P999
+    [Int32]$P9999
+}
+
+[Int32]$script:TotalNumTestCases = 0
+[Int32]$script:NumTestCasesCompleted = 0
+
+# Runs secnetperf with -test:rps $NumIterations times for one response size and
+# returns a TestResult holding, for each parsed statistic, the median across
+# the iterations. Throws if a run's output does not match the result pattern.
+function RunTest (
+    [string]$ResponseSize,
+    [Int32]$NumIterations
+    )
+{
+    $Result = [TestResult]::new()
+    $Result.ResponseSize = $ResponseSize
+
+    [System.Collections.ArrayList]$Min = @()
+    [System.Collections.ArrayList]$P50 = @();
+    [System.Collections.ArrayList]$P90 = @();
+    [System.Collections.ArrayList]$P99 = @();
+    [System.Collections.ArrayList]$P999 = @();
+    [System.Collections.ArrayList]$P9999 = @();
+
+    for ($i = 0; $i -lt $NumIterations; $i++) {
+        $Output = Invoke-Expression "$SecNetPerfBinary -test:rps -target:$Target -bind:$Bind -conns:1 -requests:1 -request:512 -response:$ResponseSize"
+        # Literal dots in "99.9th"/"99.99th" are escaped so they cannot match arbitrary characters.
+        $MatchResults = $Output | Select-String -Pattern "Result: .*? RPS, Min: (.*?), Max: .*?, 50th: (.*?), 90th: (.*?), 99th: (.*?), 99\.9th: (.*?), 99\.99th: (.*?),"
+        if (!$MatchResults) {
+            # Write-Error is non-terminating: execution would continue and
+            # dereference the null match below, so fail hard instead.
+            throw "Failed to parse secnetperf output"
+        }
+
+        $Groups = $MatchResults.Matches.Groups
+
+        Write-Debug "$ResponseSize,$([Int32]$Groups[1].Value),$([Int32]$Groups[2].Value),$([Int32]$Groups[3].Value),$([Int32]$Groups[4].Value),$([Int32]$Groups[5].Value),$([Int32]$Groups[6].Value)"
+
+        $null = $Min.Add([Int32]$Groups[1].Value)
+        $null = $P50.Add([Int32]$Groups[2].Value)
+        $null = $P90.Add([Int32]$Groups[3].Value)
+        $null = $P99.Add([Int32]$Groups[4].Value)
+        $null = $P999.Add([Int32]$Groups[5].Value)
+        $null = $P9999.Add([Int32]$Groups[6].Value)
+
+        $script:NumTestCasesCompleted += 1
+        Write-Progress -Activity "Running tests" -Status "Progress:" -PercentComplete (($script:NumTestCasesCompleted / $script:TotalNumTestCases) * 100)
+    }
+
+    # Median index, floored explicitly: a fractional index such as 3 / 2 = 1.5
+    # is rounded to the nearest even integer (2), which would select the largest
+    # of three samples. @() keeps a single sorted sample indexable as an array.
+    $Mid = [int][Math]::Floor($Min.Count / 2)
+    $Result.Min = @($Min | Sort-Object)[$Mid]
+    $Result.P50 = @($P50 | Sort-Object)[$Mid]
+    $Result.P90 = @($P90 | Sort-Object)[$Mid]
+    $Result.P99 = @($P99 | Sort-Object)[$Mid]
+    $Result.P999 = @($P999 | Sort-Object)[$Mid]
+    $Result.P9999 = @($P9999 | Sort-Object)[$Mid]
+
+    return $Result
+}
+
+[System.Collections.ArrayList]$Results = @()
+
+$script:TotalNumTestCases = $Responses.Count * $NumIterations
+
+Write-Debug "ResponseSize,Min,P50,P90,P99,P999,P9999"
+
+foreach ($Response in $Responses) {
+    $Result = RunTest $Response $NumIterations
+    $null = $Results.Add($Result)
+}
+
+$Results | Format-Table -AutoSize
diff --git a/scripts/prepare-machine.ps1 b/scripts/prepare-machine.ps1
index bb778e5c8..7b6d089ac 100644
--- a/scripts/prepare-machine.ps1
+++ b/scripts/prepare-machine.ps1
@@ -63,7 +63,10 @@ param (
     [switch]$DuoNic,

     [Parameter(Mandatory = $false)]
-    [switch]$NoCodeCoverage
+    [switch]$NoCodeCoverage,
+
+    [Parameter(Mandatory = $false)]
+    [switch]$Xdp
 )

 #Requires -RunAsAdministrator
@@ -133,6 +136,18 @@ function Download-CoreNet-Deps {
     }
 }

+function Download-Xdp-Kit {
+
if (!(Test-Path $ArtifactsPath)) { mkdir $ArtifactsPath } + $XdpPath = Join-Path $ArtifactsPath "xdp" + if (!(Test-Path $XdpPath)) { + Write-Host "Downloading XDP Kit" + $ZipPath = Join-Path $ArtifactsPath "xdp.zip" + Invoke-WebRequest -Uri "https://lolafiles.blob.core.windows.net/nibanks/xdp.zip" -OutFile $ZipPath + Expand-Archive -Path $ZipPath -DestinationPath $XdpPath -Force + Remove-Item -Path $ZipPath + } +} + # Installs DuoNic from the CoreNet-CI repo. function Install-DuoNic { # Check to see if test signing is enabled. @@ -323,6 +338,10 @@ if ($IsWindows) { } } + if ($Xdp) { + Download-Xdp-Kit + } + } elseif ($IsLinux) { switch ($Configuration) { "Build" { diff --git a/scripts/update-sidecar.ps1 b/scripts/update-sidecar.ps1 index b25c2a6d1..b22f4f072 100644 --- a/scripts/update-sidecar.ps1 +++ b/scripts/update-sidecar.ps1 @@ -66,6 +66,5 @@ Invoke-Expression "${ClogDir}/clog -p stubs --scopePrefix quic.clog -s $Sidecar Invoke-Expression "${ClogDir}/clog -p linux --dynamicTracepointProvider --scopePrefix quic.clog -s $Sidecar -c $ConfigFile --outputDirectory (Join-Path $OutputDir linux) --inputFiles $allFiles" Invoke-Expression "${ClogDir}/clog -p macos --scopePrefix quic.clog -s $Sidecar -c $ConfigFile --outputDirectory $TmpOutputDir --inputFiles $allFiles" - # Return to where we started Set-Location $OrigDir diff --git a/src/core/connection.c b/src/core/connection.c index 57809c717..ee68911e0 100644 --- a/src/core/connection.c +++ b/src/core/connection.c @@ -184,8 +184,7 @@ QuicConnAlloc( Connection->Stats.QuicVersion = Packet->Invariant->LONG_HDR.Version; QuicConnOnQuicVersionSet(Connection); - - Path->Route = *Datagram->Route; + QuicCopyRouteInfo(&Path->Route, Datagram->Route); Connection->State.LocalAddressSet = TRUE; Connection->State.RemoteAddressSet = TRUE; @@ -758,7 +757,7 @@ QuicConnQueueOper( _In_ QUIC_OPERATION* Oper ) { - #if DEBUG +#if DEBUG if (!Connection->State.Initialized) { CXPLAT_DBG_ASSERT(QuicConnIsServer(Connection)); 
CXPLAT_DBG_ASSERT(Connection->SourceCids.Next != NULL || CxPlatIsRandomMemoryFailureEnabled());
@@ -3076,6 +3075,62 @@ QuicConnQueueUnreachable(
     }
 }

+#ifdef QUIC_USE_RAW_DATAPATH
+//
+// Route resolution callback. Packages the outcome into a
+// QUIC_OPER_TYPE_ROUTE_COMPLETION operation for the connection; if that
+// operation cannot be allocated, claims the connection's embedded backup
+// operation (at most once, guarded by BackUpOperUsed) to queue a silent
+// internal-error shutdown instead. The QUIC_CONN_REF_ROUTE reference is
+// released on every path.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+_Function_class_(CXPLAT_ROUTE_RESOLUTION_CALLBACK)
+void
+QuicConnQueueRouteCompletion(
+    _Inout_ QUIC_CONNECTION* Connection,
+    _When_(Succeeded == FALSE, _Reserved_)
+    _When_(Succeeded == TRUE, _In_reads_bytes_(6))
+        const uint8_t* PhysicalAddress,
+    _In_ uint8_t PathId,
+    _In_ BOOLEAN Succeeded
+    )
+{
+    QUIC_OPERATION* RouteOper =
+        QuicOperationAlloc(Connection->Worker, QUIC_OPER_TYPE_ROUTE_COMPLETION);
+
+    if (RouteOper == NULL) {
+        //
+        // The completion could not be allocated; shut the connection down via
+        // the backup operation unless it has already been claimed.
+        //
+        if (InterlockedCompareExchange16((short*)&Connection->BackUpOperUsed, 1, 0) == 0) {
+            QUIC_OPERATION* ShutdownOper = &Connection->BackUpOper;
+            ShutdownOper->FreeAfterProcess = FALSE;
+            ShutdownOper->Type = QUIC_OPER_TYPE_API_CALL;
+            ShutdownOper->API_CALL.Context = &Connection->BackupApiContext;
+            ShutdownOper->API_CALL.Context->Type = QUIC_API_TYPE_CONN_SHUTDOWN;
+            ShutdownOper->API_CALL.Context->CONN_SHUTDOWN.Flags = QUIC_CONNECTION_SHUTDOWN_FLAG_SILENT;
+            ShutdownOper->API_CALL.Context->CONN_SHUTDOWN.ErrorCode = QUIC_ERROR_INTERNAL_ERROR;
+            ShutdownOper->API_CALL.Context->CONN_SHUTDOWN.RegistrationShutdown = FALSE;
+            QuicConnQueueHighestPriorityOper(Connection, ShutdownOper);
+        }
+    } else {
+        RouteOper->ROUTE.Succeeded = Succeeded;
+        RouteOper->ROUTE.PathId = PathId;
+        if (Succeeded) {
+            //
+            // PhysicalAddress is reserved (not readable) unless Succeeded.
+            //
+            memcpy(RouteOper->ROUTE.PhysicalAddress, PhysicalAddress, sizeof(RouteOper->ROUTE.PhysicalAddress));
+        }
+        QuicConnQueueOper(Connection, RouteOper);
+    }
+
+    QuicConnRelease(Connection, QUIC_CONN_REF_ROUTE);
+}
+#endif // QUIC_USE_RAW_DATAPATH
+
 //
 // Updates the current destination CID to the received packet's source CID, if
 // not already equal. Only used during the handshake, on the client side.
@@ -5656,6 +5695,74 @@ QuicConnProcessUdpUnreachable(
     }
 }

+#ifdef QUIC_USE_RAW_DATAPATH
+//
+// Processes a queued route completion for the path identified by PathId.
+// On success the physical address is passed to CxPlatResolveRouteComplete
+// and a send flush is queued. On failure the path is removed, after
+// promoting Paths[1] when the failed path was active and another path
+// exists. If no paths remain afterwards, the connection is closed silently
+// with QUIC_STATUS_UNREACHABLE. An unknown PathId only reaches that check.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+QuicConnProcessRouteCompletion(
+    _In_ QUIC_CONNECTION* Connection,
+    _In_ const uint8_t* PhysicalAddress,
+    _In_ uint8_t PathId,
+    _In_ BOOLEAN Succeeded
+    )
+{
+    uint8_t PathIndex;
+    QUIC_PATH* Path = QuicConnGetPathByID(Connection, PathId, &PathIndex);
+    if (Path != NULL) {
+        if (Succeeded) {
+            QuicTraceLogConnInfo(
+                SuccessfulRouteResolution,
+                Connection,
+                "Processing successful route completion Path[%hhu]",
+                PathId);
+            CxPlatResolveRouteComplete(Connection, &Path->Route, PhysicalAddress, PathId);
+            QuicSendQueueFlush(&Connection->Send, REASON_ROUTE_COMPLETION);
+        } else {
+            //
+            // Kill the path that failed route resolution and make the next path active if possible.
+            //
+            if (Path->IsActive && Connection->PathsCount > 1) {
+                QuicTraceLogConnInfo(
+                    FailedRouteResolution,
+                    Connection,
+                    "Processing failed route completion Path[%hhu]",
+                    PathId);
+                QuicPathSetActive(Connection, &Connection->Paths[1]);
+                //
+                // The active path is kept in Paths[0], so promoting Paths[1]
+                // leaves the failed path in slot 1 while PathIndex (0) now
+                // names the replacement. Retarget the removal so the path
+                // that was just activated is not the one deleted.
+                // NOTE(review): assumes QuicPathSetActive exchanges the two
+                // slots; confirm against its definition.
+                //
+                PathIndex = 1;
+                QuicSendQueueFlush(&Connection->Send, REASON_ROUTE_COMPLETION);
+            }
+            QuicPathRemove(Connection, PathIndex);
+        }
+    }
+
+    if (Connection->PathsCount == 0) {
+        //
+        // Close the connection since the peer is unreachable.
+        //
+        QuicConnCloseLocally(
+            Connection,
+            QUIC_CLOSE_INTERNAL_SILENT | QUIC_CLOSE_QUIC_STATUS,
+            (uint64_t)QUIC_STATUS_UNREACHABLE,
+            NULL);
+    }
+}
+#endif // QUIC_USE_RAW_DATAPATH
+
 _IRQL_requires_max_(PASSIVE_LEVEL)
 void
 QuicConnResetIdleTimeout(
@@ -7063,6 +7170,13 @@ QuicConnDrainOperations(
             QuicConnTraceRundownOper(Connection);
             break;

+#ifdef QUIC_USE_RAW_DATAPATH
+        case QUIC_OPER_TYPE_ROUTE_COMPLETION:
+            QuicConnProcessRouteCompletion(
+                Connection, Oper->ROUTE.PhysicalAddress, Oper->ROUTE.PathId, Oper->ROUTE.Succeeded);
+            break;
+#endif // QUIC_USE_RAW_DATAPATH
+
         default:
             CXPLAT_FRE_ASSERT(FALSE);
             break;
diff --git a/src/core/connection.h b/src/core/connection.h
index aa84e0220..b872f8993 100644
--- a/src/core/connection.h
+++ b/src/core/connection.h
@@ -195,6 +195,7 @@ typedef enum QUIC_CONNECTION_REF {
     QUIC_CONN_REF_LOOKUP_TABLE,         // Per registered CID.
     QUIC_CONN_REF_LOOKUP_RESULT,        // For connections returned from lookups.
     QUIC_CONN_REF_WORKER,               // Worker is (queued for) processing.
+    QUIC_CONN_REF_ROUTE,                // Route resolution is in progress.

     QUIC_CONN_REF_COUNT

@@ -1403,6 +1404,23 @@ QuicConnQueueUnreachable(
     _In_ const QUIC_ADDR* RemoteAddress
     );

+#ifdef QUIC_USE_RAW_DATAPATH
+//
+// Queues a route completion event to a connection for processing.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+_Function_class_(CXPLAT_ROUTE_RESOLUTION_CALLBACK)
+void
+QuicConnQueueRouteCompletion(
+    _Inout_ QUIC_CONNECTION* Connection,
+    _When_(Succeeded == FALSE, _Reserved_)
+    _When_(Succeeded == TRUE, _In_reads_bytes_(6))
+        const uint8_t* PhysicalAddress,
+    _In_ uint8_t PathId,
+    _In_ BOOLEAN Succeeded
+    );
+#endif // QUIC_USE_RAW_DATAPATH
+
 //
 // Queues up an update to the packet tolerance we want the peer to use.
//
diff --git a/src/core/library.c b/src/core/library.c
index 36eb9cd90..6a7713cb1 100644
--- a/src/core/library.c
+++ b/src/core/library.c
@@ -247,7 +247,7 @@ MsQuicLibraryInitialize(
     uint32_t DefaultMaxPartitionCount = QUIC_MAX_PARTITION_COUNT;
     const CXPLAT_UDP_DATAPATH_CALLBACKS DatapathCallbacks = {
         QuicBindingReceive,
-        QuicBindingUnreachable
+        QuicBindingUnreachable,
     };

     Status = CxPlatInitialize();
diff --git a/src/core/operation.h b/src/core/operation.h
index dad6356cf..45c30659c 100644
--- a/src/core/operation.h
+++ b/src/core/operation.h
@@ -30,6 +30,7 @@ typedef enum QUIC_OPERATION_TYPE {
     QUIC_OPER_TYPE_DEPRECATED,          // No longer used.
     QUIC_OPER_TYPE_TIMER_EXPIRED,       // A timer expired.
     QUIC_OPER_TYPE_TRACE_RUNDOWN,       // A trace rundown was triggered.
+    QUIC_OPER_TYPE_ROUTE_COMPLETION,    // Process route completion event.

     //
     // All stateless operations follow.
@@ -226,6 +227,11 @@ typedef struct QUIC_OPERATION {
         struct {
             QUIC_STATELESS_CONTEXT* Context;
         } STATELESS; // Stateless reset, retry and VN
+        struct {
+            uint8_t PhysicalAddress[6];
+            uint8_t PathId;
+            BOOLEAN Succeeded;
+        } ROUTE;
     };

 } QUIC_OPERATION;
diff --git a/src/core/path.c b/src/core/path.c
index 79a1e4d82..9619fb446 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -168,6 +168,28 @@ QuicConnGetPathByID(
     return NULL;
 }

+//
+// Copies route information from SrcRoute into DstRoute. With the raw
+// datapath only the bytes laid out ahead of the State member are copied, so
+// DstRoute keeps its own State and anything stored after it; otherwise the
+// whole structure is assigned.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+QuicCopyRouteInfo(
+    _Inout_ CXPLAT_ROUTE* DstRoute,
+    _In_ CXPLAT_ROUTE* SrcRoute
+    )
+{
+#ifndef QUIC_USE_RAW_DATAPATH
+    *DstRoute = *SrcRoute;
+#else
+    const uint8_t* const RouteStart = (const uint8_t*)SrcRoute;
+    const uint8_t* const StateStart = (const uint8_t*)&SrcRoute->State;
+    CxPlatCopyMemory(DstRoute, SrcRoute, StateStart - RouteStart);
+#endif
+}
+
 _IRQL_requires_max_(PASSIVE_LEVEL)
 _Ret_maybenull_
 QUIC_PATH*
@@ -237,7 +259,7 @@ QuicConnGetPathForDatagram(
         Path->DestCid = Connection->Paths[0].DestCid; // TODO - Copy instead?
} Path->Binding = Connection->Paths[0].Binding; - Path->Route = *Datagram->Route; + QuicCopyRouteInfo(&Path->Route, Datagram->Route); QuicPathValidate(Path); return Path; diff --git a/src/core/path.h b/src/core/path.h index 2b02b9a00..160475214 100644 --- a/src/core/path.h +++ b/src/core/path.h @@ -247,3 +247,10 @@ QuicConnGetPathForDatagram( _In_ QUIC_CONNECTION* Connection, _In_ const CXPLAT_RECV_DATA* Datagram ); + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +QuicCopyRouteInfo( + _Inout_ CXPLAT_ROUTE* DstRoute, + _In_ CXPLAT_ROUTE* SrcRoute + ); diff --git a/src/core/quicdef.h b/src/core/quicdef.h index d4592bb7a..c043de7b4 100644 --- a/src/core/quicdef.h +++ b/src/core/quicdef.h @@ -222,7 +222,7 @@ typedef struct QUIC_PATH QUIC_PATH; // // The initial stream FC window size reported to peers. // -#define QUIC_DEFAULT_STREAM_FC_WINDOW_SIZE 0x8000 // 32768 +#define QUIC_DEFAULT_STREAM_FC_WINDOW_SIZE 0x10000 // 65536 // // The initial stream receive buffer allocation size. diff --git a/src/core/send.c b/src/core/send.c index 623269cd7..c0827556c 100644 --- a/src/core/send.c +++ b/src/core/send.c @@ -114,6 +114,8 @@ QuicSendCanSendFlagsNow( return TRUE; } +#pragma warning(push) +#pragma warning(disable:6001) // SAL thinks Connection could be uninitialized? _IRQL_requires_max_(DISPATCH_LEVEL) void QuicSendQueueFlush( @@ -121,9 +123,38 @@ QuicSendQueueFlush( _In_ QUIC_SEND_FLUSH_REASON Reason ) { + QUIC_CONNECTION* Connection = QuicSendGetConnection(Send); + +#ifdef QUIC_USE_RAW_DATAPATH + QUIC_PATH* Path = &Connection->Paths[0]; + QUIC_STATUS Status; + + CXPLAT_DBG_ASSERT(Path->IsActive); + + if (Path->Route.State == RouteUnresolved) { + QuicConnAddRef(Connection, QUIC_CONN_REF_ROUTE); + Status = + CxPlatResolveRoute( + Path->Binding->Socket, &Path->Route, Path->ID, Connection, QuicConnQueueRouteCompletion); + if (Status == QUIC_STATUS_SUCCESS) { + QuicConnRelease(Connection, QUIC_CONN_REF_ROUTE); + } else { + // + // Route resolution failed or pended. 
We need to pause sending. + // + CXPLAT_DBG_ASSERT(Status == QUIC_STATUS_PENDING || QUIC_FAILED(Status)); + return; + } + } else if (Path->Route.State == RouteResolving) { + // + // Can't send now. Once route resolution completes, we will resume sending. + // + return; + } +#endif + if (!Send->FlushOperationPending && QuicSendCanSendFlagsNow(Send)) { QUIC_OPERATION* Oper; - QUIC_CONNECTION* Connection = QuicSendGetConnection(Send); if ((Oper = QuicOperationAlloc(Connection->Worker, QUIC_OPER_TYPE_FLUSH_SEND)) != NULL) { Send->FlushOperationPending = TRUE; QuicTraceEvent( @@ -135,6 +166,7 @@ QuicSendQueueFlush( } } } +#pragma warning(pop) _IRQL_requires_max_(PASSIVE_LEVEL) void diff --git a/src/core/send.h b/src/core/send.h index 4325ac41d..e47201370 100644 --- a/src/core/send.h +++ b/src/core/send.h @@ -287,7 +287,8 @@ typedef enum QUIC_SEND_FLUSH_REASON { REASON_STREAM_FLOW_CONTROL, REASON_STREAM_ID_FLOW_CONTROL, REASON_AMP_PROTECTION, - REASON_SCHEDULING + REASON_SCHEDULING, + REASON_ROUTE_COMPLETION, } QUIC_SEND_FLUSH_REASON; // diff --git a/src/generated/linux/connection.c.clog.h b/src/generated/linux/connection.c.clog.h index 6ba5c4f66..a737eebc6 100644 --- a/src/generated/linux/connection.c.clog.h +++ b/src/generated/linux/connection.c.clog.h @@ -698,6 +698,46 @@ tracepoint(CLOG_CONNECTION_C, Unreachable , arg1);\ +/*---------------------------------------------------------- +// Decoder Ring for SuccessfulRouteResolution +// [conn][%p] Processing successful route completion Path[%hhu] +// QuicTraceLogConnInfo( + SuccessfulRouteResolution, + Connection, + "Processing successful route completion Path[%hhu]", + PathId); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_SuccessfulRouteResolution +#define _clog_4_ARGS_TRACE_SuccessfulRouteResolution(uniqueId, arg1, encoded_arg_string, arg3)\ +tracepoint(CLOG_CONNECTION_C, SuccessfulRouteResolution , arg1, 
arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for FailedRouteResolution +// [conn][%p] Processing failed route completion Path[%hhu] +// QuicTraceLogConnInfo( + FailedRouteResolution, + Connection, + "Processing failed route completion Path[%hhu]", + PathId); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_FailedRouteResolution +#define _clog_4_ARGS_TRACE_FailedRouteResolution(uniqueId, arg1, encoded_arg_string, arg3)\ +tracepoint(CLOG_CONNECTION_C, FailedRouteResolution , arg1, arg3);\ + +#endif + + + + /*---------------------------------------------------------- // Decoder Ring for UpdatePeerPacketTolerance // [conn][%p] Updating peer packet tolerance to %hhu diff --git a/src/generated/linux/connection.c.clog.h.lttng.h b/src/generated/linux/connection.c.clog.h.lttng.h index 467d8cf1c..2783bb3db 100644 --- a/src/generated/linux/connection.c.clog.h.lttng.h +++ b/src/generated/linux/connection.c.clog.h.lttng.h @@ -738,6 +738,52 @@ TRACEPOINT_EVENT(CLOG_CONNECTION_C, Unreachable, +/*---------------------------------------------------------- +// Decoder Ring for SuccessfulRouteResolution +// [conn][%p] Processing successful route completion Path[%hhu] +// QuicTraceLogConnInfo( + SuccessfulRouteResolution, + Connection, + "Processing successful route completion Path[%hhu]", + PathId); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_CONNECTION_C, SuccessfulRouteResolution, + TP_ARGS( + const void *, arg1, + unsigned char, arg3), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg1, arg1) + ctf_integer(unsigned char, arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for FailedRouteResolution +// [conn][%p] Processing failed route completion Path[%hhu] +// 
QuicTraceLogConnInfo( + FailedRouteResolution, + Connection, + "Processing failed route completion Path[%hhu]", + PathId); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_CONNECTION_C, FailedRouteResolution, + TP_ARGS( + const void *, arg1, + unsigned char, arg3), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg1, arg1) + ctf_integer(unsigned char, arg3, arg3) + ) +) + + + /*---------------------------------------------------------- // Decoder Ring for UpdatePeerPacketTolerance // [conn][%p] Updating peer packet tolerance to %hhu diff --git a/src/generated/linux/datapath_raw.c.clog.h b/src/generated/linux/datapath_raw.c.clog.h new file mode 100644 index 000000000..1015d18ff --- /dev/null +++ b/src/generated/linux/datapath_raw.c.clog.h @@ -0,0 +1,167 @@ +#ifndef CLOG_DO_NOT_INCLUDE_HEADER +#include +#endif +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER CLOG_DATAPATH_RAW_C +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "datapath_raw.c.clog.h.lttng.h" +#if !defined(DEF_CLOG_DATAPATH_RAW_C) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define DEF_CLOG_DATAPATH_RAW_C +#include +#define __int64 __int64_t +#include "datapath_raw.c.clog.h.lttng.h" +#endif +#include +#ifndef _clog_MACRO_QuicTraceEvent +#define _clog_MACRO_QuicTraceEvent 1 +#define QuicTraceEvent(a, ...) _clog_CAT(_clog_ARGN_SELECTOR(__VA_ARGS__), _clog_CAT(_,a(#a, __VA_ARGS__))) +#endif +#ifdef __cplusplus +extern "C" { +#endif +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "CXPLAT_DATAPATH", + sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER)); +// arg2 = arg2 = "CXPLAT_DATAPATH" = arg2 +// arg3 = arg3 = sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER) = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_AllocFailure +#define _clog_4_ARGS_TRACE_AllocFailure(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_C, AllocFailure , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. +// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "CxPlatThreadCreate"); +// arg2 = arg2 = Status = arg2 +// arg3 = arg3 = "CxPlatThreadCreate" = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_LibraryErrorStatus +#define _clog_4_ARGS_TRACE_LibraryErrorStatus(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_C, LibraryErrorStatus , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathCreated +// [data][%p] Created, local=%!ADDR!, remote=%!ADDR! +// QuicTraceEvent( + DatapathCreated, + "[data][%p] Created, local=%!ADDR!, remote=%!ADDR!", + *NewSocket, + CASTED_CLOG_BYTEARRAY(Config->LocalAddress ? sizeof(*Config->LocalAddress) : 0, Config->LocalAddress), + CASTED_CLOG_BYTEARRAY(Config->RemoteAddress ? sizeof(*Config->RemoteAddress) : 0, Config->RemoteAddress)); +// arg2 = arg2 = *NewSocket = arg2 +// arg3 = arg3 = CASTED_CLOG_BYTEARRAY(Config->LocalAddress ? sizeof(*Config->LocalAddress) : 0, Config->LocalAddress) = arg3 +// arg4 = arg4 = CASTED_CLOG_BYTEARRAY(Config->RemoteAddress ? 
sizeof(*Config->RemoteAddress) : 0, Config->RemoteAddress) = arg4 +----------------------------------------------------------*/ +#ifndef _clog_7_ARGS_TRACE_DatapathCreated +#define _clog_7_ARGS_TRACE_DatapathCreated(uniqueId, encoded_arg_string, arg2, arg3, arg3_len, arg4, arg4_len)\ +tracepoint(CLOG_DATAPATH_RAW_C, DatapathCreated , arg2, arg3_len, arg3, arg4_len, arg4);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathRecv +// [data][%p] Recv %u bytes (segment=%hu) Src=%!ADDR! Dst=%!ADDR! +// QuicTraceEvent( + DatapathRecv, + "[data][%p] Recv %u bytes (segment=%hu) Src=%!ADDR! Dst=%!ADDR!", + Socket, + Packets[i]->BufferLength, + Packets[i]->BufferLength, + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->LocalAddress), &Packets[i]->Route->LocalAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->RemoteAddress), &Packets[i]->Route->RemoteAddress)); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = Packets[i]->BufferLength = arg3 +// arg4 = arg4 = Packets[i]->BufferLength = arg4 +// arg5 = arg5 = CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->LocalAddress), &Packets[i]->Route->LocalAddress) = arg5 +// arg6 = arg6 = CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->RemoteAddress), &Packets[i]->Route->RemoteAddress) = arg6 +----------------------------------------------------------*/ +#ifndef _clog_9_ARGS_TRACE_DatapathRecv +#define _clog_9_ARGS_TRACE_DatapathRecv(uniqueId, encoded_arg_string, arg2, arg3, arg4, arg5, arg5_len, arg6, arg6_len)\ +tracepoint(CLOG_DATAPATH_RAW_C, DatapathRecv , arg2, arg3, arg4, arg5_len, arg5, arg6_len, arg6);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathSend +// [data][%p] Send %u bytes in %hhu buffers (segment=%hu) Dst=%!ADDR!, Src=%!ADDR! 
+// QuicTraceEvent( + DatapathSend, + "[data][%p] Send %u bytes in %hhu buffers (segment=%hu) Dst=%!ADDR!, Src=%!ADDR!", + Socket, + SendData->Buffer.Length, + 1, + (uint16_t)SendData->Buffer.Length, + CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = SendData->Buffer.Length = arg3 +// arg4 = arg4 = 1 = arg4 +// arg5 = arg5 = (uint16_t)SendData->Buffer.Length = arg5 +// arg6 = arg6 = CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress) = arg6 +// arg7 = arg7 = CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress) = arg7 +----------------------------------------------------------*/ +#ifndef _clog_10_ARGS_TRACE_DatapathSend +#define _clog_10_ARGS_TRACE_DatapathSend(uniqueId, encoded_arg_string, arg2, arg3, arg4, arg5, arg6, arg6_len, arg7, arg7_len)\ +tracepoint(CLOG_DATAPATH_RAW_C, DatapathSend , arg2, arg3, arg4, arg5, arg6_len, arg6, arg7_len, arg7);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathErrorStatus +// [data][%p] ERROR, %u, %s. 
+// QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Operation, + Status, + "ResolveIpNetEntry2"); +// arg2 = arg2 = Operation = arg2 +// arg3 = arg3 = Status = arg3 +// arg4 = arg4 = "ResolveIpNetEntry2" = arg4 +----------------------------------------------------------*/ +#ifndef _clog_5_ARGS_TRACE_DatapathErrorStatus +#define _clog_5_ARGS_TRACE_DatapathErrorStatus(uniqueId, encoded_arg_string, arg2, arg3, arg4)\ +tracepoint(CLOG_DATAPATH_RAW_C, DatapathErrorStatus , arg2, arg3, arg4);\ + +#endif + + + + +#ifdef __cplusplus +} +#endif +#ifdef CLOG_INLINE_IMPLEMENTATION +#include "quic.clog_datapath_raw.c.clog.h.c" +#endif diff --git a/src/generated/linux/datapath_raw.c.clog.h.lttng.h b/src/generated/linux/datapath_raw.c.clog.h.lttng.h new file mode 100644 index 000000000..a003606c8 --- /dev/null +++ b/src/generated/linux/datapath_raw.c.clog.h.lttng.h @@ -0,0 +1,186 @@ + + + +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. (%llu bytes)", + "CXPLAT_DATAPATH", + sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER)); +// arg2 = arg2 = "CXPLAT_DATAPATH" = arg2 +// arg3 = arg3 = sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER) = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, AllocFailure, + TP_ARGS( + const char *, arg2, + unsigned long long, arg3), + TP_FIELDS( + ctf_string(arg2, arg2) + ctf_integer(uint64_t, arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. 
+// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "CxPlatThreadCreate"); +// arg2 = arg2 = Status = arg2 +// arg3 = arg3 = "CxPlatThreadCreate" = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, LibraryErrorStatus, + TP_ARGS( + unsigned int, arg2, + const char *, arg3), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ctf_string(arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathCreated +// [data][%p] Created, local=%!ADDR!, remote=%!ADDR! +// QuicTraceEvent( + DatapathCreated, + "[data][%p] Created, local=%!ADDR!, remote=%!ADDR!", + *NewSocket, + CASTED_CLOG_BYTEARRAY(Config->LocalAddress ? sizeof(*Config->LocalAddress) : 0, Config->LocalAddress), + CASTED_CLOG_BYTEARRAY(Config->RemoteAddress ? sizeof(*Config->RemoteAddress) : 0, Config->RemoteAddress)); +// arg2 = arg2 = *NewSocket = arg2 +// arg3 = arg3 = CASTED_CLOG_BYTEARRAY(Config->LocalAddress ? sizeof(*Config->LocalAddress) : 0, Config->LocalAddress) = arg3 +// arg4 = arg4 = CASTED_CLOG_BYTEARRAY(Config->RemoteAddress ? sizeof(*Config->RemoteAddress) : 0, Config->RemoteAddress) = arg4 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, DatapathCreated, + TP_ARGS( + const void *, arg2, + unsigned int, arg3_len, + const void *, arg3, + unsigned int, arg4_len, + const void *, arg4), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_integer(unsigned int, arg3_len, arg3_len) + ctf_sequence(char, arg3, arg3, unsigned int, arg3_len) + ctf_integer(unsigned int, arg4_len, arg4_len) + ctf_sequence(char, arg4, arg4, unsigned int, arg4_len) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathRecv +// [data][%p] Recv %u bytes (segment=%hu) Src=%!ADDR! Dst=%!ADDR! +// QuicTraceEvent( + DatapathRecv, + "[data][%p] Recv %u bytes (segment=%hu) Src=%!ADDR! 
Dst=%!ADDR!", + Socket, + Packets[i]->BufferLength, + Packets[i]->BufferLength, + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->LocalAddress), &Packets[i]->Route->LocalAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->RemoteAddress), &Packets[i]->Route->RemoteAddress)); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = Packets[i]->BufferLength = arg3 +// arg4 = arg4 = Packets[i]->BufferLength = arg4 +// arg5 = arg5 = CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->LocalAddress), &Packets[i]->Route->LocalAddress) = arg5 +// arg6 = arg6 = CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->RemoteAddress), &Packets[i]->Route->RemoteAddress) = arg6 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, DatapathRecv, + TP_ARGS( + const void *, arg2, + unsigned int, arg3, + unsigned short, arg4, + unsigned int, arg5_len, + const void *, arg5, + unsigned int, arg6_len, + const void *, arg6), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_integer(unsigned int, arg3, arg3) + ctf_integer(unsigned short, arg4, arg4) + ctf_integer(unsigned int, arg5_len, arg5_len) + ctf_sequence(char, arg5, arg5, unsigned int, arg5_len) + ctf_integer(unsigned int, arg6_len, arg6_len) + ctf_sequence(char, arg6, arg6, unsigned int, arg6_len) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathSend +// [data][%p] Send %u bytes in %hhu buffers (segment=%hu) Dst=%!ADDR!, Src=%!ADDR! 
+// QuicTraceEvent( + DatapathSend, + "[data][%p] Send %u bytes in %hhu buffers (segment=%hu) Dst=%!ADDR!, Src=%!ADDR!", + Socket, + SendData->Buffer.Length, + 1, + (uint16_t)SendData->Buffer.Length, + CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = SendData->Buffer.Length = arg3 +// arg4 = arg4 = 1 = arg4 +// arg5 = arg5 = (uint16_t)SendData->Buffer.Length = arg5 +// arg6 = arg6 = CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress) = arg6 +// arg7 = arg7 = CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress) = arg7 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, DatapathSend, + TP_ARGS( + const void *, arg2, + unsigned int, arg3, + unsigned char, arg4, + unsigned short, arg5, + unsigned int, arg6_len, + const void *, arg6, + unsigned int, arg7_len, + const void *, arg7), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_integer(unsigned int, arg3, arg3) + ctf_integer(unsigned char, arg4, arg4) + ctf_integer(unsigned short, arg5, arg5) + ctf_integer(unsigned int, arg6_len, arg6_len) + ctf_sequence(char, arg6, arg6, unsigned int, arg6_len) + ctf_integer(unsigned int, arg7_len, arg7_len) + ctf_sequence(char, arg7, arg7, unsigned int, arg7_len) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathErrorStatus +// [data][%p] ERROR, %u, %s. 
+// QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Operation, + Status, + "ResolveIpNetEntry2"); +// arg2 = arg2 = Operation = arg2 +// arg3 = arg3 = Status = arg3 +// arg4 = arg4 = "ResolveIpNetEntry2" = arg4 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_C, DatapathErrorStatus, + TP_ARGS( + const void *, arg2, + unsigned int, arg3, + const char *, arg4), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_integer(unsigned int, arg3, arg3) + ctf_string(arg4, arg4) + ) +) diff --git a/src/generated/linux/datapath_raw_dpdk.c.clog.h b/src/generated/linux/datapath_raw_dpdk.c.clog.h new file mode 100644 index 000000000..6e67340cc --- /dev/null +++ b/src/generated/linux/datapath_raw_dpdk.c.clog.h @@ -0,0 +1,67 @@ +#ifndef CLOG_DO_NOT_INCLUDE_HEADER +#include +#endif +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER CLOG_DATAPATH_RAW_DPDK_C +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "datapath_raw_dpdk.c.clog.h.lttng.h" +#if !defined(DEF_CLOG_DATAPATH_RAW_DPDK_C) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define DEF_CLOG_DATAPATH_RAW_DPDK_C +#include +#define __int64 __int64_t +#include "datapath_raw_dpdk.c.clog.h.lttng.h" +#endif +#include +#ifndef _clog_MACRO_QuicTraceEvent +#define _clog_MACRO_QuicTraceEvent 1 +#define QuicTraceEvent(a, ...) _clog_CAT(_clog_ARGN_SELECTOR(__VA_ARGS__), _clog_CAT(_,a(#a, __VA_ARGS__))) +#endif +#ifdef __cplusplus +extern "C" { +#endif +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. 
+// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "CxPlatThreadCreate"); +// arg2 = arg2 = Status = arg2 +// arg3 = arg3 = "CxPlatThreadCreate" = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_LibraryErrorStatus +#define _clog_4_ARGS_TRACE_LibraryErrorStatus(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_DPDK_C, LibraryErrorStatus , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryError +// [ lib] ERROR, %s. +// QuicTraceEvent( + LibraryError, + "[ lib] ERROR, %s.", + "No room in DPDK TX ring buffer"); +// arg2 = arg2 = "No room in DPDK TX ring buffer" = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_LibraryError +#define _clog_3_ARGS_TRACE_LibraryError(uniqueId, encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_RAW_DPDK_C, LibraryError , arg2);\ + +#endif + + + + +#ifdef __cplusplus +} +#endif +#ifdef CLOG_INLINE_IMPLEMENTATION +#include "quic.clog_datapath_raw_dpdk.c.clog.h.c" +#endif diff --git a/src/generated/linux/datapath_raw_dpdk.c.clog.h.lttng.h b/src/generated/linux/datapath_raw_dpdk.c.clog.h.lttng.h new file mode 100644 index 000000000..33b1f7f72 --- /dev/null +++ b/src/generated/linux/datapath_raw_dpdk.c.clog.h.lttng.h @@ -0,0 +1,42 @@ + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. 
+// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "CxPlatThreadCreate"); +// arg2 = arg2 = Status = arg2 +// arg3 = arg3 = "CxPlatThreadCreate" = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_DPDK_C, LibraryErrorStatus, + TP_ARGS( + unsigned int, arg2, + const char *, arg3), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ctf_string(arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryError +// [ lib] ERROR, %s. +// QuicTraceEvent( + LibraryError, + "[ lib] ERROR, %s.", + "No room in DPDK TX ring buffer"); +// arg2 = arg2 = "No room in DPDK TX ring buffer" = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_DPDK_C, LibraryError, + TP_ARGS( + const char *, arg2), + TP_FIELDS( + ctf_string(arg2, arg2) + ) +) diff --git a/src/generated/linux/datapath_raw_socket.c.clog.h b/src/generated/linux/datapath_raw_socket.c.clog.h new file mode 100644 index 000000000..ff1adf7d3 --- /dev/null +++ b/src/generated/linux/datapath_raw_socket.c.clog.h @@ -0,0 +1,169 @@ +#ifndef CLOG_DO_NOT_INCLUDE_HEADER +#include +#endif +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER CLOG_DATAPATH_RAW_SOCKET_C +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "datapath_raw_socket.c.clog.h.lttng.h" +#if !defined(DEF_CLOG_DATAPATH_RAW_SOCKET_C) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define DEF_CLOG_DATAPATH_RAW_SOCKET_C +#include +#define __int64 __int64_t +#include "datapath_raw_socket.c.clog.h.lttng.h" +#endif +#include +#ifndef _clog_MACRO_QuicTraceLogConnInfo +#define _clog_MACRO_QuicTraceLogConnInfo 1 +#define QuicTraceLogConnInfo(a, ...) 
_clog_CAT(_clog_ARGN_SELECTOR(__VA_ARGS__), _clog_CAT(_,a(#a, __VA_ARGS__))) +#endif +#ifndef _clog_MACRO_QuicTraceEvent +#define _clog_MACRO_QuicTraceEvent 1 +#define QuicTraceEvent(a, ...) _clog_CAT(_clog_ARGN_SELECTOR(__VA_ARGS__), _clog_CAT(_,a(#a, __VA_ARGS__))) +#endif +#ifdef __cplusplus +extern "C" { +#endif +/*---------------------------------------------------------- +// Decoder Ring for RouteResolutionEnd +// [conn][%p] Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu +// QuicTraceLogConnInfo( + RouteResolutionEnd, + Connection, + "Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu", + PathId, + PhysicalAddress[0], + PhysicalAddress[1], + PhysicalAddress[2], + PhysicalAddress[3], + PhysicalAddress[4], + PhysicalAddress[5]); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +// arg4 = arg4 = PhysicalAddress[0] = arg4 +// arg5 = arg5 = PhysicalAddress[1] = arg5 +// arg6 = arg6 = PhysicalAddress[2] = arg6 +// arg7 = arg7 = PhysicalAddress[3] = arg7 +// arg8 = arg8 = PhysicalAddress[4] = arg8 +// arg9 = arg9 = PhysicalAddress[5] = arg9 +----------------------------------------------------------*/ +#ifndef _clog_10_ARGS_TRACE_RouteResolutionEnd +#define _clog_10_ARGS_TRACE_RouteResolutionEnd(uniqueId, arg1, encoded_arg_string, arg3, arg4, arg5, arg6, arg7, arg8, arg9)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, RouteResolutionEnd , arg1, arg3, arg4, arg5, arg6, arg7, arg8, arg9);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for RouteResolutionStart +// [conn][%p] Starting to look up neighbor on Path[%hhu] with status %u +// QuicTraceLogConnInfo( + RouteResolutionStart, + Context, + "Starting to look up neighbor on Path[%hhu] with status %u", + PathId, + Status); +// arg1 = arg1 = Context = arg1 +// arg3 = arg3 = PathId = arg3 +// arg4 = arg4 = Status = arg4 +----------------------------------------------------------*/ 
+#ifndef _clog_5_ARGS_TRACE_RouteResolutionStart +#define _clog_5_ARGS_TRACE_RouteResolutionStart(uniqueId, arg1, encoded_arg_string, arg3, arg4)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, RouteResolutionStart , arg1, arg3, arg4);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. +// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + WsaError, + "WSAStartup"); +// arg2 = arg2 = WsaError = arg2 +// arg3 = arg3 = "WSAStartup" = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_LibraryErrorStatus +#define _clog_4_ARGS_TRACE_LibraryErrorStatus(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, LibraryErrorStatus , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathErrorStatus +// [data][%p] ERROR, %u, %s. +// QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Socket, + Error, + "socket"); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = Error = arg3 +// arg4 = arg4 = "socket" = arg4 +----------------------------------------------------------*/ +#ifndef _clog_5_ARGS_TRACE_DatapathErrorStatus +#define _clog_5_ARGS_TRACE_DatapathErrorStatus(uniqueId, encoded_arg_string, arg2, arg3, arg4)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, DatapathErrorStatus , arg2, arg3, arg4);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathError +// [data][%p] ERROR, %s. 
+// QuicTraceEvent( + DatapathError, + "[data][%p] ERROR, %s.", + Socket, + "no matching interface/queue"); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = "no matching interface/queue" = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_DatapathError +#define _clog_4_ARGS_TRACE_DatapathError(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, DatapathError , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. (%llu bytes)", + "CXPLAT_DATAPATH", + sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION)); +// arg2 = arg2 = "CXPLAT_DATAPATH" = arg2 +// arg3 = arg3 = sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION) = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_AllocFailure +#define _clog_4_ARGS_TRACE_AllocFailure(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_SOCKET_C, AllocFailure , arg2, arg3);\ + +#endif + + + + +#ifdef __cplusplus +} +#endif +#ifdef CLOG_INLINE_IMPLEMENTATION +#include "quic.clog_datapath_raw_socket.c.clog.h.c" +#endif diff --git a/src/generated/linux/datapath_raw_socket.c.clog.h.lttng.h b/src/generated/linux/datapath_raw_socket.c.clog.h.lttng.h new file mode 100644 index 000000000..b0df51749 --- /dev/null +++ b/src/generated/linux/datapath_raw_socket.c.clog.h.lttng.h @@ -0,0 +1,170 @@ + + + +/*---------------------------------------------------------- +// Decoder Ring for RouteResolutionEnd +// [conn][%p] Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu +// QuicTraceLogConnInfo( + RouteResolutionEnd, + Connection, + "Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu", + PathId, + PhysicalAddress[0], + PhysicalAddress[1], + PhysicalAddress[2], + 
PhysicalAddress[3], + PhysicalAddress[4], + PhysicalAddress[5]); +// arg1 = arg1 = Connection = arg1 +// arg3 = arg3 = PathId = arg3 +// arg4 = arg4 = PhysicalAddress[0] = arg4 +// arg5 = arg5 = PhysicalAddress[1] = arg5 +// arg6 = arg6 = PhysicalAddress[2] = arg6 +// arg7 = arg7 = PhysicalAddress[3] = arg7 +// arg8 = arg8 = PhysicalAddress[4] = arg8 +// arg9 = arg9 = PhysicalAddress[5] = arg9 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, RouteResolutionEnd, + TP_ARGS( + const void *, arg1, + unsigned char, arg3, + unsigned char, arg4, + unsigned char, arg5, + unsigned char, arg6, + unsigned char, arg7, + unsigned char, arg8, + unsigned char, arg9), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg1, arg1) + ctf_integer(unsigned char, arg3, arg3) + ctf_integer(unsigned char, arg4, arg4) + ctf_integer(unsigned char, arg5, arg5) + ctf_integer(unsigned char, arg6, arg6) + ctf_integer(unsigned char, arg7, arg7) + ctf_integer(unsigned char, arg8, arg8) + ctf_integer(unsigned char, arg9, arg9) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for RouteResolutionStart +// [conn][%p] Starting to look up neighbor on Path[%hhu] with status %u +// QuicTraceLogConnInfo( + RouteResolutionStart, + Context, + "Starting to look up neighbor on Path[%hhu] with status %u", + PathId, + Status); +// arg1 = arg1 = Context = arg1 +// arg3 = arg3 = PathId = arg3 +// arg4 = arg4 = Status = arg4 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, RouteResolutionStart, + TP_ARGS( + const void *, arg1, + unsigned char, arg3, + unsigned int, arg4), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg1, arg1) + ctf_integer(unsigned char, arg3, arg3) + ctf_integer(unsigned int, arg4, arg4) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. 
+// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + WsaError, + "WSAStartup"); +// arg2 = arg2 = WsaError = arg2 +// arg3 = arg3 = "WSAStartup" = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, LibraryErrorStatus, + TP_ARGS( + unsigned int, arg2, + const char *, arg3), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ctf_string(arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathErrorStatus +// [data][%p] ERROR, %u, %s. +// QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Socket, + Error, + "socket"); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = Error = arg3 +// arg4 = arg4 = "socket" = arg4 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, DatapathErrorStatus, + TP_ARGS( + const void *, arg2, + unsigned int, arg3, + const char *, arg4), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_integer(unsigned int, arg3, arg3) + ctf_string(arg4, arg4) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathError +// [data][%p] ERROR, %s. +// QuicTraceEvent( + DatapathError, + "[data][%p] ERROR, %s.", + Socket, + "no matching interface/queue"); +// arg2 = arg2 = Socket = arg2 +// arg3 = arg3 = "no matching interface/queue" = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, DatapathError, + TP_ARGS( + const void *, arg2, + const char *, arg3), + TP_FIELDS( + ctf_integer_hex(uint64_t, arg2, arg2) + ctf_string(arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "CXPLAT_DATAPATH", + sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION)); +// arg2 = arg2 = "CXPLAT_DATAPATH" = arg2 +// arg3 = arg3 = sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION) = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_SOCKET_C, AllocFailure, + TP_ARGS( + const char *, arg2, + unsigned long long, arg3), + TP_FIELDS( + ctf_string(arg2, arg2) + ctf_integer(uint64_t, arg3, arg3) + ) +) diff --git a/src/generated/linux/datapath_raw_xdp.c.clog.h b/src/generated/linux/datapath_raw_xdp.c.clog.h new file mode 100644 index 000000000..792b4c496 --- /dev/null +++ b/src/generated/linux/datapath_raw_xdp.c.clog.h @@ -0,0 +1,87 @@ +#ifndef CLOG_DO_NOT_INCLUDE_HEADER +#include +#endif +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER CLOG_DATAPATH_RAW_XDP_C +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "datapath_raw_xdp.c.clog.h.lttng.h" +#if !defined(DEF_CLOG_DATAPATH_RAW_XDP_C) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define DEF_CLOG_DATAPATH_RAW_XDP_C +#include +#define __int64 __int64_t +#include "datapath_raw_xdp.c.clog.h.lttng.h" +#endif +#include +#ifndef _clog_MACRO_QuicTraceEvent +#define _clog_MACRO_QuicTraceEvent 1 +#define QuicTraceEvent(a, ...) _clog_CAT(_clog_ARGN_SELECTOR(__VA_ARGS__), _clog_CAT(_,a(#a, __VA_ARGS__))) +#endif +#ifdef __cplusplus +extern "C" { +#endif +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. 
+// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + ret, + "ConvertInterfaceIndexToLuid"); +// arg2 = arg2 = ret = arg2 +// arg3 = arg3 = "ConvertInterfaceIndexToLuid" = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_LibraryErrorStatus +#define _clog_4_ARGS_TRACE_LibraryErrorStatus(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_XDP_C, LibraryErrorStatus , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. (%llu bytes)", + "XDP Queues", + Interface->QueueCount * sizeof(*Interface->Queues)); +// arg2 = arg2 = "XDP Queues" = arg2 +// arg3 = arg3 = Interface->QueueCount * sizeof(*Interface->Queues) = arg3 +----------------------------------------------------------*/ +#ifndef _clog_4_ARGS_TRACE_AllocFailure +#define _clog_4_ARGS_TRACE_AllocFailure(uniqueId, encoded_arg_string, arg2, arg3)\ +tracepoint(CLOG_DATAPATH_RAW_XDP_C, AllocFailure , arg2, arg3);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryError +// [ lib] ERROR, %s. 
+// QuicTraceEvent( + LibraryError, + "[ lib] ERROR, %s.", + "No more room for rules"); +// arg2 = arg2 = "No more room for rules" = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_LibraryError +#define _clog_3_ARGS_TRACE_LibraryError(uniqueId, encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_RAW_XDP_C, LibraryError , arg2);\ + +#endif + + + + +#ifdef __cplusplus +} +#endif +#ifdef CLOG_INLINE_IMPLEMENTATION +#include "quic.clog_datapath_raw_xdp.c.clog.h.c" +#endif diff --git a/src/generated/linux/datapath_raw_xdp.c.clog.h.lttng.h b/src/generated/linux/datapath_raw_xdp.c.clog.h.lttng.h new file mode 100644 index 000000000..b82efff4b --- /dev/null +++ b/src/generated/linux/datapath_raw_xdp.c.clog.h.lttng.h @@ -0,0 +1,65 @@ + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryErrorStatus +// [ lib] ERROR, %u, %s. +// QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + ret, + "ConvertInterfaceIndexToLuid"); +// arg2 = arg2 = ret = arg2 +// arg3 = arg3 = "ConvertInterfaceIndexToLuid" = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_XDP_C, LibraryErrorStatus, + TP_ARGS( + unsigned int, arg2, + const char *, arg3), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ctf_string(arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for AllocFailure +// Allocation of '%s' failed. (%llu bytes) +// QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "XDP Queues", + Interface->QueueCount * sizeof(*Interface->Queues)); +// arg2 = arg2 = "XDP Queues" = arg2 +// arg3 = arg3 = Interface->QueueCount * sizeof(*Interface->Queues) = arg3 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_XDP_C, AllocFailure, + TP_ARGS( + const char *, arg2, + unsigned long long, arg3), + TP_FIELDS( + ctf_string(arg2, arg2) + ctf_integer(uint64_t, arg3, arg3) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for LibraryError +// [ lib] ERROR, %s. +// QuicTraceEvent( + LibraryError, + "[ lib] ERROR, %s.", + "No more room for rules"); +// arg2 = arg2 = "No more room for rules" = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_RAW_XDP_C, LibraryError, + TP_ARGS( + const char *, arg2), + TP_FIELDS( + ctf_string(arg2, arg2) + ) +) diff --git a/src/generated/linux/quic.clog_datapath_raw.c.clog.h.c b/src/generated/linux/quic.clog_datapath_raw.c.clog.h.c new file mode 100644 index 000000000..00b338060 --- /dev/null +++ b/src/generated/linux/quic.clog_datapath_raw.c.clog.h.c @@ -0,0 +1,7 @@ +#include +#ifdef BUILDING_TRACEPOINT_PROVIDER +#define TRACEPOINT_CREATE_PROBES +#else +#define TRACEPOINT_DEFINE +#endif +#include "datapath_raw.c.clog.h" diff --git a/src/generated/linux/quic.clog_datapath_raw_dpdk.c.clog.h.c b/src/generated/linux/quic.clog_datapath_raw_dpdk.c.clog.h.c new file mode 100644 index 000000000..5ee58d58b --- /dev/null +++ b/src/generated/linux/quic.clog_datapath_raw_dpdk.c.clog.h.c @@ -0,0 +1,7 @@ +#include +#ifdef BUILDING_TRACEPOINT_PROVIDER +#define TRACEPOINT_CREATE_PROBES +#else +#define TRACEPOINT_DEFINE +#endif +#include "datapath_raw_dpdk.c.clog.h" diff --git a/src/generated/linux/quic.clog_datapath_raw_socket.c.clog.h.c b/src/generated/linux/quic.clog_datapath_raw_socket.c.clog.h.c new file mode 100644 index 000000000..d5528d4a2 --- /dev/null +++ 
b/src/generated/linux/quic.clog_datapath_raw_socket.c.clog.h.c @@ -0,0 +1,7 @@ +#include +#ifdef BUILDING_TRACEPOINT_PROVIDER +#define TRACEPOINT_CREATE_PROBES +#else +#define TRACEPOINT_DEFINE +#endif +#include "datapath_raw_socket.c.clog.h" diff --git a/src/generated/linux/quic.clog_datapath_raw_xdp.c.clog.h.c b/src/generated/linux/quic.clog_datapath_raw_xdp.c.clog.h.c new file mode 100644 index 000000000..381eb046c --- /dev/null +++ b/src/generated/linux/quic.clog_datapath_raw_xdp.c.clog.h.c @@ -0,0 +1,7 @@ +#include +#ifdef BUILDING_TRACEPOINT_PROVIDER +#define TRACEPOINT_CREATE_PROBES +#else +#define TRACEPOINT_DEFINE +#endif +#include "datapath_raw_xdp.c.clog.h" diff --git a/src/inc/quic_datapath.h b/src/inc/quic_datapath.h index 8dbfe3df7..eaff0369a 100644 --- a/src/inc/quic_datapath.h +++ b/src/inc/quic_datapath.h @@ -141,6 +141,15 @@ typedef struct CXPLAT_SEND_DATA CXPLAT_SEND_DATA; // typedef struct QUIC_BUFFER QUIC_BUFFER; +// +// When state is Resolved, LocalLinkLayerAddress and NextHopLinkLayerAddress of CXPLAT_ROUTE are valid. +// +typedef enum CXPLAT_ROUTE_STATE { + RouteUnresolved, + RouteResolving, + RouteResolved, +} CXPLAT_ROUTE_STATE; + // // Structure to represent a network route. // @@ -149,6 +158,14 @@ typedef struct CXPLAT_ROUTE { QUIC_ADDR RemoteAddress; QUIC_ADDR LocalAddress; +#ifdef QUIC_USE_RAW_DATAPATH + uint8_t LocalLinkLayerAddress[6]; + uint8_t NextHopLinkLayerAddress[6]; + void* Queue; + + CXPLAT_ROUTE_STATE State; // Keep this as the last property in the struct. +#endif // QUIC_USE_RAW_DATAPATH + } CXPLAT_ROUTE; // @@ -193,6 +210,7 @@ typedef struct CXPLAT_RECV_DATA { // uint8_t Allocated : 1; // Used for debugging. Set to FALSE on free. uint8_t QueuedOnConnection : 1; // Used for debugging. 
+ uint8_t Reserved : 6; } CXPLAT_RECV_DATA; @@ -633,6 +651,50 @@ CxPlatSocketGetParam( _Out_writes_bytes_opt_(*BufferLength) uint8_t* Buffer ); +#ifdef QUIC_USE_RAW_DATAPATH +// +// Copies L2 address into route object and sets route state to resolved. +// +void +CxPlatResolveRouteComplete( + _In_ void* Connection, + _Inout_ CXPLAT_ROUTE* Route, + _In_reads_bytes_(6) const uint8_t* PhysicalAddress, + _In_ uint8_t PathId + ); + +// +// Function pointer type for datapath route resolution callbacks. +// +typedef +_IRQL_requires_max_(DISPATCH_LEVEL) +_Function_class_(CXPLAT_ROUTE_RESOLUTION_CALLBACK) +void +(CXPLAT_ROUTE_RESOLUTION_CALLBACK)( + _In_ void* Context, + _When_(Succeeded == FALSE, _Reserved_) + _When_(Succeeded == TRUE, _In_reads_bytes_(6)) + uint8_t* PhysicalAddress, + _In_ uint8_t PathId, + _In_ BOOLEAN Succeeded + ); + +typedef CXPLAT_ROUTE_RESOLUTION_CALLBACK *CXPLAT_ROUTE_RESOLUTION_CALLBACK_HANDLER; + +// +// Tries to resolve route and neighbor for the given destination address. 
+// +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatResolveRoute( + _In_ CXPLAT_SOCKET* Socket, + _Inout_ CXPLAT_ROUTE* Route, + _In_ uint8_t PathId, + _In_ void* Context, + _In_ CXPLAT_ROUTE_RESOLUTION_CALLBACK_HANDLER Callback + ); +#endif // QUIC_USE_RAW_DATAPATH + #if defined(__cplusplus) } #endif diff --git a/src/inc/quic_hashtable.h b/src/inc/quic_hashtable.h index 27901bf33..c8aac0577 100644 --- a/src/inc/quic_hashtable.h +++ b/src/inc/quic_hashtable.h @@ -198,7 +198,7 @@ CxPlatHashtableRemove( _Must_inspect_result_ CXPLAT_HASHTABLE_ENTRY* CxPlatHashtableLookup( - _In_ CXPLAT_HASHTABLE* HashTable, + _In_ const CXPLAT_HASHTABLE* HashTable, _In_ uint64_t Signature, _Out_opt_ CXPLAT_HASHTABLE_LOOKUP_CONTEXT* Context ); @@ -206,7 +206,7 @@ CxPlatHashtableLookup( _Must_inspect_result_ CXPLAT_HASHTABLE_ENTRY* CxPlatHashtableLookupNext( - _In_ CXPLAT_HASHTABLE* HashTable, + _In_ const CXPLAT_HASHTABLE* HashTable, _Inout_ CXPLAT_HASHTABLE_LOOKUP_CONTEXT* Context ); diff --git a/src/inc/quic_platform.h b/src/inc/quic_platform.h index f0669dae5..4673cae74 100644 --- a/src/inc/quic_platform.h +++ b/src/inc/quic_platform.h @@ -141,6 +141,8 @@ typedef struct CXPLAT_SLIST_ENTRY { #define QUIC_POOL_TLS_TICKET_KEY '74cQ' // Qc47 - QUIC Platform TLS ticket key #define QUIC_POOL_TLS_CIPHER_SUITE_STRING '84cQ' // Qc48 - QUIC TLS cipher suite string #define QUIC_POOL_PLATFORM_WORKER '94cQ' // Qc49 - QUIC platform worker +#define QUIC_POOL_ROUTE_RESOLUTION_WORKER 'A4cQ' // Qc4A - QUIC route resolution worker +#define QUIC_POOL_ROUTE_RESOLUTION_OPER 'B4cQ' // Qc4B - QUIC route resolution operation typedef enum CXPLAT_THREAD_FLAGS { CXPLAT_THREAD_FLAG_NONE = 0x0000, diff --git a/src/inc/quic_platform_winuser.h b/src/inc/quic_platform_winuser.h index ad7879cd9..c6f219a17 100644 --- a/src/inc/quic_platform_winuser.h +++ b/src/inc/quic_platform_winuser.h @@ -37,6 +37,7 @@ Environment: #pragma warning(disable:5105) // The conformant preprocessor along with the newest SDK 
throws this warning for a macro. #include #include +#include #include #include #include diff --git a/src/manifest/MsQuic.wprp b/src/manifest/MsQuic.wprp index 3b55902f3..94d2c3950 100644 --- a/src/manifest/MsQuic.wprp +++ b/src/manifest/MsQuic.wprp @@ -26,6 +26,24 @@ + + + + + + + + + + + + + + + + + + @@ -80,6 +98,12 @@ + + + + + + @@ -95,12 +119,28 @@ + + + + + + + + + + + + + + + + @@ -210,6 +250,8 @@ + + diff --git a/src/manifest/clog.sidecar b/src/manifest/clog.sidecar index 3ac7ba88b..608cc4cb0 100644 --- a/src/manifest/clog.sidecar +++ b/src/manifest/clog.sidecar @@ -826,6 +826,26 @@ "splitArgs": [], "macroName": "QuicTraceLogInfo" }, + "CertCapiVerifiedChain": { + "ModuleProperites": {}, + "TraceString": "CertVerifyChain: %S 0x%x, result=0x%x", + "UniqueId": "CertCapiVerifiedChain", + "splitArgs": [ + { + "DefinationEncoding": "S", + "MacroVariableName": "arg2" + }, + { + "DefinationEncoding": "x", + "MacroVariableName": "arg3" + }, + { + "DefinationEncoding": "x", + "MacroVariableName": "arg4" + } + ], + "macroName": "QuicTraceLogInfo" + }, "CertCapiParsedChain": { "ModuleProperites": {}, "TraceString": "[cert] Successfully parsed chain of %u certificate(s)", @@ -1396,6 +1416,82 @@ ], "macroName": "QuicTraceLogVerbose" }, + "RouteResolutionEnd": { + "ModuleProperites": {}, + "TraceString": "[conn][%p] Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu", + "UniqueId": "RouteResolutionEnd", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg1" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg3" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg4" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg5" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg6" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg7" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg8" + }, + { + "DefinationEncoding": "hhu", + 
"MacroVariableName": "arg9" + } + ], + "macroName": "QuicTraceLogConnInfo" + }, + "RouteResolutionStart": { + "ModuleProperites": {}, + "TraceString": "[conn][%p] Starting to look up neighbor on Path[%hhu] with status %u", + "UniqueId": "RouteResolutionStart", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg1" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg3" + }, + { + "DefinationEncoding": "u", + "MacroVariableName": "arg4" + } + ], + "macroName": "QuicTraceLogConnInfo" + }, + "DatapathError": { + "ModuleProperites": {}, + "TraceString": "[data][%p] ERROR, %s.", + "UniqueId": "DatapathError", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg2" + }, + { + "DefinationEncoding": "s", + "MacroVariableName": "arg3" + } + ], + "macroName": "QuicTraceEvent" + }, "WindowsUserLoaded": { "ModuleProperites": {}, "TraceString": "[ dll] Loaded", @@ -1454,6 +1550,26 @@ ], "macroName": "QuicTraceLogInfo" }, + "WindowsUserInitialized2": { + "ModuleProperites": {}, + "TraceString": "[ dll] Initialized (AvailMem = %llu bytes, TimerResolution = [%u, %u])", + "UniqueId": "WindowsUserInitialized2", + "splitArgs": [ + { + "DefinationEncoding": "llu", + "MacroVariableName": "arg2" + }, + { + "DefinationEncoding": "u", + "MacroVariableName": "arg3" + }, + { + "DefinationEncoding": "u", + "MacroVariableName": "arg4" + } + ], + "macroName": "QuicTraceLogInfo" + }, "WindowsUserInitialized": { "ModuleProperites": {}, "TraceString": "[ dll] Initialized (AvailMem = %llu bytes)", @@ -1567,6 +1683,18 @@ ], "macroName": "QuicTraceEvent" }, + "ApiError": { + "ModuleProperites": {}, + "TraceString": "[ api] Error %u", + "UniqueId": "ApiError", + "splitArgs": [ + { + "DefinationEncoding": "u", + "MacroVariableName": "arg2" + } + ], + "macroName": "QuicTraceEvent" + }, "ConnError": { "ModuleProperites": {}, "TraceString": "[conn][%p] ERROR, %s.", @@ -3015,6 +3143,38 @@ ], "macroName": "QuicTraceLogConnInfo" }, + 
"SuccessfulRouteResolution": { + "ModuleProperites": {}, + "TraceString": "[conn][%p] Processing successful route completion Path[%hhu]", + "UniqueId": "SuccessfulRouteResolution", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg1" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg3" + } + ], + "macroName": "QuicTraceLogConnInfo" + }, + "FailedRouteResolution": { + "ModuleProperites": {}, + "TraceString": "[conn][%p] Processing failed route completion Path[%hhu]", + "UniqueId": "FailedRouteResolution", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg1" + }, + { + "DefinationEncoding": "hhu", + "MacroVariableName": "arg3" + } + ], + "macroName": "QuicTraceLogConnInfo" + }, "UpdatePeerPacketTolerance": { "ModuleProperites": {}, "TraceString": "[conn][%p] Updating peer packet tolerance to %hhu", @@ -7058,6 +7218,26 @@ ], "macroName": "QuicTraceLogVerbose" }, + "FrameLogImmediateAck": { + "ModuleProperites": {}, + "TraceString": "[%c][%cX][%llu] IMMEDIATE_ACK", + "UniqueId": "FrameLogImmediateAck", + "splitArgs": [ + { + "DefinationEncoding": "c", + "MacroVariableName": "arg2" + }, + { + "DefinationEncoding": "c", + "MacroVariableName": "arg3" + }, + { + "DefinationEncoding": "llu", + "MacroVariableName": "arg4" + } + ], + "macroName": "QuicTraceLogVerbose" + }, "IgnoreCryptoFrame": { "ModuleProperites": {}, "TraceString": "[conn][%p] Ignoring received crypto after cleanup", @@ -7854,6 +8034,18 @@ ], "macroName": "QuicTraceLogConnWarning" }, + "ListenerIndicateStopComplete": { + "ModuleProperites": {}, + "TraceString": "[list][%p] Indicating STOP_COMPLETE", + "UniqueId": "ListenerIndicateStopComplete", + "splitArgs": [ + { + "DefinationEncoding": "p", + "MacroVariableName": "arg2" + } + ], + "macroName": "QuicTraceLogVerbose" + }, "ListenerIndicateNewConnection": { "ModuleProperites": {}, "TraceString": "[list][%p] Indicating NEW_CONNECTION %p", @@ -9380,24 +9572,24 @@ "splitArgs": [], 
"macroName": "QuicTraceLogInfo" }, - "LibraryMsQuicOpenNull": { + "LibraryMsQuicOpenVersionNull": { "ModuleProperites": {}, - "TraceString": "[ api] MsQuicOpen, NULL", - "UniqueId": "LibraryMsQuicOpenNull", + "TraceString": "[ api] MsQuicOpenVersion, NULL", + "UniqueId": "LibraryMsQuicOpenVersionNull", "splitArgs": [], "macroName": "QuicTraceLogVerbose" }, - "LibraryMsQuicOpenEntry": { + "LibraryMsQuicOpenVersionEntry": { "ModuleProperites": {}, - "TraceString": "[ api] MsQuicOpen", - "UniqueId": "LibraryMsQuicOpenEntry", + "TraceString": "[ api] MsQuicOpenVersion", + "UniqueId": "LibraryMsQuicOpenVersionEntry", "splitArgs": [], "macroName": "QuicTraceLogVerbose" }, - "LibraryMsQuicOpenExit": { + "LibraryMsQuicOpenVersionExit": { "ModuleProperites": {}, - "TraceString": "[ api] MsQuicOpen, status=0x%x", - "UniqueId": "LibraryMsQuicOpenExit", + "TraceString": "[ api] MsQuicOpenVersion, status=0x%x", + "UniqueId": "LibraryMsQuicOpenVersionExit", "splitArgs": [ { "DefinationEncoding": "x", @@ -10437,6 +10629,13 @@ ], "macroName": "QuicTraceLogInfo" }, + "PerfControlInitialized": { + "ModuleProperites": {}, + "TraceString": "[perf] Control interface initialized", + "UniqueId": "PerfControlInitialized", + "splitArgs": [], + "macroName": "QuicTraceLogVerbose" + }, "PerfControlUninitializing": { "ModuleProperites": {}, "TraceString": "[perf] Control interface uninitializing", @@ -11467,6 +11666,11 @@ "TraceID": "CertCreationEventAlreadyCreated", "EncodingString": "[test] CreateEvent opened existing event" }, + { + "UniquenessHash": "6fb480dc-d71f-74f2-dc75-559a5591fe3d", + "TraceID": "CertCapiVerifiedChain", + "EncodingString": "CertVerifyChain: %S 0x%x, result=0x%x" + }, { "UniquenessHash": "2e3530fe-5082-ce8a-7275-87755eb70c41", "TraceID": "CertCapiParsedChain", @@ -11677,6 +11881,21 @@ "TraceID": "DatapathTooLarge", "EncodingString": "[data][%p] Received larger than expected datagram from %!ADDR!" 
}, + { + "UniquenessHash": "0267d256-55e3-42eb-b15d-538473aca440", + "TraceID": "RouteResolutionEnd", + "EncodingString": "[conn][%p] Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu" + }, + { + "UniquenessHash": "8662b462-9871-7631-034d-9d175232ab4f", + "TraceID": "RouteResolutionStart", + "EncodingString": "[conn][%p] Starting to look up neighbor on Path[%hhu] with status %u" + }, + { + "UniquenessHash": "2b127bfd-4623-d1ad-f438-977d808c9514", + "TraceID": "DatapathError", + "EncodingString": "[data][%p] ERROR, %s." + }, { "UniquenessHash": "b34bba29-c6d5-e31d-a099-d73a62330504", "TraceID": "WindowsUserLoaded", @@ -11697,6 +11916,11 @@ "TraceID": "ProcessorInfo", "EncodingString": "[ dll] Proc[%u] Group[%hu] Index[%u] NUMA[%u]" }, + { + "UniquenessHash": "4f262ea3-4eb1-45f3-019c-54d89cf92893", + "TraceID": "WindowsUserInitialized2", + "EncodingString": "[ dll] Initialized (AvailMem = %llu bytes, TimerResolution = [%u, %u])" + }, { "UniquenessHash": "cdce2126-524a-0cae-e4d3-c8bd5ef39930", "TraceID": "WindowsUserInitialized", @@ -11742,6 +11966,11 @@ "TraceID": "StreamAppSend", "EncodingString": "[strm][%p] App queuing send [%llu bytes, %u buffers, 0x%x flags]" }, + { + "UniquenessHash": "dddba4c1-201c-11ae-51de-155523e40b7e", + "TraceID": "ApiError", + "EncodingString": "[ api] Error %u" + }, { "UniquenessHash": "0ebbffbe-69d8-3f2b-949d-d93cdd7f8b99", "TraceID": "ConnError", @@ -12242,6 +12471,16 @@ "TraceID": "Unreachable", "EncodingString": "[conn][%p] Received unreachable event" }, + { + "UniquenessHash": "e8bf302f-52d7-c228-4cc8-a74a099b0baa", + "TraceID": "SuccessfulRouteResolution", + "EncodingString": "[conn][%p] Processing successful route completion Path[%hhu]" + }, + { + "UniquenessHash": "d3e660d6-4f05-fd4d-e6e5-c8967d710df4", + "TraceID": "FailedRouteResolution", + "EncodingString": "[conn][%p] Processing failed route completion Path[%hhu]" + }, { "UniquenessHash": "14f03b98-a434-2f35-2ed0-1fa71aa50e44", "TraceID": 
"UpdatePeerPacketTolerance", @@ -13367,6 +13606,16 @@ "TraceID": "FrameLogAckFrequencyInvalid", "EncodingString": "[%c][%cX][%llu] ACK_FREQUENCY [Invalid]" }, + { + "UniquenessHash": "7470e68a-8ad6-563a-4957-76dc22e5deeb", + "TraceID": "FrameLogAckFrequency", + "EncodingString": "[%c][%cX][%llu] ACK_FREQUENCY SeqNum:%llu PktTolerance:%llu MaxAckDelay:%llu IgnoreOrder:%hhu IgnoreCE:%hhu" + }, + { + "UniquenessHash": "d0932f9a-e8e8-65d9-692c-3bd379a86d58", + "TraceID": "FrameLogImmediateAck", + "EncodingString": "[%c][%cX][%llu] IMMEDIATE_ACK" + }, { "UniquenessHash": "60d753ab-6710-fe16-e47d-37f046f5973c", "TraceID": "IgnoreCryptoFrame", @@ -13477,6 +13726,11 @@ "TraceID": "ConnKeyPhaseChange", "EncodingString": "[conn][%p] Key phase change (locally initiated=%hhu)." }, + { + "UniquenessHash": "e7d3b50d-c315-3189-9752-de68bf66c705", + "TraceID": "LogPacketVersionNegotiation", + "EncodingString": "[%c][%cX][-] VerNeg DestCid:%s SrcCid:%s (Payload %hu bytes)" + }, { "UniquenessHash": "afa9276d-cc3c-6cb1-4df9-e4d2fdc0e66d", "TraceID": "LogPacketVersionNegotiationVersion", @@ -13557,6 +13811,11 @@ "TraceID": "StillInTimerWheel", "EncodingString": "[conn][%p] Still in timer wheel! Connection was likely leaked!" }, + { + "UniquenessHash": "4687c526-98b4-7014-5ab2-6cf89e76f504", + "TraceID": "ListenerIndicateStopComplete", + "EncodingString": "[list][%p] Indicating STOP_COMPLETE" + }, { "UniquenessHash": "f27f1ff0-edfe-bf39-708a-f9da7a2b4d3d", "TraceID": "ListenerIndicateNewConnection", @@ -14033,19 +14292,19 @@ "EncodingString": "[ lib] No longer in use." 
}, { - "UniquenessHash": "a92d40e6-9ec9-6dbe-5bce-51a661ed1554", - "TraceID": "LibraryMsQuicOpenNull", - "EncodingString": "[ api] MsQuicOpen, NULL" + "UniquenessHash": "9dbf9e05-d5cd-4ec3-ea93-fb33942ff968", + "TraceID": "LibraryMsQuicOpenVersionNull", + "EncodingString": "[ api] MsQuicOpenVersion, NULL" }, { - "UniquenessHash": "831a9b86-1a75-79e7-88d5-aa7c600531c4", - "TraceID": "LibraryMsQuicOpenEntry", - "EncodingString": "[ api] MsQuicOpen" + "UniquenessHash": "108ce8c8-296c-0dfd-9efc-9ed86bcb1c7d", + "TraceID": "LibraryMsQuicOpenVersionEntry", + "EncodingString": "[ api] MsQuicOpenVersion" }, { - "UniquenessHash": "6f7eb60d-9947-2598-382b-aa6b6053d53d", - "TraceID": "LibraryMsQuicOpenExit", - "EncodingString": "[ api] MsQuicOpen, status=0x%x" + "UniquenessHash": "fc39ba20-c315-583e-6b62-656fbbd3d077", + "TraceID": "LibraryMsQuicOpenVersionExit", + "EncodingString": "[ api] MsQuicOpenVersion, status=0x%x" }, { "UniquenessHash": "ae77005c-231d-7848-7e06-879ecbd5363d", @@ -14422,6 +14681,11 @@ "TraceID": "PerfControlClientIoctlComplete", "EncodingString": "[perf] Client %p completing request, 0x%x" }, + { + "UniquenessHash": "0ba57698-ca5f-449b-4d87-183b7ea2918b", + "TraceID": "PerfControlInitialized", + "EncodingString": "[perf] Control interface initialized" + }, { "UniquenessHash": "bce70700-af58-e03b-5021-93a28f3dd74f", "TraceID": "PerfControlUninitializing", diff --git a/src/perf/lib/RpsClient.cpp b/src/perf/lib/RpsClient.cpp index c8c4cbb22..0637cb9b0 100644 --- a/src/perf/lib/RpsClient.cpp +++ b/src/perf/lib/RpsClient.cpp @@ -25,8 +25,9 @@ PrintHelp( "\n" " -target:<####> The target server to connect to.\n" " -runtime:<####> The total runtime (in ms). (def:%u)\n" + " -encrypt:<0/1> Enables/disables encryption. (def:1)\n" " -port:<####> The UDP port of the server. (def:%u)\n" - " -ip:<0/4/6> A hint for the resolving the hostname to an IP address. (def:0)\n" + " -ip:<0/4/6> A hint for the resolving the hostname to an IP address. 
(def:0)\n" " -conns:<####> The number of connections to use. (def:%u)\n" " -requests:<####> The number of requests to send at a time. (def:2*conns)\n" " -request:<####> The length of request payloads. (def:%u)\n" @@ -72,6 +73,7 @@ RpsClient::Init( Target[Len] = '\0'; TryGetValue(argc, argv, "runtime", &RunTime); + TryGetValue(argc, argv, "encrypt", &UseEncryption); TryGetValue(argc, argv, "port", &Port); TryGetValue(argc, argv, "conns", &ConnectionCount); RequestCount = 2 * ConnectionCount; @@ -175,11 +177,8 @@ RpsClient::Start( } QUIC_CONNECTION_CALLBACK_HANDLER Handler = - [](HQUIC Conn, void* Context, QUIC_CONNECTION_EVENT* Event) -> QUIC_STATUS { - return ((RpsClient*)Context)-> - ConnectionCallback( - Conn, - Event); + [](HQUIC /* Conn */, void* Context, QUIC_CONNECTION_EVENT* Event) -> QUIC_STATUS { + return ((RpsConnectionContext*)Context)->ConnectionCallback(Event); }; Connections = UniquePtr(new(std::nothrow) RpsConnectionContext[ConnectionCount]); @@ -201,11 +200,13 @@ RpsClient::Start( return Status; } + Connections[i].Client = this; + Status = MsQuic->ConnectionOpen( Registration, Handler, - this, + &Connections[i], &Connections[i].Handle); if (QUIC_FAILED(Status)) { WriteOutput("ConnectionOpen failed, 0x%x\n", Status); @@ -218,6 +219,20 @@ RpsClient::Start( Workers[i % WorkerCount].QueueConnection(&Connections[i]); } + if (!UseEncryption) { + BOOLEAN value = TRUE; + Status = + MsQuic->SetParam( + Connections[i].Handle, + QUIC_PARAM_CONN_DISABLE_1RTT_ENCRYPTION, + sizeof(value), + &value); + if (QUIC_FAILED(Status)) { + WriteOutput("MsQuic->SetParam (CONN_DISABLE_1RTT_ENCRYPTION) failed!\n"); + return Status; + } + } + BOOLEAN Opt = TRUE; Status = MsQuic->SetParam( @@ -347,14 +362,13 @@ RpsClient::GetExtraData( } QUIC_STATUS -RpsClient::ConnectionCallback( - _In_ HQUIC /* ConnectionHandle */, +RpsConnectionContext::ConnectionCallback( _Inout_ QUIC_CONNECTION_EVENT* Event ) { switch (Event->Type) { case QUIC_CONNECTION_EVENT_CONNECTED: - if 
((uint32_t)InterlockedIncrement64((int64_t*)&ActiveConnections) == ConnectionCount) { - CxPlatEventSet(AllConnected.Handle); + if ((uint32_t)InterlockedIncrement64((int64_t*)&Client->ActiveConnections) == Client->ConnectionCount) { + CxPlatEventSet(Client->AllConnected.Handle); } break; case QUIC_CONNECTION_EVENT_SHUTDOWN_INITIATED_BY_TRANSPORT: @@ -362,6 +376,12 @@ RpsClient::ConnectionCallback( break; case QUIC_CONNECTION_EVENT_SHUTDOWN_COMPLETE: break; + case QUIC_CONNECTION_EVENT_IDEAL_PROCESSOR_CHANGED: + if ((uint32_t)Event->IDEAL_PROCESSOR_CHANGED.IdealProcessor >= Client->WorkerCount) { + Event->IDEAL_PROCESSOR_CHANGED.IdealProcessor = (uint16_t)(Client->WorkerCount - 1); + } + Client->Workers[Event->IDEAL_PROCESSOR_CHANGED.IdealProcessor].UpdateConnection(this); + break; default: break; } diff --git a/src/perf/lib/RpsClient.h b/src/perf/lib/RpsClient.h index 47639d91f..9859c30a4 100644 --- a/src/perf/lib/RpsClient.h +++ b/src/perf/lib/RpsClient.h @@ -35,11 +35,16 @@ struct StreamContext { struct RpsConnectionContext { CXPLAT_LIST_ENTRY Link; // For Worker's connection queue + RpsClient* Client {nullptr}; RpsWorkerContext* Worker {nullptr}; HQUIC Handle {nullptr}; operator HQUIC() const { return Handle; } ~RpsConnectionContext() noexcept { if (Handle) { MsQuic->ConnectionClose(Handle); } } QUIC_STATUS + ConnectionCallback( + _Inout_ QUIC_CONNECTION_EVENT* Event + ); + QUIC_STATUS StreamCallback( _In_ StreamContext* StrmContext, _In_ HQUIC StreamHandle, @@ -100,6 +105,14 @@ struct RpsWorkerContext { CxPlatListInsertTail(&Connections, &Connection->Link); CxPlatLockRelease(&Lock); } + void UpdateConnection(RpsConnectionContext* Connection) { + if (this != Connection->Worker) { + CxPlatLockAcquire(&Connection->Worker->Lock); + CxPlatListEntryRemove(&Connection->Link); + CxPlatLockRelease(&Connection->Worker->Lock); + QueueConnection(Connection); + } + } void QueueSendRequest(); }; @@ -143,12 +156,6 @@ public: _Inout_ uint32_t* Length ) override; - QUIC_STATUS - 
ConnectionCallback( - _In_ HQUIC ConnectionHandle, - _Inout_ QUIC_CONNECTION_EVENT* Event - ); - MsQuicRegistration Registration { "secnetperf-client-rps", QUIC_EXECUTION_PROFILE_LOW_LATENCY, @@ -167,6 +174,7 @@ public: uint16_t Port {PERF_DEFAULT_PORT}; QUIC_ADDRESS_FAMILY RemoteFamily {QUIC_ADDRESS_FAMILY_UNSPEC}; UniquePtr Target; + uint8_t UseEncryption {TRUE}; uint32_t RunTime {RPS_DEFAULT_RUN_TIME}; uint32_t ConnectionCount {RPS_DEFAULT_CONNECTION_COUNT}; uint32_t RequestCount {RPS_DEFAULT_CONNECTION_COUNT * 2}; diff --git a/src/platform/CMakeLists.txt b/src/platform/CMakeLists.txt index 6897762b3..28f480d71 100644 --- a/src/platform/CMakeLists.txt +++ b/src/platform/CMakeLists.txt @@ -12,7 +12,12 @@ endif() set(SOURCES crypt.c hashtable.c pcp.c platform_worker.c toeplitz.c) if("${CX_PLATFORM}" STREQUAL "windows") - set(SOURCES ${SOURCES} datapath_winuser.c platform_winuser.c storage_winuser.c) + set(SOURCES ${SOURCES} platform_winuser.c storage_winuser.c) + if(QUIC_USE_XDP) + set(SOURCES ${SOURCES} datapath_raw.c datapath_raw_socket.c datapath_raw_xdp.c) + else() + set(SOURCES ${SOURCES} datapath_winuser.c) + endif() else() set(SOURCES ${SOURCES} inline.c platform_posix.c storage_posix.c cgroup.c) if(CX_PLATFORM STREQUAL "linux") @@ -41,7 +46,16 @@ endif() add_library(platform STATIC ${SOURCES}) -target_link_libraries(platform PUBLIC inc) +if(QUIC_USE_XDP) + target_link_libraries( + platform + PUBLIC + inc + wbemuuid + ${PROJECT_SOURCE_DIR}/artifacts/xdp/lib/xdpapi.lib) +else() + target_link_libraries(platform PUBLIC inc) +endif() if("${CX_PLATFORM}" STREQUAL "windows") target_link_libraries(platform PUBLIC winmm) @@ -51,7 +65,15 @@ target_link_libraries(platform PRIVATE warnings main_binary_link_args) set_property(TARGET platform PROPERTY FOLDER "${QUIC_FOLDER_PREFIX}libraries") -target_include_directories(platform PRIVATE ${EXTRA_PLATFORM_INCLUDE_DIRECTORIES}) +if(QUIC_USE_XDP) + target_include_directories( + platform + PRIVATE + 
${EXTRA_PLATFORM_INCLUDE_DIRECTORIES} + ${PROJECT_SOURCE_DIR}/artifacts/xdp/include) +else() + target_include_directories(platform PRIVATE ${EXTRA_PLATFORM_INCLUDE_DIRECTORIES}) +endif() if (MSVC AND (QUIC_TLS STREQUAL "openssl" OR QUIC_TLS STREQUAL "schannel") AND NOT QUIC_ENABLE_SANITIZERS) target_compile_options(platform PRIVATE /analyze) diff --git a/src/platform/datapath_raw.c b/src/platform/datapath_raw.c new file mode 100644 index 000000000..f3cd91743 --- /dev/null +++ b/src/platform/datapath_raw.c @@ -0,0 +1,642 @@ +/*++ + + Copyright (c) Microsoft Corporation. + Licensed under the MIT License. + +Abstract: + + QUIC Raw (i.e. DPDK or XDP) Datapath Implementation (User Mode) + +--*/ + +#include "datapath_raw.h" +#ifdef QUIC_CLOG +#include "datapath_raw.c.clog.h" +#endif + +#pragma warning(disable:4116) // unnamed type definition in parentheses +#pragma warning(disable:4100) // unreferenced formal parameter + +CXPLAT_THREAD_CALLBACK(CxPlatRouteResolutionWorkerThread, Context); + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatDataPathRouteWorkerUninitialize( + _In_ CXPLAT_ROUTE_RESOLUTION_WORKER* Worker + ) +{ + Worker->Enabled = FALSE; + CxPlatEventSet(Worker->Ready); + + // + // Wait for the thread to finish. + // + if (Worker->Thread) { + CxPlatThreadWait(&Worker->Thread); + CxPlatThreadDelete(&Worker->Thread); + } + + CxPlatEventUninitialize(Worker->Ready); + CxPlatDispatchLockUninitialize(&Worker->Lock); + CxPlatPoolUninitialize(&Worker->OperationPool); + CXPLAT_FREE(Worker, QUIC_POOL_ROUTE_RESOLUTION_WORKER); +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatDataPathRouteWorkerInitialize( + _Inout_ CXPLAT_DATAPATH* DataPath + ) +{ + QUIC_STATUS Status; + CXPLAT_ROUTE_RESOLUTION_WORKER* Worker = + CXPLAT_ALLOC_NONPAGED( + sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER), QUIC_POOL_ROUTE_RESOLUTION_WORKER); + if (Worker == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "CXPLAT_DATAPATH", + sizeof(CXPLAT_ROUTE_RESOLUTION_WORKER)); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + Worker->Enabled = TRUE; + CxPlatEventInitialize(&Worker->Ready, FALSE, FALSE); + CxPlatDispatchLockInitialize(&Worker->Lock); + CxPlatListInitializeHead(&Worker->Operations); + + CxPlatPoolInitialize( + FALSE, + sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION), + QUIC_POOL_ROUTE_RESOLUTION_OPER, + &Worker->OperationPool); + + CXPLAT_THREAD_CONFIG ThreadConfig = { + CXPLAT_THREAD_FLAG_NONE, + 0, + "RouteResolutionWorkerThread", + CxPlatRouteResolutionWorkerThread, + Worker + }; + + Status = CxPlatThreadCreate(&ThreadConfig, &Worker->Thread); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "CxPlatThreadCreate"); + goto Error; + } + + DataPath->RouteResolutionWorker = Worker; + +Error: + if (QUIC_FAILED(Status)) { + if (Worker != NULL) { + CxPlatDataPathRouteWorkerUninitialize(Worker); + } + } + return Status; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatDataPathInitialize( + _In_ uint32_t ClientRecvContextLength, + _In_opt_ const CXPLAT_UDP_DATAPATH_CALLBACKS* UdpCallbacks, + _In_opt_ const CXPLAT_TCP_DATAPATH_CALLBACKS* TcpCallbacks, + _Out_ CXPLAT_DATAPATH** NewDataPath + ) +{ + QUIC_STATUS Status = QUIC_STATUS_SUCCESS; + const size_t DatapathSize = CxPlatDpRawGetDapathSize(); + CXPLAT_FRE_ASSERT(DatapathSize > sizeof(CXPLAT_DATAPATH)); + + UNREFERENCED_PARAMETER(TcpCallbacks); + + *NewDataPath = CXPLAT_ALLOC_PAGED(DatapathSize, QUIC_POOL_DATAPATH); + if (*NewDataPath == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "CXPLAT_DATAPATH", + DatapathSize); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + CxPlatZeroMemory(*NewDataPath, DatapathSize); + + if (UdpCallbacks) { + (*NewDataPath)->UdpHandlers = *UdpCallbacks; + } + + if (!CxPlatSockPoolInitialize(&(*NewDataPath)->SocketPool)) { + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + Status = CxPlatDpRawInitialize(*NewDataPath, ClientRecvContextLength); + if (QUIC_FAILED(Status)) { + CxPlatSockPoolUninitialize(&(*NewDataPath)->SocketPool); + goto Error; + } + + Status = CxPlatDataPathRouteWorkerInitialize(*NewDataPath); + if (QUIC_FAILED(Status)) { + goto Error; + } + +Error: + + if (QUIC_FAILED(Status)) { + if (*NewDataPath != NULL) { + CXPLAT_FREE(*NewDataPath, QUIC_POOL_DATAPATH); + *NewDataPath = NULL; + } + } + + return Status; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatDataPathUninitialize( + _In_ CXPLAT_DATAPATH* Datapath + ) +{ + if (Datapath == NULL) { + return; + } + CxPlatDataPathRouteWorkerUninitialize(Datapath->RouteResolutionWorker); + CxPlatDpRawUninitialize(Datapath); + CxPlatSockPoolUninitialize(&Datapath->SocketPool); + CXPLAT_FREE(Datapath, QUIC_POOL_DATAPATH); +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatDpRawGenerateCpuTable( + _Inout_ CXPLAT_DATAPATH* Datapath + ) +{ + Datapath->NumaNode = (uint8_t)CxPlatProcessorInfo[Datapath->Cpu].NumaNode; + + // + // Build up the set of CPUs that are on the same NUMA node as this one. 
+ // + Datapath->CpuTableSize = 0; + for (uint16_t i = 0; i < CxPlatProcMaxCount(); i++) { + if (i != Datapath->Cpu && // Skip raw layer's CPU + CxPlatProcessorInfo[i].NumaNode == Datapath->NumaNode) { + Datapath->CpuTable[Datapath->CpuTableSize++] = i; + } + } +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +uint32_t +CxPlatDataPathGetSupportedFeatures( + _In_ CXPLAT_DATAPATH* Datapath + ) +{ + return 0; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +BOOLEAN +CxPlatDataPathIsPaddingPreferred( + _In_ CXPLAT_DATAPATH* Datapath + ) +{ + return FALSE; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +_Success_(QUIC_SUCCEEDED(return)) +QUIC_STATUS +CxPlatDataPathGetLocalAddresses( + _In_ CXPLAT_DATAPATH* Datapath, + _Outptr_ _At_(*Addresses, __drv_allocatesMem(Mem)) + CXPLAT_ADAPTER_ADDRESS** Addresses, + _Out_ uint32_t* AddressesCount + ) +{ + return QUIC_STATUS_NOT_SUPPORTED; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +_Success_(QUIC_SUCCEEDED(return)) +QUIC_STATUS +CxPlatDataPathGetGatewayAddresses( + _In_ CXPLAT_DATAPATH* Datapath, + _Outptr_ _At_(*GatewayAddresses, __drv_allocatesMem(Mem)) + QUIC_ADDR** GatewayAddresses, + _Out_ uint32_t* GatewayAddressesCount + ) +{ + return QUIC_STATUS_NOT_SUPPORTED; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatDataPathResolveAddress( + _In_ CXPLAT_DATAPATH* Datapath, + _In_z_ const char* HostName, + _Inout_ QUIC_ADDR* Address + ) +{ + if (QuicAddrFromString(HostName, 0, Address)) { + return QUIC_STATUS_SUCCESS; + } + return QUIC_STATUS_NOT_SUPPORTED; // TODO - Support name resolution +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatSocketCreateUdp( + _In_ CXPLAT_DATAPATH* Datapath, + _In_ const CXPLAT_UDP_CONFIG* Config, + _Out_ CXPLAT_SOCKET** NewSocket + ) +{ + QUIC_STATUS Status = QUIC_STATUS_SUCCESS; + + *NewSocket = CXPLAT_ALLOC_PAGED(sizeof(CXPLAT_SOCKET), QUIC_POOL_SOCKET); + if (*NewSocket == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "CXPLAT_SOCKET", + sizeof(CXPLAT_SOCKET)); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + QuicTraceEvent( + DatapathCreated, + "[data][%p] Created, local=%!ADDR!, remote=%!ADDR!", + *NewSocket, + CASTED_CLOG_BYTEARRAY(Config->LocalAddress ? sizeof(*Config->LocalAddress) : 0, Config->LocalAddress), + CASTED_CLOG_BYTEARRAY(Config->RemoteAddress ? sizeof(*Config->RemoteAddress) : 0, Config->RemoteAddress)); + + CxPlatZeroMemory(*NewSocket, sizeof(CXPLAT_SOCKET)); + CxPlatRundownInitialize(&(*NewSocket)->Rundown); + (*NewSocket)->Datapath = Datapath; + (*NewSocket)->CallbackContext = Config->CallbackContext; + + if (Config->RemoteAddress) { + CXPLAT_FRE_ASSERT(!QuicAddrIsWildCard(Config->RemoteAddress)); // No wildcard remote addresses allowed. + (*NewSocket)->Connected = TRUE; + (*NewSocket)->RemoteAddress = *Config->RemoteAddress; + } + + if (Config->LocalAddress) { + (*NewSocket)->LocalAddress = *Config->LocalAddress; + if (QuicAddrIsWildCard(Config->LocalAddress)) { + if (!(*NewSocket)->Connected) { + (*NewSocket)->Wildcard = TRUE; + } + } else { + CXPLAT_FRE_ASSERT((*NewSocket)->Connected); // Assumes only connected sockets fully specify local address + } + } else { + QuicAddrSetFamily(&(*NewSocket)->LocalAddress, QUIC_ADDRESS_FAMILY_INET6); + if (!(*NewSocket)->Connected) { + (*NewSocket)->Wildcard = TRUE; + } + } + + CXPLAT_FRE_ASSERT((*NewSocket)->Wildcard ^ (*NewSocket)->Connected); // Assumes either a pure wildcard listener or a + // connected socket; not both. 
+ + if (!CxPlatTryAddSocket(&Datapath->SocketPool, *NewSocket)) { + Status = QUIC_STATUS_ADDRESS_IN_USE; + goto Error; + } + + CxPlatDpRawPlumbRulesOnSocket(*NewSocket, TRUE); + +Error: + + if (QUIC_FAILED(Status)) { + if (*NewSocket != NULL) { + CxPlatRundownUninitialize(&(*NewSocket)->Rundown); + CXPLAT_FREE(*NewSocket, QUIC_POOL_SOCKET); + *NewSocket = NULL; + } + } + + return Status; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatSocketCreateTcp( + _In_ CXPLAT_DATAPATH* Datapath, + _In_opt_ const QUIC_ADDR* LocalAddress, + _In_ const QUIC_ADDR* RemoteAddress, + _In_opt_ void* CallbackContext, + _Out_ CXPLAT_SOCKET** Socket + ) +{ + return QUIC_STATUS_NOT_SUPPORTED; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatSocketCreateTcpListener( + _In_ CXPLAT_DATAPATH* Datapath, + _In_opt_ const QUIC_ADDR* LocalAddress, + _In_opt_ void* RecvCallbackContext, + _Out_ CXPLAT_SOCKET** NewSocket + ) +{ + return QUIC_STATUS_NOT_SUPPORTED; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatSocketDelete( + _In_ CXPLAT_SOCKET* Socket + ) +{ + CxPlatDpRawPlumbRulesOnSocket(Socket, FALSE); + CxPlatRemoveSocket(&Socket->Datapath->SocketPool, Socket); + CxPlatRundownReleaseAndWait(&Socket->Rundown); + CXPLAT_FREE(Socket, QUIC_POOL_SOCKET); +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +UINT16 +CxPlatSocketGetLocalMtu( + _In_ CXPLAT_SOCKET* Socket + ) +{ + return 1500; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatSocketGetLocalAddress( + _In_ CXPLAT_SOCKET* Socket, + _Out_ QUIC_ADDR* Address + ) +{ + *Address = Socket->LocalAddress; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatSocketGetRemoteAddress( + _In_ CXPLAT_SOCKET* Socket, + _Out_ QUIC_ADDR* Address + ) +{ + *Address = Socket->RemoteAddress; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatDpRawRxEthernet( + _In_ const CXPLAT_DATAPATH* Datapath, + _In_reads_(PacketCount) + CXPLAT_RECV_DATA** Packets, + _In_ uint16_t PacketCount + ) +{ + for (uint16_t i = 0; i < PacketCount; 
i++) { + CXPLAT_SOCKET* Socket = NULL; + CXPLAT_RECV_DATA* PacketChain = Packets[i]; + CXPLAT_DBG_ASSERT(PacketChain->Next == NULL); + + if (PacketChain->Reserved == L4_TYPE_UDP) { + Socket = + CxPlatGetSocket( + &Datapath->SocketPool, + &PacketChain->Route->LocalAddress, + &PacketChain->Route->RemoteAddress); + } + if (Socket) { + // + // Found a match. Chain and deliver contiguous packets with the same 4-tuple. + // + while (i < PacketCount) { + QuicTraceEvent( + DatapathRecv, + "[data][%p] Recv %u bytes (segment=%hu) Src=%!ADDR! Dst=%!ADDR!", + Socket, + Packets[i]->BufferLength, + Packets[i]->BufferLength, + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->LocalAddress), &Packets[i]->Route->LocalAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Packets[i]->Route->RemoteAddress), &Packets[i]->Route->RemoteAddress)); + if (i == PacketCount - 1 || + Packets[i+1]->Reserved != L4_TYPE_UDP || + Packets[i+1]->Route->LocalAddress.Ipv4.sin_port != Socket->LocalAddress.Ipv4.sin_port || + !CxPlatSocketCompare(Socket, &Packets[i+1]->Route->LocalAddress, &Packets[i+1]->Route->RemoteAddress)) { + break; + } + Packets[i]->Next = Packets[i+1]; + CXPLAT_DBG_ASSERT(Packets[i+1]->Next == NULL); + i++; + } + Datapath->UdpHandlers.Receive(Socket, Socket->CallbackContext, (CXPLAT_RECV_DATA*)PacketChain); + CxPlatRundownRelease(&Socket->Rundown); + } else { + CxPlatDpRawRxFree(PacketChain); + } + } +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatRecvDataReturn( + _In_opt_ CXPLAT_RECV_DATA* RecvDataChain + ) +{ + CxPlatDpRawRxFree((const CXPLAT_RECV_DATA*)RecvDataChain); +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +_Success_(return != NULL) +CXPLAT_SEND_DATA* +CxPlatSendDataAlloc( + _In_ CXPLAT_SOCKET* Socket, + _In_ CXPLAT_ECN_TYPE ECN, + _In_ uint16_t MaxPacketSize, + _Inout_ CXPLAT_ROUTE* Route + ) +{ + return CxPlatDpRawTxAlloc(Socket->Datapath, ECN, MaxPacketSize, Route); +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +_Success_(return != NULL) +QUIC_BUFFER* +CxPlatSendDataAllocBuffer( + 
_In_ CXPLAT_SEND_DATA* SendData, + _In_ uint16_t MaxBufferLength + ) +{ + SendData->Buffer.Length = MaxBufferLength; + return &SendData->Buffer; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatSendDataFree( + _In_ CXPLAT_SEND_DATA* SendData + ) +{ + CxPlatDpRawTxFree(SendData); +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatSendDataFreeBuffer( + _In_ CXPLAT_SEND_DATA* SendData, + _In_ QUIC_BUFFER* Buffer + ) +{ + // No-op +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +BOOLEAN +CxPlatSendDataIsFull( + _In_ CXPLAT_SEND_DATA* SendData + ) +{ + return TRUE; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +QUIC_STATUS +CxPlatSocketSend( + _In_ CXPLAT_SOCKET* Socket, + _In_ const CXPLAT_ROUTE* Route, + _In_ CXPLAT_SEND_DATA* SendData, + _In_ uint16_t IdealProcessor + ) +{ + QuicTraceEvent( + DatapathSend, + "[data][%p] Send %u bytes in %hhu buffers (segment=%hu) Dst=%!ADDR!, Src=%!ADDR!", + Socket, + SendData->Buffer.Length, + 1, + (uint16_t)SendData->Buffer.Length, + CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress), + CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); + CXPLAT_DBG_ASSERT(Route->State == RouteResolved); + CXPLAT_DBG_ASSERT(Route->Queue != NULL); + const CXPLAT_INTERFACE* Interface = CxPlatDpRawGetInterfaceFromQueue(Route->Queue); + CxPlatFramingWriteHeaders( + Socket, Route, &SendData->Buffer, + Interface->OffloadStatus.Transmit.NetworkLayerXsum, + Interface->OffloadStatus.Transmit.TransportLayerXsum); + CxPlatDpRawTxEnqueue(SendData); + return QUIC_STATUS_SUCCESS; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatSocketSetParam( + _In_ CXPLAT_SOCKET* Socket, + _In_ uint32_t Param, + _In_ uint32_t BufferLength, + _In_reads_bytes_(BufferLength) const UINT8 * Buffer + ) +{ + UNREFERENCED_PARAMETER(Socket); + UNREFERENCED_PARAMETER(Param); + UNREFERENCED_PARAMETER(BufferLength); + UNREFERENCED_PARAMETER(Buffer); + return QUIC_STATUS_NOT_SUPPORTED; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) 
+QUIC_STATUS
+CxPlatSocketGetParam(
+    _In_ CXPLAT_SOCKET* Socket,
+    _In_ uint32_t Param,
+    _Inout_ PUINT32 BufferLength,
+    _Out_writes_bytes_opt_(*BufferLength) UINT8 * Buffer
+    )
+{
+    //
+    // No socket parameters are exposed by this datapath; every query is
+    // rejected.
+    //
+    UNREFERENCED_PARAMETER(Socket);
+    UNREFERENCED_PARAMETER(Param);
+    UNREFERENCED_PARAMETER(BufferLength);
+    UNREFERENCED_PARAMETER(Buffer);
+    return QUIC_STATUS_NOT_SUPPORTED;
+}
+
+//
+// Worker thread that drains the queued route resolution operations. Each time
+// it is kicked through Worker->Ready it takes the whole pending list under the
+// worker lock, then for every operation looks up (and if necessary solicits)
+// the neighbor entry and reports the result through the operation's callback.
+// Every operation is returned to Worker->OperationPool once its callback has
+// run. When Worker->Enabled turns FALSE the remaining operations are completed
+// as failures.
+//
+CXPLAT_THREAD_CALLBACK(CxPlatRouteResolutionWorkerThread, Context)
+{
+    CXPLAT_ROUTE_RESOLUTION_WORKER* Worker = (CXPLAT_ROUTE_RESOLUTION_WORKER*)Context;
+
+    while (Worker->Enabled) {
+        CxPlatEventWaitForever(Worker->Ready);
+        CXPLAT_LIST_ENTRY Operations;
+        CxPlatListInitializeHead(&Operations);
+
+        CxPlatDispatchLockAcquire(&Worker->Lock);
+        if (!CxPlatListIsEmpty(&Worker->Operations)) {
+            CxPlatListMoveItems(&Worker->Operations, &Operations);
+        }
+        CxPlatDispatchLockRelease(&Worker->Lock);
+
+        while (!CxPlatListIsEmpty(&Operations)) {
+            CXPLAT_ROUTE_RESOLUTION_OPERATION* Operation =
+                CXPLAT_CONTAINING_RECORD(
+                    CxPlatListRemoveHead(&Operations), CXPLAT_ROUTE_RESOLUTION_OPERATION, WorkerLink);
+            NETIO_STATUS Status =
+                GetIpNetEntry2(&Operation->IpnetRow);
+            if (Status != ERROR_SUCCESS || Operation->IpnetRow.State <= NlnsIncomplete) {
+                Status =
+                    ResolveIpNetEntry2(&Operation->IpnetRow, NULL);
+                if (Status != 0) {
+                    QuicTraceEvent(
+                        DatapathErrorStatus,
+                        "[data][%p] ERROR, %u, %s.",
+                        Operation,
+                        Status,
+                        "ResolveIpNetEntry2");
+                    Operation->Callback(
+                        Operation->Context, NULL, Operation->PathId, FALSE);
+                } else {
+                    Operation->Callback(
+                        Operation->Context, Operation->IpnetRow.PhysicalAddress, Operation->PathId, TRUE);
+                }
+            } else {
+                Operation->Callback(
+                    Operation->Context, Operation->IpnetRow.PhysicalAddress, Operation->PathId, TRUE);
+            }
+
+            //
+            // The operation is finished on every path above, so always hand
+            // it back to the pool (it used to leak when the neighbor entry
+            // was already cached).
+            //
+            CxPlatPoolFree(&Worker->OperationPool, Operation);
+        }
+    }
+
+    //
+    // Clean up leftover work.
+    //
+    CXPLAT_LIST_ENTRY Operations;
+    CxPlatListInitializeHead(&Operations);
+
+    CxPlatDispatchLockAcquire(&Worker->Lock);
+    if (!CxPlatListIsEmpty(&Worker->Operations)) {
+        CxPlatListMoveItems(&Worker->Operations, &Operations);
+    }
+    CxPlatDispatchLockRelease(&Worker->Lock);
+
+    while (!CxPlatListIsEmpty(&Operations)) {
+        CXPLAT_ROUTE_RESOLUTION_OPERATION* Operation =
+            CXPLAT_CONTAINING_RECORD(
+                CxPlatListRemoveHead(&Operations), CXPLAT_ROUTE_RESOLUTION_OPERATION, WorkerLink);
+        Operation->Callback(Operation->Context, NULL, Operation->PathId, FALSE);
+        //
+        // Operations are allocated from the worker's pool, so release them
+        // the same way as in the main loop above.
+        //
+        CxPlatPoolFree(&Worker->OperationPool, Operation);
+    }
+
+    return 0;
+}
diff --git a/src/platform/datapath_raw.h b/src/platform/datapath_raw.h
new file mode 100644
index 000000000..3973baae1
--- /dev/null
+++ b/src/platform/datapath_raw.h
@@ -0,0 +1,342 @@
+/*++
+
+    Copyright (c) Microsoft Corporation.
+    Licensed under the MIT License.
+
+--*/
+
+#include "platform_internal.h"
+#include "quic_hashtable.h"
+
+//
+// The set of raw sockets, kept in a hash table protected by a reader/writer
+// lock.
+//
+typedef struct CXPLAT_SOCKET_POOL {
+
+    CXPLAT_RW_LOCK Lock;
+    CXPLAT_HASHTABLE Sockets;
+
+} CXPLAT_SOCKET_POOL;
+
+typedef struct CXPLAT_DATAPATH CXPLAT_DATAPATH;
+
+//
+// A worker thread for draining queued route resolution operations.
+//
+typedef struct QUIC_CACHEALIGN CXPLAT_ROUTE_RESOLUTION_WORKER {
+    //
+    // TRUE if the worker is currently running.
+    //
+    BOOLEAN Enabled;
+
+    //
+    // An event to kick the thread.
+    //
+    CXPLAT_EVENT Ready;
+
+    CXPLAT_THREAD Thread;
+
+    //
+    // Pool the queued CXPLAT_ROUTE_RESOLUTION_OPERATION objects are allocated
+    // from and returned to.
+    //
+    CXPLAT_POOL OperationPool;
+
+    //
+    // Serializes access to the route resolution operations.
+    //
+    CXPLAT_DISPATCH_LOCK Lock;
+    CXPLAT_LIST_ENTRY Operations;
+} CXPLAT_ROUTE_RESOLUTION_WORKER;
+
+//
+// A single queued neighbor (link-layer address) lookup, completed through
+// Callback by the route resolution worker.
+//
+typedef struct CXPLAT_ROUTE_RESOLUTION_OPERATION {
+    //
+    // Link in the worker's operation queue.
+    // N.B. Multi-threaded access, synchronized by worker's operation lock.
+    //
+    CXPLAT_LIST_ENTRY WorkerLink;
+    MIB_IPNET_ROW2 IpnetRow;
+    void* Context;
+    uint8_t PathId;
+    CXPLAT_ROUTE_RESOLUTION_CALLBACK_HANDLER Callback;
+} CXPLAT_ROUTE_RESOLUTION_OPERATION;
+
+typedef struct CXPLAT_DATAPATH {
+
+    CXPLAT_UDP_DATAPATH_CALLBACKS UdpHandlers;
+
+    CXPLAT_SOCKET_POOL SocketPool;
+
+    CXPLAT_ROUTE_RESOLUTION_WORKER* RouteResolutionWorker;
+
+    // RSS stuff
+    uint16_t Cpu;
+    uint8_t NumaNode;
+    uint8_t CpuTableSize;
+    uint16_t CpuTable[64];
+
+    //
+    // List of CXPLAT_INTERFACE (linked through their Link field).
+    //
+    CXPLAT_LIST_ENTRY Interfaces;
+
+} CXPLAT_DATAPATH;
+
+#define ETH_MAC_ADDR_LEN 6
+
+typedef struct CXPLAT_INTERFACE {
+    CXPLAT_LIST_ENTRY Link;
+    uint32_t IfIndex;
+    UCHAR PhysicalAddress[ETH_MAC_ADDR_LEN];
+    struct {
+        struct {
+            BOOLEAN NetworkLayerXsum : 1;
+            BOOLEAN TransportLayerXsum : 1;
+        } Transmit;
+        struct {
+            BOOLEAN NetworkLayerXsum : 1;
+            BOOLEAN TransportLayerXsum : 1;
+        } Receive;
+    } OffloadStatus;
+} CXPLAT_INTERFACE;
+
+typedef struct CXPLAT_SEND_DATA {
+
+    QUIC_BUFFER Buffer;
+
+} CXPLAT_SEND_DATA;
+
+//
+// Queries the raw datapath stack for the total size needed to allocate the
+// datapath structure.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+size_t
+CxPlatDpRawGetDapathSize(
+    void
+    );
+
+//
+// Initializes the raw datapath stack.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+QUIC_STATUS
+CxPlatDpRawInitialize(
+    _Inout_ CXPLAT_DATAPATH* Datapath,
+    _In_ uint32_t ClientRecvContextLength
+    );
+
+//
+// Cleans up the raw datapath stack.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawUninitialize(
+    _In_ CXPLAT_DATAPATH* Datapath
+    );
+
+//
+// Upcall from raw datapath to generate the CPU table used for RSS.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawGenerateCpuTable(
+    _Inout_ CXPLAT_DATAPATH* Datapath
+    );
+
+//
+// Called on creation and deletion of a socket. It indicates to the raw datapath
+// that it should update any filtering rules as necessary.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawPlumbRulesOnSocket(
+    _In_ CXPLAT_SOCKET* Socket,
+    _In_ BOOLEAN IsCreated
+    );
+
+//
+// Assigns a raw datapath queue to a new route.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawAssignQueue(
+    _In_ const CXPLAT_INTERFACE* Interface,
+    _Inout_ CXPLAT_ROUTE* Route
+    );
+
+//
+// Returns the raw interface for a given queue.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+const CXPLAT_INTERFACE*
+CxPlatDpRawGetInterfaceFromQueue(
+    _In_ const void* Queue
+    );
+
+//
+// Number of bytes to reserve in front of the payload for each header layer.
+//
+typedef struct HEADER_BACKFILL {
+    uint16_t TransportLayer;
+    uint16_t NetworkLayer;
+    uint16_t LinkLayer;
+    uint16_t AllLayer; // Sum of the above three.
+} HEADER_BACKFILL;
+
+//
+// Calculates how much space should be reserved for headers for the given
+// address family.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+HEADER_BACKFILL
+CxPlatDpRawCalculateHeaderBackFill(
+    _In_ QUIC_ADDRESS_FAMILY Family
+    );
+
+//
+// Upcall from raw datapath to parse a single received Ethernet frame
+// (Payload, Length) into Packet.
+// NOTE(review): the DPDK receive path clears Packet->Buffer before the call
+// and drops the frame if it is still NULL afterwards - presumably that is how
+// a rejected frame is signalled; confirm against the implementation.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawParseEthernet(
+    _In_ const CXPLAT_DATAPATH* Datapath,
+    _Inout_ CXPLAT_RECV_DATA* Packet,
+    _In_reads_bytes_(Length)
+        const uint8_t* Payload,
+    _In_ uint16_t Length
+    );
+
+//
+// Upcall from raw datapath to indicate an array of received (already parsed)
+// packets.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawRxEthernet(
+    _In_ const CXPLAT_DATAPATH* Datapath,
+    _In_reads_(PacketCount)
+        CXPLAT_RECV_DATA** Packets,
+    _In_ uint16_t PacketCount
+    );
+
+//
+// Frees a chain of previously received packets.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawRxFree(
+    _In_opt_ const CXPLAT_RECV_DATA* PacketChain
+    );
+
+//
+// Allocates a new TX send object.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+CXPLAT_SEND_DATA*
+CxPlatDpRawTxAlloc(
+    _In_ CXPLAT_DATAPATH* Datapath,
+    _In_ CXPLAT_ECN_TYPE ECN,
+    _In_ uint16_t MaxPacketSize,
+    _Inout_ CXPLAT_ROUTE* Route
+    );
+
+//
+// Frees a previously allocated TX send object.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxFree(
+    _In_ CXPLAT_SEND_DATA* SendData
+    );
+
+//
+// Enqueues a TX send object to be sent out on the raw datapath device.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxEnqueue(
+    _In_ CXPLAT_SEND_DATA* SendData
+    );
+
+//
+// Raw Socket Interface
+//
+
+typedef struct CXPLAT_SOCKET {
+
+    CXPLAT_HASHTABLE_ENTRY Entry; // Link in the socket pool's hash table.
+    CXPLAT_RUNDOWN_REF Rundown; // Acquired by CxPlatGetSocket for each socket it returns.
+    CXPLAT_DATAPATH* Datapath;
+    SOCKET AuxSocket; // Auxiliary OS socket bound by CxPlatTryAddSocket to reserve the UDP port.
+    void* CallbackContext;
+    QUIC_ADDR LocalAddress;
+    QUIC_ADDR RemoteAddress;
+    BOOLEAN Wildcard; // Using a wildcard local address. Optimization to avoid always reading LocalAddress.
+    BOOLEAN Connected; // Bound to a remote address
+
+} CXPLAT_SOCKET;
+
+//
+// Initializes the socket pool. Returns FALSE on failure.
+//
+BOOLEAN
+CxPlatSockPoolInitialize(
+    _Inout_ CXPLAT_SOCKET_POOL* Pool
+    );
+
+//
+// Cleans up a socket pool set up by CxPlatSockPoolInitialize.
+//
+void
+CxPlatSockPoolUninitialize(
+    _Inout_ CXPLAT_SOCKET_POOL* Pool
+    );
+
+//
+// Returns TRUE if the socket matches the given addresses. This code is used in
+// conjunction with the hash table lookup, which already compares the local UDP
+// port, so the port is assumed (and only asserted) to match here.
+//
+inline
+BOOL
+CxPlatSocketCompare(
+    _In_ CXPLAT_SOCKET* Socket,
+    _In_ const QUIC_ADDR* LocalAddress,
+    _In_ const QUIC_ADDR* RemoteAddress
+    )
+{
+    CXPLAT_DBG_ASSERT(QuicAddrGetPort(&Socket->LocalAddress) == QuicAddrGetPort(LocalAddress));
+    if (Socket->Wildcard) {
+        return TRUE; // The local port match is all that is needed.
+    }
+
+    //
+    // Make sure the local IP matches and the full remote address matches.
+    //
+    CXPLAT_DBG_ASSERT(Socket->Connected);
+    return
+        QuicAddrCompareIp(&Socket->LocalAddress, LocalAddress) &&
+        QuicAddrCompare(&Socket->RemoteAddress, RemoteAddress);
+}
+
+//
+// Finds a socket to deliver received packets with the given addresses.
+//
+CXPLAT_SOCKET*
+CxPlatGetSocket(
+    _In_ const CXPLAT_SOCKET_POOL* Pool,
+    _In_ const QUIC_ADDR* LocalAddress,
+    _In_ const QUIC_ADDR* RemoteAddress
+    );
+
+BOOLEAN
+CxPlatTryAddSocket(
+    _In_ CXPLAT_SOCKET_POOL* Pool,
+    _In_ CXPLAT_SOCKET* Socket
+    );
+
+void
+CxPlatRemoveSocket(
+    _In_ CXPLAT_SOCKET_POOL* Pool,
+    _In_ CXPLAT_SOCKET* Socket
+    );
+
+//
+// Network framing helpers. Used for Ethernet, IP (v4 & v6) and UDP.
+//
+
+typedef enum PACKET_TYPE {
+    L3_TYPE_ICMPV4,
+    L3_TYPE_ICMPV6,
+    L4_TYPE_TCP,
+    L4_TYPE_UDP,
+} PACKET_TYPE;
+
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatFramingWriteHeaders(
+    _In_ const CXPLAT_SOCKET* Socket,
+    _In_ const CXPLAT_ROUTE* Route,
+    _Inout_ QUIC_BUFFER* Buffer,
+    _In_ BOOLEAN SkipNetworkLayerXsum,
+    _In_ BOOLEAN SkipTransportLayerXsum
+    );
diff --git a/src/platform/datapath_raw_dpdk.c b/src/platform/datapath_raw_dpdk.c
new file mode 100644
index 000000000..76ce46132
--- /dev/null
+++ b/src/platform/datapath_raw_dpdk.c
@@ -0,0 +1,720 @@
+/*++
+
+    Copyright (c) Microsoft Corporation.
+    Licensed under the MIT License.
+
+Abstract:
+
+    QUIC DPDK Datapath Implementation (User Mode)
+
+    - Requires Clang to build
+    - Leverages Mellanox PMD (requires CX4 or CX5)
+
+--*/
+
+#define _CRT_SECURE_NO_WARNINGS 1 // TODO - Remove
+
+#define QUIC_USE_EXECUTION_CONTEXTS 1
+
+#include "datapath_raw.h"
+#ifdef QUIC_CLOG
+#include "datapath_raw_dpdk.c.clog.h"
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define RX_BURST_SIZE 16
+#define TX_BURST_SIZE 16
+#define TX_RING_SIZE 1024
+
+typedef struct DPDK_INTERFACE {
+
+    CXPLAT_INTERFACE;
+
+    uint16_t Port;
+    CXPLAT_LOCK TxLock;
+    struct rte_mempool* MemoryPool;
+    struct rte_ring* TxRingBuffer;
+
+    // Constants
+    char DeviceName[32];
+} DPDK_INTERFACE;
+
+typedef struct DPDK_DATAPATH {
+
+    CXPLAT_DATAPATH;
+
+    BOOLEAN Running;
+    CXPLAT_THREAD DpdkThread;
+    QUIC_STATUS StartStatus;
+    CXPLAT_EVENT StartComplete;
+
+    CXPLAT_POOL AdditionalInfoPool;
+
+    DPDK_INTERFACE Interface; // TODO: support multiple NIC interfaces.
+
+} DPDK_DATAPATH;
+
+typedef struct DPDK_RX_PACKET {
+    CXPLAT_RECV_DATA;
+    CXPLAT_ROUTE RouteStorage;
+    struct rte_mbuf* Mbuf;
+    CXPLAT_POOL* OwnerPool;
+} DPDK_RX_PACKET;
+
+typedef struct DPDK_TX_PACKET {
+    CXPLAT_SEND_DATA;
+    struct rte_mbuf* Mbuf;
+    DPDK_DATAPATH* Dpdk;
+    DPDK_INTERFACE* Interface;
+} DPDK_TX_PACKET;
+
+CXPLAT_STATIC_ASSERT(
+    sizeof(DPDK_TX_PACKET) <= sizeof(DPDK_RX_PACKET),
+    "Code assumes memory allocated for RX is enough for TX");
+
+CXPLAT_THREAD_CALLBACK(CxPlatDpdkMainThread, Context);
+static int CxPlatDpdkWorkerThread(_In_ void* Context);
+
+//
+// The client receive context is laid out immediately after the DPDK_RX_PACKET,
+// so the two conversions below are plain pointer arithmetic.
+//
+CXPLAT_RECV_DATA*
+CxPlatDataPathRecvPacketToRecvData(
+    _In_ const CXPLAT_RECV_PACKET* const Context
+    )
+{
+    return (CXPLAT_RECV_DATA*)(((uint8_t*)Context) - sizeof(DPDK_RX_PACKET));
+}
+
+CXPLAT_RECV_PACKET*
+CxPlatDataPathRecvDataToRecvPacket(
+    _In_ const CXPLAT_RECV_DATA* const Datagram
+    )
+{
+    return (CXPLAT_RECV_PACKET*)(((uint8_t*)Datagram) + sizeof(DPDK_RX_PACKET));
+}
+
+//
+// Loads the optional "dpdk.ini" file from the current directory. The file is
+// a list of KEY=VALUE lines; the recognized keys are "CPU" (core to run DPDK
+// on, defaults to the last processor) and "DeviceName" (port name to bind).
+// A missing file leaves the defaults in place.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpdkReadConfig(
+    _Inout_ DPDK_DATAPATH* Dpdk
+    )
+{
+    Dpdk->Cpu = (uint16_t)(CxPlatProcMaxCount() - 1);
+
+    FILE *File = fopen("dpdk.ini", "r");
+    if (File == NULL) {
+        return;
+    }
+
+    char Line[256];
+    while (fgets(Line, sizeof(Line), File) != NULL) {
+        char* Value = strchr(Line, '=');
+        if (Value == NULL) {
+            continue;
+        }
+        *Value++ = '\0';
+
+        //
+        // Strip the trailing newline. The value may be empty (e.g. "KEY=" on
+        // the last line without a newline), so check the length before
+        // indexing the last character.
+        //
+        size_t ValueLength = strlen(Value);
+        if (ValueLength > 0 && Value[ValueLength - 1] == '\n') {
+            Value[ValueLength - 1] = '\0';
+        }
+
+        if (strcmp(Line, "CPU") == 0) {
+            Dpdk->Cpu = (uint16_t)strtoul(Value, NULL, 10);
+        } else if (strcmp(Line, "DeviceName") == 0) {
+            //
+            // Bounded copy: the value can be up to ~250 characters while
+            // DeviceName only holds 32. Overlong names are truncated.
+            //
+            snprintf(
+                Dpdk->Interface.DeviceName,
+                sizeof(Dpdk->Interface.DeviceName),
+                "%s",
+                Value);
+        }
+    }
+
+    fclose(File);
+}
+
+_IRQL_requires_max_(PASSIVE_LEVEL)
+size_t
+CxPlatDpRawGetDapathSize(
+    void
+    )
+{
+    return sizeof(DPDK_DATAPATH);
+}
+
+//
+// Initializes the DPDK datapath: reads the configuration, sets up the lock,
+// pool and interface list and then starts the DPDK main thread, waiting for it
+// to report whether start-up succeeded. On failure everything initialized here
+// is torn down again before returning.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+QUIC_STATUS
+CxPlatDpRawInitialize(
+    _Inout_ CXPLAT_DATAPATH* Datapath,
+    _In_ uint32_t ClientRecvContextLength
+    )
+{
+    DPDK_DATAPATH* Dpdk = (DPDK_DATAPATH*)Datapath;
+    CXPLAT_THREAD_CONFIG Config = {
+        0, 0, "DpdkMain", CxPlatDpdkMainThread, Dpdk
+    };
+    const uint32_t AdditionalBufferSize =
+        sizeof(DPDK_RX_PACKET) + ClientRecvContextLength;
+
+    CxPlatDpdkReadConfig(Dpdk);
+    CxPlatDpRawGenerateCpuTable(Datapath);
+
+    BOOLEAN CleanUpThread = FALSE;
+    CxPlatEventInitialize(&Dpdk->StartComplete, TRUE, FALSE);
+    CxPlatPoolInitialize(FALSE, AdditionalBufferSize, QUIC_POOL_DATAPATH, &Dpdk->AdditionalInfoPool);
+    CxPlatLockInitialize(&Dpdk->Interface.TxLock);
+    CxPlatListInitializeHead(&Dpdk->Interfaces);
+    CxPlatListInsertTail(&Dpdk->Interfaces, &Dpdk->Interface.Link);
+
+    //
+    // This starts a new thread to do all the DPDK initialization because DPDK
+    // effectively takes that thread over. It waits for the initialization part
+    // to complete before returning. After that, the DPDK main thread starts
+    // running the DPDK main loop until clean up.
+    //
+
+    QUIC_STATUS Status = CxPlatThreadCreate(&Config, &Dpdk->DpdkThread);
+    if (QUIC_FAILED(Status)) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            Status,
+            "CxPlatThreadCreate");
+        goto Error;
+    }
+    CleanUpThread = TRUE;
+
+    CxPlatEventWaitForever(Dpdk->StartComplete);
+    Status = Dpdk->StartStatus;
+
+Error:
+
+    if (QUIC_FAILED(Status)) {
+        //
+        // Join the thread (if it was started) before destroying anything it
+        // might still touch. The lock and pool were initialized before the
+        // thread was created, so they must be released even when thread
+        // creation itself failed.
+        //
+        if (CleanUpThread) {
+            CxPlatThreadWait(&Dpdk->DpdkThread);
+            CxPlatThreadDelete(&Dpdk->DpdkThread);
+        }
+        CxPlatLockUninitialize(&Dpdk->Interface.TxLock);
+        CxPlatPoolUninitialize(&Dpdk->AdditionalInfoPool);
+        CxPlatEventUninitialize(Dpdk->StartComplete);
+    }
+
+    return Status;
+}
+
+//
+// Stops the DPDK thread and releases the resources set up by
+// CxPlatDpRawInitialize.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawUninitialize(
+    _In_ CXPLAT_DATAPATH* Datapath
+    )
+{
+    DPDK_DATAPATH* Dpdk = (DPDK_DATAPATH*)Datapath;
+
+    //
+    // Signal the polling loop to exit and wait for the thread to finish
+    // first: it allocates from AdditionalInfoPool while it is running, so the
+    // pool and lock may only be destroyed after the thread is gone.
+    //
+    Dpdk->Running = FALSE;
+    CxPlatThreadWait(&Dpdk->DpdkThread);
+    CxPlatThreadDelete(&Dpdk->DpdkThread);
+    CxPlatLockUninitialize(&Dpdk->Interface.TxLock);
+    CxPlatPoolUninitialize(&Dpdk->AdditionalInfoPool);
+    CxPlatEventUninitialize(Dpdk->StartComplete);
+}
+
+CXPLAT_THREAD_CALLBACK(CxPlatDpdkMainThread, Context)
+{
+    //
+    // DPDK main thread. Initializes the EAL, locates and configures the
+    // Ethernet port, starts it, reports the start-up status to the thread
+    // blocked in CxPlatDpRawInitialize and then runs the polling loop on this
+    // thread until the datapath stops running.
+    //
+    DPDK_DATAPATH* Dpdk = (DPDK_DATAPATH*)Context;
+
+    char DpdpCpuStr[16];
+    sprintf(DpdpCpuStr, "%hu", Dpdk->Cpu);
+
+    const char* argv[] = {
+        "msquic",
+        "-n", "4",
+        "-l", DpdpCpuStr,
+        "-d", "rte_mempool_ring-21.dll",
+        "-d", "rte_bus_pci-21.dll",
+        "-d", "rte_common_mlx5-21.dll",
+        "-d", "rte_net_mlx5-21.dll"
+    };
+
+    QUIC_STATUS Status = QUIC_STATUS_SUCCESS;
+    BOOLEAN CleanUpRte = FALSE;
+    uint16_t Port;
+    struct rte_eth_conf PortConfig = {
+        .rxmode = {
+            .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
+        },
+    };
+    uint16_t nb_rxd = 1024;
+    uint16_t nb_txd = 1024;
+    const uint16_t rx_rings = 1, tx_rings = 1;
+    struct rte_eth_dev_info DeviceInfo;
+    struct rte_eth_rxconf rxconf;
+    struct rte_eth_txconf txconf;
+    struct rte_ether_addr addr;
+
+    int ret = rte_eal_init(ARRAYSIZE(argv), (char**)argv);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eal_init");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+    CleanUpRte = TRUE;
+
+    if (Dpdk->Interface.DeviceName[0] != '\0') {
+        ret = rte_eth_dev_get_port_by_name(Dpdk->Interface.DeviceName, &Port);
+    } else {
+        ret = rte_eth_dev_get_port_by_name("0000:81:00.0", &Port);
+        if (ret < 0) {
+            ret = rte_eth_dev_get_port_by_name("0000:81:00.1", &Port);
+        }
+    }
+
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_dev_get_port_by_name");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    Dpdk->Interface.Port = Port;
+    Dpdk->Interface.MemoryPool =
+        rte_pktmbuf_pool_create(
+            "MBUF_POOL", NUM_MBUFS, MBUF_CACHE_SIZE, 0,
+            RTE_MBUF_DEFAULT_BUF_SIZE, rte_eth_dev_socket_id(Port));
+    if (Dpdk->Interface.MemoryPool == NULL) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            0,
+            "rte_pktmbuf_pool_create");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    Dpdk->Interface.TxRingBuffer =
+        rte_ring_create(
+            "TxRing", TX_RING_SIZE, rte_eth_dev_socket_id(Port),
+            RING_F_MP_HTS_ENQ | RING_F_SC_DEQ);
+    if (Dpdk->Interface.TxRingBuffer == NULL) {
+        //
+        // rte_ring_create reports failure through its NULL return only, so
+        // there is no status to log here ('ret' still holds the result of the
+        // earlier port lookup).
+        //
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            0,
+            "rte_ring_create");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    ret = rte_eth_dev_info_get(Port, &DeviceInfo);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_dev_info_get");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    Dpdk->Interface.IfIndex = DeviceInfo.if_index;
+
+    //
+    // Enable every checksum offload the device advertises and remember which
+    // ones are active.
+    //
+    if (DeviceInfo.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
+        printf("TX IPv4 Checksum Offload Enabled\n");
+        PortConfig.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+        Dpdk->Interface.OffloadStatus.Transmit.NetworkLayerXsum = TRUE;
+    }
+    if (DeviceInfo.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
+        printf("TX UDP Checksum Offload Enabled\n");
+        PortConfig.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
+        Dpdk->Interface.OffloadStatus.Transmit.TransportLayerXsum = TRUE;
+    }
+    if (DeviceInfo.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) {
+        printf("RX IPv4 Checksum Offload Enabled\n");
+        PortConfig.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
+        Dpdk->Interface.OffloadStatus.Receive.NetworkLayerXsum = TRUE;
+    }
+    if (DeviceInfo.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) {
+        printf("RX UDP Checksum Offload Enabled\n");
+        PortConfig.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
+        Dpdk->Interface.OffloadStatus.Receive.TransportLayerXsum = TRUE;
+    }
+
+    ret = rte_eth_dev_configure(Port, rx_rings, tx_rings, &PortConfig);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_dev_configure");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    ret = rte_eth_dev_adjust_nb_rx_tx_desc(Port, &nb_rxd, &nb_txd);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_dev_adjust_nb_rx_tx_desc");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    rxconf = DeviceInfo.default_rxconf;
+    for (uint16_t q = 0; q < rx_rings; q++) {
+        ret = rte_eth_rx_queue_setup(Port, q, nb_rxd, rte_eth_dev_socket_id(Port), &rxconf, Dpdk->Interface.MemoryPool);
+        if (ret < 0) {
+            QuicTraceEvent(
+                LibraryErrorStatus,
+                "[ lib] ERROR, %u, %s.",
+                ret,
+                "rte_eth_rx_queue_setup");
+            Status = QUIC_STATUS_INTERNAL_ERROR;
+            goto Error;
+        }
+    }
+
+    txconf = DeviceInfo.default_txconf;
+    txconf.offloads = PortConfig.txmode.offloads;
+    for (uint16_t q = 0; q < tx_rings; q++) {
+        ret = rte_eth_tx_queue_setup(Port, q, nb_txd, rte_eth_dev_socket_id(Port), &txconf);
+        if (ret < 0) {
+            QuicTraceEvent(
+                LibraryErrorStatus,
+                "[ lib] ERROR, %u, %s.",
+                ret,
+                "rte_eth_tx_queue_setup");
+            Status = QUIC_STATUS_INTERNAL_ERROR;
+            goto Error;
+        }
+    }
+
+    ret = rte_eth_dev_start(Port);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_dev_start");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    ret = rte_eth_macaddr_get(Port, &addr);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eth_macaddr_get");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    //
+    // Retrieve ifindex of the interface to which DPDK is binding.
+    //
+    // GetIfTable2 returns a Win32 error code (NO_ERROR on success), not an
+    // HRESULT, so it has to be compared against NO_ERROR and converted before
+    // it is reported as a QUIC_STATUS.
+    //
+    MIB_IF_TABLE2* IfTable = NULL;
+    NETIO_STATUS IfStatus = GetIfTable2(&IfTable);
+    if (IfStatus != NO_ERROR) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            IfStatus,
+            "GetIfTable2");
+        Status = HRESULT_FROM_WIN32(IfStatus);
+        goto Error;
+    }
+
+    for (uint32_t i = 0; i < IfTable->NumEntries; i++) {
+        MIB_IF_ROW2* IfRow = (MIB_IF_ROW2*)&IfTable->Table[i];
+        if (!IfRow->InterfaceAndOperStatusFlags.FilterInterface &&
+            !IfRow->InterfaceAndOperStatusFlags.NotMediaConnected &&
+            !IfRow->InterfaceAndOperStatusFlags.Paused &&
+            IfRow->OperStatus == IfOperStatusUp &&
+            IfRow->MediaType == NdisMedium802_3 &&
+            IfRow->PhysicalAddressLength == 6 &&
+            memcmp(IfRow->PhysicalAddress, addr.addr_bytes, IfRow->PhysicalAddressLength) == 0) {
+            Dpdk->Interface.IfIndex = IfRow->InterfaceIndex;
+            break;
+        }
+    }
+
+    //
+    // The table is allocated by GetIfTable2 and must be released by the
+    // caller.
+    //
+    FreeMibTable(IfTable);
+
+    printf(
+        "\nStarting Port %hu on Interface %u, %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n",
+        Dpdk->Interface.Port, Dpdk->Interface.IfIndex,
+        addr.addr_bytes[0], addr.addr_bytes[1], addr.addr_bytes[2],
+        addr.addr_bytes[3], addr.addr_bytes[4], addr.addr_bytes[5]);
+
+    Dpdk->Running = TRUE;
+    ret = rte_eal_mp_remote_launch(CxPlatDpdkWorkerThread, Dpdk, SKIP_MAIN);
+    if (ret < 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            ret,
+            "rte_eal_mp_remote_launch");
+        Status = QUIC_STATUS_INTERNAL_ERROR;
+        goto Error;
+    }
+
+    //
+    // Start-up succeeded: unblock CxPlatDpRawInitialize, then poll on this
+    // thread as well.
+    //
+    Dpdk->StartStatus = Status;
+    CxPlatEventSet(Dpdk->StartComplete);
+
+    CxPlatDpdkWorkerThread(Dpdk);
+
+    rte_eal_mp_wait_lcore(); // Wait on the other cores/threads
+
+Error:
+
+    if (QUIC_FAILED(Status)) {
+        Dpdk->StartStatus = Status;
+        CxPlatEventSet(Dpdk->StartComplete);
+    }
+
+    if (Dpdk->Interface.TxRingBuffer) {
+        rte_ring_free(Dpdk->Interface.TxRingBuffer);
+    }
+
+    if (Dpdk->Interface.MemoryPool) {
+        rte_mempool_free(Dpdk->Interface.MemoryPool);
+    }
+
+    if (CleanUpRte) {
+        rte_eal_cleanup();
+    }
+
+    CXPLAT_THREAD_RETURN(0);
+}
+
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawPlumbRulesOnSocket(
+    _In_ CXPLAT_SOCKET* Socket,
+    _In_ BOOLEAN IsCreated
+    )
+{
+    UNREFERENCED_PARAMETER(Socket);
+    UNREFERENCED_PARAMETER(IsCreated);
+    // no-op currently since DPDK simply steals all traffic
+}
+
+//
+// For DPDK the route's queue is simply the interface itself.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawAssignQueue(
+    _In_ const CXPLAT_INTERFACE* Interface,
+    _Inout_ CXPLAT_ROUTE* Route
+    )
+{
+    Route->Queue = Interface;
+}
+
+_IRQL_requires_max_(DISPATCH_LEVEL)
+const CXPLAT_INTERFACE*
+CxPlatDpRawGetInterfaceFromQueue(
+    _In_ const void* Queue
+    )
+{
+    return (const CXPLAT_INTERFACE*)Queue;
+}
+
+//
+// Polls one burst of received mbufs from the interface, parses each frame and
+// indicates the ones that parsed successfully up to the raw datapath. Frames
+// that fail the hardware checksum check, fail to parse, or cannot get a
+// tracking structure from the pool are dropped.
+//
+static
+void
+CxPlatDpdkRx(
+    _In_ DPDK_DATAPATH* Dpdk,
+    _In_ const uint16_t Core,
+    _In_ DPDK_INTERFACE* Interface
+    )
+{
+    void* Buffers[RX_BURST_SIZE];
+    const uint16_t BuffersCount =
+        rte_eth_rx_burst(Interface->Port, 0, (struct rte_mbuf**)Buffers, RX_BURST_SIZE);
+    if (unlikely(BuffersCount == 0)) {
+        return;
+    }
+
+    DPDK_RX_PACKET Packet; // Working space
+    CxPlatZeroMemory(&Packet, sizeof(DPDK_RX_PACKET));
+    Packet.Route = &Packet.RouteStorage;
+    Packet.Route->Queue = Interface;
+
+    uint16_t PacketCount = 0;
+    for (uint16_t i = 0; i < BuffersCount; i++) {
+        struct rte_mbuf* Buffer = (struct rte_mbuf*)Buffers[i];
+        Packet.Buffer = NULL;
+        if ((Buffer->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) == 0) {
+            CxPlatDpRawParseEthernet(
+                (CXPLAT_DATAPATH*)Dpdk,
+                (CXPLAT_RECV_DATA*)&Packet,
+                ((uint8_t*)Buffer->buf_addr) + Buffer->data_off,
+                Buffer->pkt_len);
+        } else {
+            QuicTraceEvent(
+                LibraryErrorStatus,
+                "[ lib] ERROR, %u, %s.",
+                Buffer->ol_flags,
+                "L3/L4 checksum incorrect");
+            CXPLAT_DBG_ASSERT(
+                Interface->OffloadStatus.Receive.NetworkLayerXsum != 0 ||
+                Interface->OffloadStatus.Receive.TransportLayerXsum != 0);
+        }
+
+        //
+        // Packet.Buffer is still NULL if the frame was not accepted above.
+        // Accepted frames are copied out of the working space; their slot in
+        // Buffers is reused for the resulting packet pointer.
+        //
+        DPDK_RX_PACKET* NewPacket;
+        if (likely(Packet.Buffer && (NewPacket = CxPlatPoolAlloc(&Dpdk->AdditionalInfoPool)) != NULL)) {
+            CxPlatCopyMemory(NewPacket, &Packet, sizeof(DPDK_RX_PACKET));
+            NewPacket->Allocated = TRUE;
+            NewPacket->Mbuf = Buffer;
+            NewPacket->OwnerPool = &Dpdk->AdditionalInfoPool;
+            NewPacket->Route = &NewPacket->RouteStorage;
+            Buffers[PacketCount++] = NewPacket;
+        } else {
+            rte_pktmbuf_free(Buffer);
+        }
+    }
+
+    if (likely(PacketCount)) {
+        CxPlatDpRawRxEthernet((CXPLAT_DATAPATH*)Dpdk, (CXPLAT_RECV_DATA**)Buffers, PacketCount);
+    }
+}
+
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawRxFree(
+    _In_opt_ const CXPLAT_RECV_DATA* PacketChain
+    )
+{
+    while (PacketChain) {
+        const DPDK_RX_PACKET* Packet = (DPDK_RX_PACKET*)PacketChain;
+        // Read the next link before the packet goes back to its pool.
+        PacketChain = PacketChain->Next;
+        rte_pktmbuf_free(Packet->Mbuf);
+        CxPlatPoolFree(Packet->OwnerPool, (void*)Packet);
+    }
+}
+
+//
+// Allocates a send object plus a backing mbuf from the route's interface. The
+// returned buffer starts after the space reserved for the link, network and
+// transport headers. Returns NULL if either allocation fails.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+CXPLAT_SEND_DATA*
+CxPlatDpRawTxAlloc(
+    _In_ CXPLAT_DATAPATH* Datapath,
+    _In_ CXPLAT_ECN_TYPE ECN, // unused currently
+    _In_ uint16_t MaxPacketSize,
+    _Inout_ CXPLAT_ROUTE* Route
+    )
+{
+    DPDK_DATAPATH* Dpdk = (DPDK_DATAPATH*)Datapath;
+    DPDK_TX_PACKET* Packet = CxPlatPoolAlloc(&Dpdk->AdditionalInfoPool);
+    QUIC_ADDRESS_FAMILY Family = QuicAddrGetFamily(&Route->RemoteAddress);
+    DPDK_INTERFACE* Interface = (DPDK_INTERFACE*)Route->Queue;
+
+    if (likely(Packet)) {
+        Packet->Interface = Interface;
+        Packet->Mbuf = rte_pktmbuf_alloc(Interface->MemoryPool);
+        if (likely(Packet->Mbuf)) {
+            HEADER_BACKFILL HeaderFill = CxPlatDpRawCalculateHeaderBackFill(Family);
+            Packet->Dpdk = Dpdk;
+            Packet->Buffer.Length = MaxPacketSize;
+            Packet->Mbuf->data_off = 0;
+            Packet->Buffer.Buffer = ((uint8_t*)Packet->Mbuf->buf_addr) + HeaderFill.AllLayer;
+            Packet->Mbuf->l2_len = HeaderFill.LinkLayer;
+            Packet->Mbuf->l3_len = HeaderFill.NetworkLayer;
+        } else {
+            CxPlatPoolFree(&Dpdk->AdditionalInfoPool, Packet);
+            Packet = NULL;
+        }
+    }
+    return (CXPLAT_SEND_DATA*)Packet;
+}
+
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxFree(
+    _In_ CXPLAT_SEND_DATA* SendData
+    )
+{
+    DPDK_TX_PACKET* Packet = (DPDK_TX_PACKET*)SendData;
+    rte_pktmbuf_free(Packet->Mbuf);
+    CxPlatPoolFree(&Packet->Dpdk->AdditionalInfoPool, SendData);
+}
+
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxEnqueue(
+    _In_ CXPLAT_SEND_DATA* SendData
+    )
+{
+    //
+    // Hands the mbuf to the interface's TX ring (the polling thread sends it)
+    // and releases the send object. The mbuf is dropped if the ring is full.
+    //
+    DPDK_TX_PACKET* Packet = (DPDK_TX_PACKET*)SendData;
+    DPDK_INTERFACE* Interface = Packet->Interface;
+    Packet->Mbuf->data_len = (uint16_t)Packet->Buffer.Length;
+    Packet->Mbuf->ol_flags = PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_UDP_CKSUM;
+
+    DPDK_DATAPATH* Dpdk = Packet->Dpdk;
+    if (unlikely(rte_ring_mp_enqueue(Interface->TxRingBuffer, Packet->Mbuf) != 0)) {
+        rte_pktmbuf_free(Packet->Mbuf);
+        QuicTraceEvent(
+            LibraryError,
+            "[ lib] ERROR, %s.",
+            "No room in DPDK TX ring buffer");
+    }
+
+    CxPlatPoolFree(&Dpdk->AdditionalInfoPool, Packet);
+}
+
+//
+// Drains up to one burst from the interface's TX ring and transmits it. Any
+// mbufs the device did not accept are freed (dropped).
+//
+static
+void
+CxPlatDpdkTx(
+    _In_ DPDK_DATAPATH* Dpdk,
+    _In_ DPDK_INTERFACE* Interface
+    )
+{
+    struct rte_mbuf* Buffers[TX_BURST_SIZE];
+    const uint16_t BufferCount =
+        (uint16_t)rte_ring_sc_dequeue_burst(
+            Interface->TxRingBuffer, (void**)Buffers, TX_BURST_SIZE, NULL);
+    if (unlikely(BufferCount == 0)) {
+        return;
+    }
+
+    const uint16_t TxCount = rte_eth_tx_burst(Interface->Port, 0, Buffers, BufferCount);
+    if (unlikely(TxCount < BufferCount)) {
+        for (uint16_t buf = TxCount; buf < BufferCount; buf++) {
+            rte_pktmbuf_free(Buffers[buf]);
+        }
+    }
+}
+
+//
+// Polling loop run on every DPDK core: alternates between receiving and
+// transmitting on each interface until Dpdk->Running is cleared.
+//
+static
+int
+CxPlatDpdkWorkerThread(
+    _In_ void* Context
+    )
+{
+    DPDK_DATAPATH* Dpdk = (DPDK_DATAPATH*)Context;
+    const uint16_t Core = (uint16_t)rte_lcore_id();
+    CXPLAT_LIST_ENTRY* Entry;
+
+    printf("Core %u worker running...\n", Core);
+    for (Entry = Dpdk->Interfaces.Flink; Entry != &Dpdk->Interfaces; Entry = Entry->Flink) {
+        //
+        // Check the interface being iterated rather than always the first
+        // (embedded) one, so the warning stays correct with several NICs.
+        //
+        DPDK_INTERFACE* Interface = CONTAINING_RECORD(Entry, DPDK_INTERFACE, Link);
+        if (rte_eth_dev_socket_id(Interface->Port) > 0 &&
+            rte_eth_dev_socket_id(Interface->Port) != (int)rte_socket_id()) {
+            printf("\nWARNING, port %u is on remote NUMA node to polling thread.\n"
+                "\tPerformance will not be optimal.\n\n",
+                Interface->Port);
+        }
+    }
+
+    while (likely(Dpdk->Running)) {
+        for (Entry = Dpdk->Interfaces.Flink; Entry != &Dpdk->Interfaces; Entry = Entry->Flink) {
+            DPDK_INTERFACE* Interface = CONTAINING_RECORD(Entry, DPDK_INTERFACE, Link);
+            CxPlatDpdkRx(Dpdk, Core, Interface);
+            CxPlatDpdkTx(Dpdk, Interface);
+        }
+    }
+
+    return 0;
+}
diff --git a/src/platform/datapath_raw_socket.c b/src/platform/datapath_raw_socket.c
new file mode 100644
index 000000000..2c1fa981a
--- /dev/null
+++ b/src/platform/datapath_raw_socket.c
@@ -0,0 +1,874 @@
+/*++
+
+    Copyright (c) Microsoft Corporation.
+    Licensed under the MIT License.
+
+Abstract:
+
+    QUIC raw datapath socket and IP framing abstractions
+
+--*/
+
+#include "datapath_raw.h"
+#ifdef QUIC_CLOG
+#include "datapath_raw_socket.c.clog.h"
+#endif
+
+#include
+
+#pragma warning(disable:4116) // unnamed type definition in parentheses
+#pragma warning(disable:4100) // unreferenced formal parameter
+
+#ifdef _WIN32
+#define SocketError() WSAGetLastError()
+#else
+#define SocketError() errno
+#endif // _WIN32
+
+//
+// Socket Pool Logic
+//
+
+BOOLEAN
+CxPlatSockPoolInitialize(
+    _Inout_ CXPLAT_SOCKET_POOL* Pool
+    )
+{
+    if (!CxPlatHashtableInitializeEx(&Pool->Sockets, CXPLAT_HASH_MIN_SIZE)) {
+        return FALSE;
+    }
+#ifdef _WIN32
+    int WsaError;
+    WSADATA WsaData;
+    if ((WsaError = WSAStartup(MAKEWORD(2, 2), &WsaData)) != 0) {
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            WsaError,
+            "WSAStartup");
+        CxPlatHashtableUninitialize(&Pool->Sockets);
+        return FALSE;
+    }
+#endif // _WIN32
+    CxPlatRwLockInitialize(&Pool->Lock);
+    return TRUE;
+}
+
+void
+CxPlatSockPoolUninitialize(
+    _Inout_ CXPLAT_SOCKET_POOL* Pool
+    )
+{
+#ifdef _WIN32
+    (void)WSACleanup();
+#endif // _WIN32
+    CxPlatRwLockUninitialize(&Pool->Lock);
+    CxPlatHashtableUninitialize(&Pool->Sockets);
+}
+
+//
+// Looks up the socket registered for the local port that matches the given
+// addresses. On success a rundown reference has been acquired on the returned
+// socket; NULL is returned if there is no match or the matching socket is
+// already running down.
+//
+CXPLAT_SOCKET*
+CxPlatGetSocket(
+    _In_ const CXPLAT_SOCKET_POOL* Pool,
+    _In_ const QUIC_ADDR* LocalAddress,
+    _In_ const QUIC_ADDR* RemoteAddress
+    )
+{
+    CXPLAT_SOCKET* Socket = NULL;
+    CXPLAT_HASHTABLE_LOOKUP_CONTEXT Context;
+    CXPLAT_HASHTABLE_ENTRY* Entry;
+    CxPlatRwLockAcquireShared(&((CXPLAT_SOCKET_POOL*)Pool)->Lock);
+    Entry = CxPlatHashtableLookup(&Pool->Sockets, LocalAddress->Ipv4.sin_port, &Context);
+    while (Entry != NULL) {
+        CXPLAT_SOCKET* Temp = CONTAINING_RECORD(Entry, CXPLAT_SOCKET, Entry);
+        if (CxPlatSocketCompare(Temp, LocalAddress, RemoteAddress)) {
+            if (CxPlatRundownAcquire(&Temp->Rundown)) {
+                Socket = Temp;
+            }
+            break;
+        }
+        Entry = CxPlatHashtableLookupNext(&Pool->Sockets, &Context);
+    }
+    CxPlatRwLockReleaseShared(&((CXPLAT_SOCKET_POOL*)Pool)->Lock);
+    return Socket;
+}
+
+//
+// Reserves the socket's local port with the OS (through an auxiliary socket)
+// and inserts the socket into the pool. Returns FALSE if any socket call fails
+// or if an equivalent socket is already registered; the auxiliary socket is
+// closed in that case.
+//
+BOOLEAN
+CxPlatTryAddSocket(
+    _In_ CXPLAT_SOCKET_POOL* Pool,
+    _In_ CXPLAT_SOCKET* Socket
+    )
+{
+    int Result;
+    BOOLEAN Success = FALSE;
+    CXPLAT_HASHTABLE_LOOKUP_CONTEXT Context;
+    CXPLAT_HASHTABLE_ENTRY* Entry;
+    QUIC_ADDR MappedAddress = {0};
+
+    //
+    // Get (and reserve) a transport layer port from the OS networking stack by
+    // binding an auxiliary (dual stack) socket.
+    //
+
+    Socket->AuxSocket =
+        socket(
+            AF_INET6,
+            SOCK_DGRAM,
+            IPPROTO_UDP);
+    if (Socket->AuxSocket == INVALID_SOCKET) {
+        int Error = SocketError();
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Error,
+            "socket");
+        goto Error;
+    }
+
+    int Option = FALSE;
+    Result =
+        setsockopt(
+            Socket->AuxSocket,
+            IPPROTO_IPV6,
+            IPV6_V6ONLY,
+            (char*)&Option,
+            sizeof(Option));
+    if (Result == SOCKET_ERROR) {
+        int Error = SocketError();
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Error,
+            "Set IPV6_V6ONLY");
+        goto Error;
+    }
+
+    CxPlatConvertToMappedV6(&Socket->LocalAddress, &MappedAddress);
+#if QUIC_ADDRESS_FAMILY_INET6 != AF_INET6
+    if (MappedAddress.Ipv6.sin6_family == QUIC_ADDRESS_FAMILY_INET6) {
+        MappedAddress.Ipv6.sin6_family = AF_INET6;
+    }
+#endif
+
+    CxPlatRwLockAcquireExclusive(&Pool->Lock);
+
+    Result =
+        bind(
+            Socket->AuxSocket,
+            (struct sockaddr*)&MappedAddress,
+            sizeof(MappedAddress));
+    if (Result == SOCKET_ERROR) {
+        int Error = SocketError();
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Error,
+            "bind");
+        CxPlatRwLockReleaseExclusive(&Pool->Lock);
+        goto Error;
+    }
+
+    if (Socket->Connected) {
+        CxPlatZeroMemory(&MappedAddress, sizeof(MappedAddress));
+        CxPlatConvertToMappedV6(&Socket->RemoteAddress, &MappedAddress);
+
+#if QUIC_ADDRESS_FAMILY_INET6 != AF_INET6
+        if (MappedAddress.Ipv6.sin6_family == QUIC_ADDRESS_FAMILY_INET6) {
+            MappedAddress.Ipv6.sin6_family = AF_INET6;
+        }
+#endif
+
+        Result =
+            connect(
+                Socket->AuxSocket,
+                (struct sockaddr*)&MappedAddress,
+                sizeof(MappedAddress));
+        if (Result == SOCKET_ERROR) {
+            int Error = SocketError();
+            QuicTraceEvent(
+                DatapathErrorStatus,
+                "[data][%p] ERROR, %u, %s.",
+                Socket,
+                Error,
+                "connect failed");
+            CxPlatRwLockReleaseExclusive(&Pool->Lock);
+            goto Error;
+        }
+    }
+
+    //
+    // Read back the address (and in particular the port) the OS assigned.
+    //
+    int AssignedLocalAddressLength = sizeof(Socket->LocalAddress);
+    Result =
+        getsockname(
+            Socket->AuxSocket,
+            (struct sockaddr*)&Socket->LocalAddress,
+            &AssignedLocalAddressLength);
+    if (Result == SOCKET_ERROR) {
+        int Error = SocketError();
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Error,
+            "getsockname");
+        CxPlatRwLockReleaseExclusive(&Pool->Lock);
+        goto Error;
+    }
+
+    CxPlatConvertFromMappedV6(&Socket->LocalAddress, &Socket->LocalAddress);
+
+    //
+    // Only insert the socket if no registered socket on the same port already
+    // matches its addresses.
+    //
+    Success = TRUE;
+    Entry = CxPlatHashtableLookup(&Pool->Sockets, Socket->LocalAddress.Ipv4.sin_port, &Context);
+    while (Entry != NULL) {
+        CXPLAT_SOCKET* Temp = CONTAINING_RECORD(Entry, CXPLAT_SOCKET, Entry);
+        if (CxPlatSocketCompare(Temp, &Socket->LocalAddress, &Socket->RemoteAddress)) {
+            Success = FALSE;
+            break;
+        }
+        Entry = CxPlatHashtableLookupNext(&Pool->Sockets, &Context);
+    }
+    if (Success) {
+        CxPlatHashtableInsert(&Pool->Sockets, &Socket->Entry, Socket->LocalAddress.Ipv4.sin_port, &Context);
+    }
+
+    CxPlatRwLockReleaseExclusive(&Pool->Lock);
+
+Error:
+
+    if (!Success && Socket->AuxSocket != INVALID_SOCKET) {
+        closesocket(Socket->AuxSocket);
+    }
+
+    return Success;
+}
+
+void
+CxPlatRemoveSocket(
+    _In_ CXPLAT_SOCKET_POOL* Pool,
+    _In_ CXPLAT_SOCKET* Socket
+    )
+{
+    CxPlatRwLockAcquireExclusive(&Pool->Lock);
+    CxPlatHashtableRemove(&Pool->Sockets, &Socket->Entry, NULL);
+
+    if (closesocket(Socket->AuxSocket) == SOCKET_ERROR) {
+        int Error = SocketError();
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Error,
+            "closesocket");
+    }
+
+    CxPlatRwLockReleaseExclusive(&Pool->Lock);
+}
+
+//
+// Records the resolved next hop link-layer address on the route and marks the
+// route as resolved.
+//
+void
+CxPlatResolveRouteComplete(
+    _In_ QUIC_CONNECTION* Connection,
+    _Inout_ CXPLAT_ROUTE* Route,
+    _In_reads_bytes_(6) const uint8_t* PhysicalAddress,
+    _In_ uint8_t PathId
+    )
+{
+    CxPlatCopyMemory(&Route->NextHopLinkLayerAddress, PhysicalAddress, sizeof(Route->NextHopLinkLayerAddress));
+    Route->State = RouteResolved;
+    QuicTraceLogConnInfo(
+        RouteResolutionEnd,
+        Connection,
+        "Route resolution completed on Path[%hhu] with L2 address %hhu:%hhu:%hhu:%hhu:%hhu:%hhu",
+        PathId,
+        PhysicalAddress[0],
+        PhysicalAddress[1],
+        PhysicalAddress[2],
+        PhysicalAddress[3],
+        PhysicalAddress[4],
+        PhysicalAddress[5]);
+}
+
+//
+// Resolves the route (source address, interface/queue, local and next hop
+// link-layer addresses) for Route->RemoteAddress.
+//
+// Returns:
+//   QUIC_STATUS_SUCCESS   - the neighbor was cached; the route is resolved.
+//   QUIC_STATUS_PENDING   - a resolution was queued to the route resolution
+//                           worker; Callback is invoked when it completes.
+//   any failure status    - resolution failed; Callback has already been
+//                           invoked with Succeeded == FALSE.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+QUIC_STATUS
+CxPlatResolveRoute(
+    _In_ CXPLAT_SOCKET* Socket,
+    _Inout_ CXPLAT_ROUTE* Route,
+    _In_ uint8_t PathId,
+    _In_ void* Context,
+    _In_ CXPLAT_ROUTE_RESOLUTION_CALLBACK_HANDLER Callback
+    )
+{
+#ifdef _WIN32
+    NETIO_STATUS Status = 0;
+    MIB_IPFORWARD_ROW2 IpforwardRow = {0};
+
+    CXPLAT_DBG_ASSERT(!QuicAddrIsWildCard(&Route->RemoteAddress));
+
+    //
+    // Find the best next hop IP address.
+    //
+    // GetBestRoute2 overwrites the local address with the best source address,
+    // so the local port is saved and restored around the call.
+    //
+    uint16_t SavedLocalPort = Route->LocalAddress.Ipv4.sin_port;
+    Status =
+        GetBestRoute2(
+            NULL, // InterfaceLuid
+            IFI_UNSPECIFIED, // InterfaceIndex
+            &Route->LocalAddress, // SourceAddress
+            &Route->RemoteAddress, // DestinationAddress
+            0, // AddressSortOptions
+            &IpforwardRow,
+            &Route->LocalAddress); // BestSourceAddress
+    Route->LocalAddress.Ipv4.sin_port = SavedLocalPort;
+    if (Status != ERROR_SUCCESS) {
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Status,
+            "GetBestRoute2");
+        goto Done;
+    }
+
+    //
+    // Find the interface that matches the route we just looked up.
+    //
+    CXPLAT_LIST_ENTRY* Entry = Socket->Datapath->Interfaces.Flink;
+    for (; Entry != &Socket->Datapath->Interfaces; Entry = Entry->Flink) {
+        CXPLAT_INTERFACE* Interface = CONTAINING_RECORD(Entry, CXPLAT_INTERFACE, Link);
+        if (Interface->IfIndex == IpforwardRow.InterfaceIndex) {
+            CxPlatDpRawAssignQueue(Interface, Route);
+            break;
+        }
+    }
+
+    if (Route->Queue == NULL) {
+        Status = QUIC_STATUS_NOT_FOUND;
+        QuicTraceEvent(
+            DatapathError,
+            "[data][%p] ERROR, %s.",
+            Socket,
+            "no matching interface/queue");
+        goto Done;
+    }
+
+    //
+    // Look up the source interface link-layer address.
+    //
+    MIB_IF_ROW2 IfRow = {0};
+    IfRow.InterfaceLuid = IpforwardRow.InterfaceLuid;
+    Status = GetIfEntry2(&IfRow);
+    if (Status != ERROR_SUCCESS) {
+        QuicTraceEvent(
+            DatapathErrorStatus,
+            "[data][%p] ERROR, %u, %s.",
+            Socket,
+            Status,
+            "GetIfEntry2");
+        goto Done;
+    }
+    CXPLAT_DBG_ASSERT(IfRow.PhysicalAddressLength == sizeof(Route->LocalLinkLayerAddress));
+    CxPlatCopyMemory(&Route->LocalLinkLayerAddress, IfRow.PhysicalAddress, sizeof(Route->LocalLinkLayerAddress));
+
+    //
+    // Map the next hop IP address to a link-layer address.
+    //
+    MIB_IPNET_ROW2 IpnetRow = {0};
+    IpnetRow.InterfaceLuid = IpforwardRow.InterfaceLuid;
+    if (QuicAddrIsWildCard(&IpforwardRow.NextHop)) { // On-link?
+        IpnetRow.Address = Route->RemoteAddress;
+    } else {
+        IpnetRow.Address = IpforwardRow.NextHop;
+    }
+
+    //
+    // First call GetIpNetEntry2 to see if there's already a cached neighbor. If there
+    // isn't one, or if the cached neighbor's state is unreachable (which, NB, can happen
+    // in the case where a route lookup resulted in a dummy neighbor entry being created
+    // in TCPIP.sys) or incomplete, then queue up a solicitation event.
+    //
+    Status = GetIpNetEntry2(&IpnetRow);
+    QuicTraceLogConnInfo(
+        RouteResolutionStart,
+        Context,
+        "Starting to look up neighbor on Path[%hhu] with status %u",
+        PathId,
+        Status);
+    if (Status != ERROR_SUCCESS || IpnetRow.State <= NlnsIncomplete) {
+        CXPLAT_ROUTE_RESOLUTION_WORKER* Worker = Socket->Datapath->RouteResolutionWorker;
+        CXPLAT_ROUTE_RESOLUTION_OPERATION* Operation = CxPlatPoolAlloc(&Worker->OperationPool);
+        if (Operation == NULL) {
+            QuicTraceEvent(
+                AllocFailure,
+                "Allocation of '%s' failed. (%llu bytes)",
+                "CXPLAT_ROUTE_RESOLUTION_OPERATION",
+                sizeof(CXPLAT_ROUTE_RESOLUTION_OPERATION));
+            Status = ERROR_NOT_ENOUGH_MEMORY;
+            goto Done;
+        }
+        Operation->IpnetRow = IpnetRow;
+        Operation->Context = Context;
+        Operation->Callback = Callback;
+        Operation->PathId = PathId;
+        CxPlatDispatchLockAcquire(&Worker->Lock);
+        CxPlatListInsertTail(&Worker->Operations, &Operation->WorkerLink);
+        CxPlatDispatchLockRelease(&Worker->Lock);
+        CxPlatEventSet(Worker->Ready);
+        Status = ERROR_IO_PENDING;
+    } else {
+        CxPlatResolveRouteComplete(Context, Route, IpnetRow.PhysicalAddress, PathId);
+    }
+
+Done:
+    if (Status != ERROR_IO_PENDING && Status != ERROR_SUCCESS) {
+        Callback(Context, NULL, PathId, FALSE);
+    }
+
+    //
+    // A queued resolution is not a failure. HRESULT_FROM_WIN32 would turn
+    // ERROR_IO_PENDING into a failure HRESULT, so report it with the dedicated
+    // pending status instead.
+    //
+    if (Status == ERROR_IO_PENDING) {
+        return QUIC_STATUS_PENDING;
+    }
+
+    return HRESULT_FROM_WIN32(Status);
+#else // _WIN32
+    return QUIC_STATUS_NOT_SUPPORTED;
+#endif // _WIN32
+}
+
+//
+// Ethernet / IP Framing Logic
+//
+
+#pragma pack(push)
+#pragma pack(1)
+
+typedef struct ETHERNET_HEADER {
+    uint8_t Destination[6];
+    uint8_t Source[6];
+    uint16_t Type;
+    uint8_t Data[0];
+} ETHERNET_HEADER;
+
+typedef
struct IPV4_HEADER { + uint8_t VersionAndHeaderLength; + uint8_t TypeOfServiceAndEcnField; + uint16_t TotalLength; + uint16_t Identification; + uint16_t FlagsAndFragmentOffset; + uint8_t TimeToLive; + uint8_t Protocol; + uint16_t HeaderChecksum; + uint8_t Source[4]; + uint8_t Destination[4]; + uint8_t Data[0]; +} IPV4_HEADER; + +typedef struct IPV6_HEADER { + uint32_t VersionClassEcnFlow; + uint16_t PayloadLength; + uint8_t NextHeader; + uint8_t HopLimit; + uint8_t Source[16]; + uint8_t Destination[16]; + uint8_t Data[0]; +} IPV6_HEADER; + +typedef struct IPV6_EXTENSION { + uint8_t NextHeader; + uint8_t Length; + uint16_t Reserved0; + uint32_t Reserved1; + uint8_t Data[0]; +} IPV6_EXTENSION; + +typedef struct UDP_HEADER { + uint16_t SourcePort; + uint16_t DestinationPort; + uint16_t Length; + uint16_t Checksum; + uint8_t Data[0]; +} UDP_HEADER; + +#pragma pack(pop) + +// +// Constants for headers in wire format. +// +#define IPV4_VERSION 4 +#define IPV6_VERSION 6 +#define IPV4_VERSION_BYTE (IPV4_VERSION << 4) +#define IPV4_DEFAULT_VERHLEN ((IPV4_VERSION_BYTE) | (sizeof(IPV4_HEADER) / sizeof(uint32_t))) + +#define IP_DEFAULT_HOP_LIMIT 128 + +#define ETHERNET_TYPE_IPV4 0x0008 +#define ETHERNET_TYPE_IPV6 0xdd86 + +_IRQL_requires_max_(DISPATCH_LEVEL) +static +void +CxPlatDpRawParseUdp( + _In_ const CXPLAT_DATAPATH* Datapath, + _Inout_ CXPLAT_RECV_DATA* Packet, + _In_reads_bytes_(Length) + const UDP_HEADER* Udp, + _In_ uint16_t Length + ) +{ + if (Length < sizeof(UDP_HEADER)) { + return; + } + Length -= sizeof(UDP_HEADER); + Packet->Reserved = L4_TYPE_UDP; + + Packet->Route->RemoteAddress.Ipv4.sin_port = Udp->SourcePort; + Packet->Route->LocalAddress.Ipv4.sin_port = Udp->DestinationPort; + + Packet->Buffer = (uint8_t*)Udp->Data; + Packet->BufferLength = Length; + + //const uint32_t Hash = CxPlatHashSimple(sizeof(*Packet->Route), (uint8_t*)Packet->Route); + const uint32_t Hash = Udp->SourcePort + Udp->DestinationPort; + Packet->PartitionIndex = Datapath->CpuTable[Hash % 
Datapath->CpuTableSize]; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +static +void +CxPlatDpRawParseIPv4( + _In_ const CXPLAT_DATAPATH* Datapath, + _Inout_ CXPLAT_RECV_DATA* Packet, + _In_reads_bytes_(Length) + const IPV4_HEADER* IP, + _In_ uint16_t Length + ) +{ + if (Length < sizeof(IPV4_HEADER)) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + Length, + "packet is too small for an IPv4 header"); + return; + } + + if (IP->VersionAndHeaderLength != IPV4_DEFAULT_VERHLEN) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + IP->VersionAndHeaderLength, + "unexpected IPv4 header length and version"); + return; + } + + if (IP->Protocol == IPPROTO_UDP) { + uint16_t IPTotalLength; + IPTotalLength = CxPlatByteSwapUint16(IP->TotalLength); + + if (Length != IPTotalLength) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + Length, + "unexpected IPv4 packet size"); + return; + } + + Packet->Route->RemoteAddress.Ipv4.sin_family = AF_INET; + CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv4.sin_addr, IP->Source, sizeof(IP->Source)); + Packet->Route->LocalAddress.Ipv4.sin_family = AF_INET; + CxPlatCopyMemory(&Packet->Route->LocalAddress.Ipv4.sin_addr, IP->Destination, sizeof(IP->Destination)); + CxPlatDpRawParseUdp(Datapath, Packet, (UDP_HEADER*)IP->Data, IPTotalLength - sizeof(IPV4_HEADER)); + } else { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + IP->Protocol, + "unacceptable v4 transport"); + } +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +static +void +CxPlatDpRawParseIPv6( + _In_ const CXPLAT_DATAPATH* Datapath, + _Inout_ CXPLAT_RECV_DATA* Packet, + _In_reads_bytes_(Length) + const IPV6_HEADER* IP, + _In_ uint16_t Length + ) +{ + + if (Length < sizeof(IPV6_HEADER)) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + Length, + "packet is too small for an IPv6 header"); + return; + } + + if 
(IP->NextHeader == IPPROTO_UDP) { + uint16_t IPPayloadLength; + IPPayloadLength = CxPlatByteSwapUint16(IP->PayloadLength); + if (IPPayloadLength != Length - sizeof(IPV6_HEADER)) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + IPPayloadLength, + "incorrect IP payload length"); + return; + } + + Packet->Route->RemoteAddress.Ipv6.sin6_family = AF_INET6; + CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv6.sin6_addr, IP->Source, sizeof(IP->Source)); + Packet->Route->LocalAddress.Ipv6.sin6_family = AF_INET6; + CxPlatCopyMemory(&Packet->Route->LocalAddress.Ipv6.sin6_addr, IP->Destination, sizeof(IP->Destination)); + CxPlatDpRawParseUdp(Datapath, Packet, (UDP_HEADER*)IP->Data, IPPayloadLength); + } else { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + IP->NextHeader, + "unacceptable v6 transport"); + } +} + +BOOLEAN IsEthernetBroadcast(_In_reads_(6) const uint8_t Address[6]) +{ + return (Address[0] == 0xFF) && (Address[1] == 0xFF) && (Address[2] == 0xFF) && (Address[3] == 0xFF) && (Address[4] == 0xFF) && (Address[5] == 0xFF); +} + +BOOLEAN IsEthernetMulticast(_In_reads_(6) const uint8_t Address[6]) +{ + return (Address[0] & 0x01) == 0x01; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatDpRawParseEthernet( + _In_ const CXPLAT_DATAPATH* Datapath, + _Inout_ CXPLAT_RECV_DATA* Packet, + _In_reads_bytes_(Length) + const uint8_t* Payload, + _In_ uint16_t Length + ) +{ + if (Length < sizeof(ETHERNET_HEADER)) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + Length, + "packet is too small for an ethernet header"); + return; + } + + Length -= sizeof(ETHERNET_HEADER); + + const ETHERNET_HEADER* Ethernet = (const ETHERNET_HEADER*)Payload; + + if (IsEthernetBroadcast(Ethernet->Destination) || IsEthernetMulticast(Ethernet->Destination)) { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + 0, + "not a unicast packet"); + return; 
+ } + + uint16_t EthernetType = Ethernet->Type; + if (EthernetType == ETHERNET_TYPE_IPV4) { + CxPlatDpRawParseIPv4(Datapath, Packet, (IPV4_HEADER*)Ethernet->Data, Length); + } else if (EthernetType == ETHERNET_TYPE_IPV6) { + CxPlatDpRawParseIPv6(Datapath, Packet, (IPV6_HEADER*)Ethernet->Data, Length); + } else { + QuicTraceEvent( + DatapathErrorStatus, + "[data][%p] ERROR, %u, %s.", + Datapath, + EthernetType, + "unacceptable ethernet type"); + } +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +HEADER_BACKFILL +CxPlatDpRawCalculateHeaderBackFill( + _In_ QUIC_ADDRESS_FAMILY Family + ) +{ + HEADER_BACKFILL HeaderBackFill; + HeaderBackFill.TransportLayer = sizeof(UDP_HEADER); + HeaderBackFill.NetworkLayer = + Family == QUIC_ADDRESS_FAMILY_INET ? sizeof(IPV4_HEADER) : sizeof(IPV6_HEADER); + HeaderBackFill.LinkLayer = sizeof(ETHERNET_HEADER); + HeaderBackFill.AllLayer = + HeaderBackFill.TransportLayer + HeaderBackFill.NetworkLayer + HeaderBackFill.LinkLayer; + return HeaderBackFill; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +uint16_t +CxPlatFramingChecksum( + _In_reads_(Length) uint8_t* Data, + _In_ uint32_t Length, + _In_ uint64_t InitialChecksum + ) +{ + // + // Add up all bytes in 3 steps: + // 1. Add the odd byte to the checksum if the length is odd. + // 2. If the length is divisible by 2 but not 4, add the last 2 bytes. + // 3. Sum up the rest as 32-bit words. + // + + if ((Length & 1) != 0) { + --Length; + InitialChecksum += Data[Length]; + } + + if ((Length & 2) != 0) { + Length -= 2; + InitialChecksum += *((uint16_t*)(&Data[Length])); + } + + for (uint32_t i = 0; i < Length; i += 4) { + InitialChecksum += *((uint32_t*)(&Data[i])); + } + + // + // Fold all carries into the final checksum. 
+ // + while (InitialChecksum >> 16) { + InitialChecksum = (InitialChecksum & 0xffff) + (InitialChecksum >> 16); + } + + return (uint16_t)InitialChecksum; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +uint16_t +CxPlatFramingUdpChecksum( + _In_reads_(AddrLength) uint8_t* SrcAddr, + _In_reads_(AddrLength) uint8_t* DstAddr, + _In_ uint32_t AddrLength, + _In_ uint16_t NextHeader, + _In_reads_(IPPayloadLength) uint8_t* UDP, + _In_ uint32_t IPPayloadLength + ) +{ + uint64_t Checksum = + CxPlatFramingChecksum(SrcAddr, AddrLength, 0) + + CxPlatFramingChecksum(DstAddr, AddrLength, 0); + Checksum += CxPlatByteSwapUint16(NextHeader); + Checksum += CxPlatByteSwapUint16((uint16_t)IPPayloadLength); + + // + // Pseudoheader is always in 32-bit words. So, cross 16-bit boundary adjustment isn't needed. + // + return ~CxPlatFramingChecksum(UDP, IPPayloadLength, Checksum); +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +CxPlatFramingWriteHeaders( + _In_ const CXPLAT_SOCKET* Socket, + _In_ const CXPLAT_ROUTE* Route, + _Inout_ QUIC_BUFFER* Buffer, + _In_ BOOLEAN SkipNetworkLayerXsum, + _In_ BOOLEAN SkipTransportLayerXsum + ) +{ + UDP_HEADER* UDP = (UDP_HEADER*)(Buffer->Buffer - sizeof(UDP_HEADER)); + ETHERNET_HEADER* Ethernet; + uint16_t EthType; + uint16_t IpHeaderLen; + QUIC_ADDRESS_FAMILY Family = QuicAddrGetFamily(&Route->RemoteAddress); + + CXPLAT_DBG_ASSERT( + Family == QUIC_ADDRESS_FAMILY_INET || Family == QUIC_ADDRESS_FAMILY_INET6); + + // + // Fill UDP header. + // + UDP->DestinationPort = Route->RemoteAddress.Ipv4.sin_port; + UDP->SourcePort = Route->LocalAddress.Ipv4.sin_port; + UDP->Length = QuicNetByteSwapShort((uint16_t)Buffer->Length + sizeof(UDP_HEADER)); + UDP->Checksum = 0; + + // + // Fill IPv4/IPv6 header. 
+ // + if (Family == QUIC_ADDRESS_FAMILY_INET) { + IPV4_HEADER* IPv4 = (IPV4_HEADER*)(((uint8_t*)UDP) - sizeof(IPV4_HEADER)); + IPv4->VersionAndHeaderLength = IPV4_DEFAULT_VERHLEN; + IPv4->TypeOfServiceAndEcnField = 0; + IPv4->TotalLength = htons(sizeof(IPV4_HEADER) + sizeof(UDP_HEADER) + (uint16_t)Buffer->Length); + IPv4->Identification = 0; + IPv4->FlagsAndFragmentOffset = 0; + IPv4->TimeToLive = IP_DEFAULT_HOP_LIMIT; + IPv4->Protocol = IPPROTO_UDP; + IPv4->HeaderChecksum = 0; + CxPlatCopyMemory(IPv4->Source, &Route->LocalAddress.Ipv4.sin_addr, sizeof(Route->LocalAddress.Ipv4.sin_addr)); + CxPlatCopyMemory(IPv4->Destination, &Route->RemoteAddress.Ipv4.sin_addr, sizeof(Route->RemoteAddress.Ipv4.sin_addr)); + IPv4->HeaderChecksum = SkipNetworkLayerXsum ? 0 : ~CxPlatFramingChecksum((uint8_t*)IPv4, sizeof(IPV4_HEADER), 0); + EthType = ETHERNET_TYPE_IPV4; + Ethernet = (ETHERNET_HEADER*)(((uint8_t*)IPv4) - sizeof(ETHERNET_HEADER)); + IpHeaderLen = sizeof(IPV4_HEADER); + if (!SkipTransportLayerXsum) { + UDP->Checksum = + CxPlatFramingUdpChecksum( + IPv4->Source, IPv4->Destination, + sizeof(Route->LocalAddress.Ipv4.sin_addr), IPPROTO_UDP, (uint8_t*)UDP, sizeof(UDP_HEADER) + Buffer->Length); + } + } else { + IPV6_HEADER* IPv6 = (IPV6_HEADER*)(((uint8_t*)UDP) - sizeof(IPV6_HEADER)); + // + // IPv6 Version, Traffic Class, ECN Field and Flow Label fields in host + // byte order. + // + union { + struct { + uint32_t Flow : 20; + uint32_t EcnField : 2; + uint32_t Class : 6; + uint32_t Version : 4; // Most significant bits. + }; + uint32_t Value; + } VersionClassEcnFlow = {0}; + + VersionClassEcnFlow.Version = IPV6_VERSION; + VersionClassEcnFlow.Class = 0; + VersionClassEcnFlow.EcnField = 0; // Not ECN capable currently. 
+ VersionClassEcnFlow.Flow = (uint32_t)(uintptr_t)Socket; + + IPv6->VersionClassEcnFlow = CxPlatByteSwapUint32(VersionClassEcnFlow.Value); + IPv6->PayloadLength = htons(sizeof(UDP_HEADER) + (uint16_t)Buffer->Length); + IPv6->HopLimit = IP_DEFAULT_HOP_LIMIT; + IPv6->NextHeader = IPPROTO_UDP; + CxPlatCopyMemory(IPv6->Source, &Route->LocalAddress.Ipv6.sin6_addr, sizeof(Route->LocalAddress.Ipv6.sin6_addr)); + CxPlatCopyMemory(IPv6->Destination, &Route->RemoteAddress.Ipv6.sin6_addr, sizeof(Route->RemoteAddress.Ipv6.sin6_addr)); + EthType = ETHERNET_TYPE_IPV6; + Ethernet = (ETHERNET_HEADER*)(((uint8_t*)IPv6) - sizeof(ETHERNET_HEADER)); + IpHeaderLen = sizeof(IPV6_HEADER); + if (!SkipTransportLayerXsum) { + UDP->Checksum = + CxPlatFramingUdpChecksum( + IPv6->Source, IPv6->Destination, + sizeof(Route->LocalAddress.Ipv6.sin6_addr), IPPROTO_UDP, (uint8_t*)UDP, sizeof(UDP_HEADER) + Buffer->Length); + } + } + + // + // Fill Ethernet header. + // + Ethernet->Type = EthType; + CxPlatCopyMemory(Ethernet->Destination, Route->NextHopLinkLayerAddress, sizeof(Route->NextHopLinkLayerAddress)); + CxPlatCopyMemory(Ethernet->Source, Route->LocalLinkLayerAddress, sizeof(Route->LocalLinkLayerAddress)); + + Buffer->Length += sizeof(UDP_HEADER) + IpHeaderLen + sizeof(ETHERNET_HEADER); + Buffer->Buffer -= sizeof(UDP_HEADER) + IpHeaderLen + sizeof(ETHERNET_HEADER); +} diff --git a/src/platform/datapath_raw_xdp.c b/src/platform/datapath_raw_xdp.c new file mode 100644 index 000000000..06a902507 --- /dev/null +++ b/src/platform/datapath_raw_xdp.c @@ -0,0 +1,1403 @@ +/*++ + + Copyright (c) Microsoft Corporation. + Licensed under the MIT License. 
+ +Abstract: + + QUIC XDP Datapath Implementation (User Mode) + +--*/ + +#define _CRT_SECURE_NO_WARNINGS 1 // TODO - Remove + +#include "datapath_raw.h" +#ifdef QUIC_CLOG +#include "datapath_raw_xdp.c.clog.h" +#endif + +#include +#include +#include +#include + +#define RX_BATCH_SIZE 16 +#define MAX_ETH_FRAME_SIZE 1514 + +#define ADAPTER_TAG 'ApdX' // XdpA +#define IF_TAG 'IpdX' // XdpI +#define QUEUE_TAG 'QpdX' // XdpQ +#define RULE_TAG 'UpdX' // XdpU +#define RX_BUFFER_TAG 'RpdX' // XdpR +#define TX_BUFFER_TAG 'TpdX' // XdpT + +typedef struct XDP_INTERFACE XDP_INTERFACE; + +typedef struct _XDP_QUEUE { + const XDP_INTERFACE* Interface; + uint8_t* RxBuffers; + HANDLE RxXsk; + XSK_RING RxFillRing; + XSK_RING RxRing; + HANDLE RxProgram; + uint8_t* TxBuffers; + HANDLE TxXsk; + XSK_RING TxRing; + XSK_RING TxCompletionRing; + BOOL Error; + + CXPLAT_LIST_ENTRY WorkerTxQueue; + CXPLAT_SLIST_ENTRY WorkerRxPool; + + // Move contended buffer pools to their own cache lines. + // TODO: Use better (more scalable) buffer algorithms. + DECLSPEC_CACHEALIGN SLIST_HEADER RxPool; + DECLSPEC_CACHEALIGN SLIST_HEADER TxPool; + + // Move TX queue to its own cache line. + DECLSPEC_CACHEALIGN + CXPLAT_LOCK TxLock; + CXPLAT_LIST_ENTRY TxQueue; +} XDP_QUEUE; + +typedef struct XDP_INTERFACE { + CXPLAT_INTERFACE; + uint8_t QueueCount; + uint8_t RuleCount; + CXPLAT_LOCK RuleLock; + XDP_RULE* Rules; + XDP_QUEUE* Queues; +} XDP_INTERFACE; + +typedef struct XDP_DATAPATH { + CXPLAT_DATAPATH; + + BOOLEAN Running; + HANDLE CompletionEvent; + + // Constants + DECLSPEC_CACHEALIGN + uint16_t DatapathCpuGroup; + // + // Currently, all XDP interfaces share the same config. 
+ // + uint32_t RxBufferCount; + uint32_t RxRingSize; + uint32_t TxBufferCount; + uint32_t TxRingSize; + BOOLEAN TxAlwaysPoke; + BOOLEAN SkipXsum; +} XDP_DATAPATH; + +typedef struct DECLSPEC_ALIGN(MEMORY_ALLOCATION_ALIGNMENT) XDP_RX_PACKET { + CXPLAT_RECV_DATA; + CXPLAT_ROUTE RouteStorage; + XDP_QUEUE* Queue; + // Followed by: + // uint8_t ClientContext[...]; + // uint8_t FrameBuffer[MAX_ETH_FRAME_SIZE]; +} XDP_RX_PACKET; + +typedef struct DECLSPEC_ALIGN(MEMORY_ALLOCATION_ALIGNMENT) XDP_TX_PACKET { + CXPLAT_SEND_DATA; + XDP_QUEUE* Queue; + CXPLAT_LIST_ENTRY Link; + uint8_t FrameBuffer[MAX_ETH_FRAME_SIZE]; +} XDP_TX_PACKET; + +CXPLAT_RECV_DATA* +CxPlatDataPathRecvPacketToRecvData( + _In_ const CXPLAT_RECV_PACKET* const Context + ) +{ + return (CXPLAT_RECV_DATA*)(((uint8_t*)Context) - sizeof(XDP_RX_PACKET)); +} + +CXPLAT_RECV_PACKET* +CxPlatDataPathRecvDataToRecvPacket( + _In_ const CXPLAT_RECV_DATA* const Datagram + ) +{ + return (CXPLAT_RECV_PACKET*)(((uint8_t*)Datagram) + sizeof(XDP_RX_PACKET)); +} + +QUIC_STATUS +CxPlatGetInterfaceRssQueueCount( + _In_ uint32_t InterfaceIndex, + _Out_ uint8_t* Count + ) +{ + HRESULT hRes; + IWbemLocator *pLoc = NULL; + IEnumWbemClassObject *pEnum = NULL; + IWbemServices *pSvc = NULL; + DWORD ret = 0; + uint8_t cnt = 0; + NET_LUID if_luid = { 0 }; + WCHAR if_alias[256 + 1] = { 0 }; + + ret = ConvertInterfaceIndexToLuid(InterfaceIndex, &if_luid); + if (ret != NO_ERROR) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + ret, + "ConvertInterfaceIndexToLuid"); + return HRESULT_FROM_WIN32(ret); + } + + ret = ConvertInterfaceLuidToAlias(&if_luid, if_alias, RTL_NUMBER_OF(if_alias)); + if (ret != NO_ERROR) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + ret, + "ConvertInterfaceLuidToAlias"); + return HRESULT_FROM_WIN32(ret); + } + + // Step 1: -------------------------------------------------- + // Initialize COM. 
------------------------------------------ + hRes = CoInitializeEx(0, COINIT_MULTITHREADED); + if (FAILED(hRes)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "CoInitializeEx"); + return hRes; + } + + // Step 2: --------------------------------------------------- + // Obtain the initial locator to WMI ------------------------- + hRes = CoCreateInstance( + &CLSID_WbemLocator, + 0, + CLSCTX_INPROC_SERVER, + &IID_IWbemLocator, (LPVOID *) &pLoc); + if (FAILED(hRes)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "CoCreateInstance IWbemLocator"); + goto Cleanup; + } + + // Step 3: ----------------------------------------------------- + // Connect to WMI through the IWbemLocator::ConnectServer method + // Connect to the root\cimv2 namespace with + // the current user and obtain pointer pSvc + // to make IWbemServices calls. + BSTR Namespace = SysAllocString(L"ROOT\\STANDARDCIMV2"); + hRes = pLoc->lpVtbl->ConnectServer(pLoc, + Namespace, // Object path of WMI namespace + NULL, // User name. NULL = current user + NULL, // User password. NULL = current + 0, // Locale. NULL indicates current + 0, // Security flags. 
+ 0, // Authority (for example, Kerberos) + 0, // Context object + &pSvc // pointer to IWbemServices proxy + ); + SysFreeString(Namespace); + if (FAILED(hRes)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "ConnectServer"); + goto Cleanup; + } + + // Step 4: -------------------------------------------------- + // Set security levels on the proxy ------------------------- + hRes = CoSetProxyBlanket( + (IUnknown*)pSvc, // Indicates the proxy to set + RPC_C_AUTHN_WINNT, // RPC_C_AUTHN_xxx + RPC_C_AUTHZ_NONE, // RPC_C_AUTHZ_xxx + NULL, // Server principal name + RPC_C_AUTHN_LEVEL_CALL, // RPC_C_AUTHN_LEVEL_xxx + RPC_C_IMP_LEVEL_IMPERSONATE, // RPC_C_IMP_LEVEL_xxx + NULL, // client identity + EOAC_NONE // proxy capabilities + ); + if (FAILED(hRes)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "CoSetProxyBlanket"); + goto Cleanup; + } + + // Step 5: -------------------------------------------------- + // Use the IWbemServices pointer to make requests of WMI ---- + wchar_t query[512] = { '\0' }; + (void)wcscat_s(query, 512, L"SELECT * FROM MSFT_NetAdapterRssSettingData WHERE Name='"); + (void)wcscat_s(query, 512, if_alias); + (void)wcscat_s(query, 512, L"'"); + //AF_XDP_LOG(INFO, "WMI query = \"%ws\"\n", query); + + BSTR Language = SysAllocString(L"WQL"); + BSTR Query = SysAllocString(query); + hRes = pSvc->lpVtbl->ExecQuery(pSvc, + Language, + Query, + WBEM_FLAG_FORWARD_ONLY, // Flags + 0, // Context + &pEnum + ); + SysFreeString(Query); + SysFreeString(Language); + if (FAILED(hRes)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "ExecQuery"); + goto Cleanup; + } + + // Step 6: ------------------------------------------------- + // Get the data from the query in step 6 ------------------- + IWbemClassObject *pclsObj = NULL; + ULONG uReturn = 0; + while (pEnum) { + HRESULT hr = pEnum->lpVtbl->Next(pEnum, WBEM_INFINITE, 1, + &pclsObj, &uReturn); + + if (0 == uReturn) { + 
break; + } + + VARIANT vtProp; + + // Get the value of the IndirectionTable property + hr = pclsObj->lpVtbl->Get(pclsObj, L"IndirectionTable", 0, &vtProp, 0, 0); + if ((vtProp.vt == VT_NULL) || (vtProp.vt == VT_EMPTY)) { + //AF_XDP_LOG(INFO, "No RSS indirection table, assuming 1 default queue\n"); + cnt++; + CXPLAT_FRE_ASSERT(cnt != 0); + } else if ((vtProp.vt & VT_ARRAY) == 0) { + //AF_XDP_LOG(ERR, "not ARRAY\n"); + } else { + long lLower, lUpper; + SAFEARRAY *pSafeArray = vtProp.parray; + UINT8 *rssIndicesBitset = NULL; + DWORD rssIndicesBitsetBytes; + DWORD numberOfProcs; + + SafeArrayGetLBound(pSafeArray, 1, &lLower); + SafeArrayGetUBound(pSafeArray, 1, &lUpper); + + IUnknown** rawArray; + SafeArrayAccessData(pSafeArray, (void**)&rawArray); + + // Set up the RSS bitset according to number of processors + numberOfProcs = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + rssIndicesBitsetBytes = (numberOfProcs / 8) + 1; + rssIndicesBitset = malloc(rssIndicesBitsetBytes); + memset(rssIndicesBitset, 0, rssIndicesBitsetBytes); + + for (long i = lLower; i <= lUpper; i++) + { + IUnknown* pIUnk = rawArray[i]; + IWbemClassObject *obj = NULL; + pIUnk->lpVtbl->QueryInterface(pIUnk, &IID_IWbemClassObject, (void **)&obj); + if (obj == NULL) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + hRes, + "QueryInterface"); + //AF_XDP_LOG(ERR, "QueryInterface failed\n"); + free(rssIndicesBitset); + goto Cleanup; + } + + hr = obj->lpVtbl->Get(obj, L"ProcessorNumber", 0, &vtProp, 0, 0); + UINT32 index = vtProp.iVal / (sizeof(rssIndicesBitset[0])*8); + UINT32 offset = vtProp.iVal % (sizeof(rssIndicesBitset[0])*8); + rssIndicesBitset[index] |= 1 << offset; + + VariantClear(&vtProp); + obj->lpVtbl->Release(obj); + } + + SafeArrayUnaccessData(pSafeArray); + + for (DWORD i = 0; i < numberOfProcs/(sizeof(rssIndicesBitset[0])*8) + 1; i++) { + for (SIZE_T j = 0; j < sizeof(rssIndicesBitset[0])*8; j++) { + if (rssIndicesBitset[i] & (1 << j)) { + //AF_XDP_LOG(INFO, "detected 
active RSS queue %u on proc index %llu\n", cnt, (i*8 + j)); + cnt++; + CXPLAT_FRE_ASSERT(cnt != 0); + } + } + } + + free(rssIndicesBitset); + } + + VariantClear(&vtProp); + pclsObj->lpVtbl->Release(pclsObj); + } + + //AF_XDP_LOG(INFO, "counted %u active queues on %s\n", cnt, if_name); + *Count = cnt; + +Cleanup: + + if (pEnum != NULL) { + pEnum->lpVtbl->Release(pEnum); + } + if (pSvc != NULL) { + pSvc->lpVtbl->Release(pSvc); + } + if (pLoc != NULL) { + pLoc->lpVtbl->Release(pLoc); + } + CoUninitialize(); + + return hRes; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatXdpReadConfig( + _Inout_ XDP_DATAPATH* Xdp + ) +{ + // Default config + Xdp->RxBufferCount = 4096; + Xdp->RxRingSize = 128; + Xdp->TxBufferCount = 4096; + Xdp->TxRingSize = 128; + Xdp->TxAlwaysPoke = FALSE; + Xdp->Cpu = (uint16_t)(CxPlatProcMaxCount() - 1); + + FILE *File = fopen("xdp.ini", "r"); + if (File == NULL) { + return; + } + + char Line[256]; + while (fgets(Line, sizeof(Line), File) != NULL) { + char* Value = strchr(Line, '='); + if (Value == NULL) { + continue; + } + *Value++ = '\0'; + if (Value[strlen(Value) - 1] == '\n') { + Value[strlen(Value) - 1] = '\0'; + } + + if (strcmp(Line, "CpuGroup") == 0) { + Xdp->DatapathCpuGroup = (uint16_t)strtoul(Value, NULL, 10); + } else if (strcmp(Line, "CpuNumber") == 0) { + Xdp->Cpu = (uint16_t)strtoul(Value, NULL, 10); + } else if (strcmp(Line, "RxBufferCount") == 0) { + Xdp->RxBufferCount = strtoul(Value, NULL, 10); + } else if (strcmp(Line, "RxRingSize") == 0) { + Xdp->RxRingSize = strtoul(Value, NULL, 10); + } else if (strcmp(Line, "TxBufferCount") == 0) { + Xdp->TxBufferCount = strtoul(Value, NULL, 10); + } else if (strcmp(Line, "TxRingSize") == 0) { + Xdp->TxRingSize = strtoul(Value, NULL, 10); + } else if (strcmp(Line, "TxAlwaysPoke") == 0) { + Xdp->TxAlwaysPoke = !!strtoul(Value, NULL, 10); + } else if (strcmp(Line, "SkipXsum") == 0) { + BOOLEAN State = !!strtoul(Value, NULL, 10); + Xdp->SkipXsum = State; + printf("SkipXsum: %u\n", State); 
+ } + } + + fclose(File); +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +size_t +CxPlatDpRawGetDapathSize( + void + ) +{ + return sizeof(XDP_DATAPATH); +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +void +CxPlatDpRawInterfaceUninitialize( + _Inout_ XDP_INTERFACE* Interface + ) +{ + #pragma warning(push) + #pragma warning(disable:6001) // Using uninitialized memory + + for (uint32_t i = 0; Interface->Queues != NULL && i < Interface->QueueCount; i++) { + XDP_QUEUE *Queue = &Interface->Queues[i]; + + if (Queue->TxXsk != NULL) { +#if DEBUG + QUIC_STATUS Status; + XSK_STATISTICS Stats; + uint32_t StatsSize = sizeof(Stats); + Status = XskGetSockopt(Queue->TxXsk, XSK_SOCKOPT_STATISTICS, &Stats, &StatsSize); + if (QUIC_SUCCEEDED(Status)) { + printf("[%u-%u]txInvalidDescriptors: %llu\n", Interface->IfIndex, i, Stats.txInvalidDescriptors); + } +#endif + CloseHandle(Queue->TxXsk); + } + + if (Queue->TxBuffers != NULL) { + CxPlatFree(Queue->TxBuffers, TX_BUFFER_TAG); + } + + if (Queue->RxProgram != NULL) { + CloseHandle(Queue->RxProgram); + } + + if (Queue->RxXsk != NULL) { +#if DEBUG + QUIC_STATUS Status; + XSK_STATISTICS Stats; + uint32_t StatsSize = sizeof(Stats); + Status = XskGetSockopt(Queue->RxXsk, XSK_SOCKOPT_STATISTICS, &Stats, &StatsSize); + if (QUIC_SUCCEEDED(Status)) { + printf("[%u-%u]rxDropped: %llu\n", Interface->IfIndex, i, Stats.rxDropped); + printf("[%u-%u]rxInvalidDescriptors: %llu\n", Interface->IfIndex, i, Stats.rxInvalidDescriptors); + } +#endif + CloseHandle(Queue->RxXsk); + } + + if (Queue->RxBuffers != NULL) { + CxPlatFree(Queue->RxBuffers, RX_BUFFER_TAG); + } + + CxPlatLockUninitialize(&Queue->TxLock); + } + + if (Interface->Queues != NULL) { + CxPlatFree(Interface->Queues, QUEUE_TAG); + } + + if (Interface->Rules != NULL) { + CxPlatFree(Interface->Rules, RULE_TAG); + } + + CxPlatLockUninitialize(&Interface->RuleLock); + + #pragma warning(pop) +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +QUIC_STATUS +CxPlatDpRawInterfaceInitialize( + _In_ XDP_DATAPATH* Xdp, + 
_Inout_ XDP_INTERFACE* Interface, + _In_ uint32_t ClientRecvContextLength + ) +{ + const uint32_t RxHeadroom = sizeof(XDP_RX_PACKET) + ALIGN_UP(ClientRecvContextLength, uint32_t); + const uint32_t RxPacketSize = ALIGN_UP(RxHeadroom + MAX_ETH_FRAME_SIZE, XDP_RX_PACKET); + QUIC_STATUS Status; + + CxPlatLockInitialize(&Interface->RuleLock); + Interface->OffloadStatus.Receive.NetworkLayerXsum = Xdp->SkipXsum; + Interface->OffloadStatus.Receive.TransportLayerXsum = Xdp->SkipXsum; + Interface->OffloadStatus.Transmit.NetworkLayerXsum = Xdp->SkipXsum; + Interface->OffloadStatus.Transmit.NetworkLayerXsum = Xdp->SkipXsum; + + Status = CxPlatGetInterfaceRssQueueCount(Interface->IfIndex, &Interface->QueueCount); + if (QUIC_FAILED(Status)) { + goto Error; + } + + Interface->Queues = CxPlatAlloc(Interface->QueueCount * sizeof(*Interface->Queues), QUEUE_TAG); + if (Interface->Queues == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. (%llu bytes)", + "XDP Queues", + Interface->QueueCount * sizeof(*Interface->Queues)); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + CxPlatZeroMemory(Interface->Queues, Interface->QueueCount * sizeof(*Interface->Queues)); + + for (uint32_t i = 0; i < Interface->QueueCount; i++) { + XDP_QUEUE* Queue = &Interface->Queues[i]; + + Queue->Interface = Interface; + InitializeSListHead(&Queue->RxPool); + InitializeSListHead(&Queue->TxPool); + CxPlatLockInitialize(&Queue->TxLock); + CxPlatListInitializeHead(&Queue->TxQueue); + CxPlatListInitializeHead(&Queue->WorkerTxQueue); + + // + // RX datapath. + // + + Queue->RxBuffers = CxPlatAlloc(Xdp->RxBufferCount * RxPacketSize, RX_BUFFER_TAG); + if (Queue->RxBuffers == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "XDP RX Buffers", + Xdp->RxBufferCount * RxPacketSize); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + Status = XskCreate(&Queue->RxXsk); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskCreate"); + goto Error; + } + + XSK_UMEM_REG RxUmem = {0}; + RxUmem.address = Queue->RxBuffers; + RxUmem.chunkSize = RxPacketSize; + RxUmem.headroom = RxHeadroom; + RxUmem.totalSize = Xdp->RxBufferCount * RxPacketSize; + + Status = XskSetSockopt(Queue->RxXsk, XSK_SOCKOPT_UMEM_REG, &RxUmem, sizeof(RxUmem)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_UMEM_REG)"); + goto Error; + } + + Status = + XskSetSockopt( + Queue->RxXsk, XSK_SOCKOPT_RX_FILL_RING_SIZE, &Xdp->RxRingSize, + sizeof(Xdp->RxRingSize)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_RX_FILL_RING_SIZE)"); + goto Error; + } + + Status = + XskSetSockopt( + Queue->RxXsk, XSK_SOCKOPT_RX_RING_SIZE, &Xdp->RxRingSize, sizeof(Xdp->RxRingSize)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_RX_RING_SIZE)"); + goto Error; + } + + uint32_t Flags = 0; // TODO: support native/generic forced flags. 
+ Status = XskBind(Queue->RxXsk, Interface->IfIndex, i, Flags, NULL); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskBind"); + goto Error; + } + + XSK_RING_INFO_SET RxRingInfo; + uint32_t RxRingInfoSize = sizeof(RxRingInfo); + Status = XskGetSockopt(Queue->RxXsk, XSK_SOCKOPT_RING_INFO, &RxRingInfo, &RxRingInfoSize); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskGetSockopt(XSK_SOCKOPT_RING_INFO)"); + goto Error; + } + + XskRingInitialize(&Queue->RxFillRing, &RxRingInfo.fill); + XskRingInitialize(&Queue->RxRing, &RxRingInfo.rx); + + for (uint32_t j = 0; j < Xdp->RxBufferCount; j++) { + InterlockedPushEntrySList( + &Queue->RxPool, (PSLIST_ENTRY)&Queue->RxBuffers[j * RxPacketSize]); + } + + // + // TX datapath. + // + + Queue->TxBuffers = CxPlatAlloc(Xdp->TxBufferCount * sizeof(XDP_TX_PACKET), TX_BUFFER_TAG); + if (Queue->TxBuffers == NULL) { + QuicTraceEvent( + AllocFailure, + "Allocation of '%s' failed. 
(%llu bytes)", + "XDP TX Buffers", + Xdp->TxBufferCount * sizeof(XDP_TX_PACKET)); + Status = QUIC_STATUS_OUT_OF_MEMORY; + goto Error; + } + + Status = XskCreate(&Queue->TxXsk); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskCreate"); + goto Error; + } + + XSK_UMEM_REG TxUmem = {0}; + TxUmem.address = Queue->TxBuffers; + TxUmem.chunkSize = sizeof(XDP_TX_PACKET); + TxUmem.headroom = FIELD_OFFSET(XDP_TX_PACKET, FrameBuffer); + TxUmem.totalSize = Xdp->TxBufferCount * sizeof(XDP_TX_PACKET); + + Status = XskSetSockopt(Queue->TxXsk, XSK_SOCKOPT_UMEM_REG, &TxUmem, sizeof(TxUmem)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_UMEM_REG)"); + goto Error; + } + + Status = + XskSetSockopt( + Queue->TxXsk, XSK_SOCKOPT_TX_RING_SIZE, &Xdp->TxRingSize, sizeof(Xdp->TxRingSize)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_TX_RING_SIZE)"); + goto Error; + } + + Status = + XskSetSockopt( + Queue->TxXsk, XSK_SOCKOPT_TX_COMPLETION_RING_SIZE, &Xdp->TxRingSize, + sizeof(Xdp->TxRingSize)); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskSetSockopt(XSK_SOCKOPT_TX_COMPLETION_RING_SIZE)"); + goto Error; + } + + Flags = 0; // TODO: support native/generic forced flags. 
+ Status = XskBind(Queue->TxXsk, Interface->IfIndex, i, Flags, NULL); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskBind"); + goto Error; + } + + XSK_RING_INFO_SET TxRingInfo; + uint32_t TxRingInfoSize = sizeof(TxRingInfo); + Status = XskGetSockopt(Queue->TxXsk, XSK_SOCKOPT_RING_INFO, &TxRingInfo, &TxRingInfoSize); + if (QUIC_FAILED(Status)) { + QuicTraceEvent( + LibraryErrorStatus, + "[ lib] ERROR, %u, %s.", + Status, + "XskGetSockopt(XSK_SOCKOPT_RING_INFO)"); + goto Error; + } + + XskRingInitialize(&Queue->TxRing, &TxRingInfo.tx); + XskRingInitialize(&Queue->TxCompletionRing, &TxRingInfo.completion); + + for (uint32_t j = 0; j < Xdp->TxBufferCount; j++) { + InterlockedPushEntrySList( + &Queue->TxPool, (PSLIST_ENTRY)&Queue->TxBuffers[j * sizeof(XDP_TX_PACKET)]); + } + } + +Error: + if (QUIC_FAILED(Status)) { + CxPlatDpRawInterfaceUninitialize(Interface); + } + + return Status; +} + +_IRQL_requires_max_(PASSIVE_LEVEL) +_Requires_lock_held_(Interface->RuleLock) +void +CxPlatDpRawInterfaceUpdateRules( + _In_ XDP_INTERFACE* Interface + ) +{ + static const XDP_HOOK_ID RxHook = { + .Layer = XDP_HOOK_L2, + .Direction = XDP_HOOK_RX, + .SubLayer = XDP_HOOK_INSPECT, + }; + + const UINT32 Flags = 0; // TODO: support native/generic forced flags. + + for (uint32_t i = 0; i < Interface->QueueCount; i++) { + + XDP_QUEUE* Queue = &Interface->Queues[i]; + for (uint8_t j = 0; j < Interface->RuleCount; j++) { + Interface->Rules[j].Redirect.Target = Queue->RxXsk; + } + + HANDLE NewRxProgram; + QUIC_STATUS Status = + XdpCreateProgram( + Interface->IfIndex, + &RxHook, + i, + Flags, + Interface->Rules, + Interface->RuleCount, + &NewRxProgram); + if (QUIC_FAILED(Status)) { + // + // TODO - Figure out how to better handle failure and revert changes. + // This will likely require working with XDP to get an improved API; + // possibly to update all queues at once. 
+            //
+            QuicTraceEvent(
+                LibraryErrorStatus,
+                "[ lib] ERROR, %u, %s.",
+                Status,
+                "XdpCreateProgram");
+            continue;
+        }
+
+        if (Queue->RxProgram != NULL) {
+            CloseHandle(Queue->RxProgram);
+        }
+
+        Queue->RxProgram = NewRxProgram;
+    }
+}
+
+//
+// Appends a copy of *NewRule to the interface's rule array and then calls
+// CxPlatDpRawInterfaceUpdateRules so the RX programs are recreated with the
+// new rule set. The whole operation runs under Interface->RuleLock.
+//
+// The function returns void: if the rule table is full or the new array cannot
+// be allocated, the failure is traced and the existing rules are left
+// untouched.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawInterfaceAddRule(
+    _In_ XDP_INTERFACE* Interface,
+    _In_ const XDP_RULE* NewRule
+    )
+{
+#pragma warning(push)
+#pragma warning(disable:6386) // Buffer overrun while writing to 'NewRules' - FALSE POSITIVE
+
+    CxPlatLockAcquire(&Interface->RuleLock);
+    // TODO - Don't always allocate a new array?
+
+    //
+    // The rule loops in CxPlatDpRawInterfaceUpdateRules and
+    // CxPlatDpRawInterfaceRemoveRule index the array with a uint8_t, so the
+    // table may hold at most UINT8_MAX rules. Compare against that limit
+    // directly: a 'RuleCount + 1 == 0' test depends on wrap-around, which does
+    // not happen for a count narrower than int because the addition is done
+    // after integer promotion.
+    //
+    if (Interface->RuleCount >= UINT8_MAX) {
+        QuicTraceEvent(
+            LibraryError,
+            "[ lib] ERROR, %s.",
+            "No more room for rules");
+        CxPlatLockRelease(&Interface->RuleLock);
+        return;
+    }
+
+    const size_t OldSize = sizeof(XDP_RULE) * (size_t)Interface->RuleCount;
+    const size_t NewSize = sizeof(XDP_RULE) * ((size_t)Interface->RuleCount + 1);
+
+    XDP_RULE* NewRules = CxPlatAlloc(NewSize, RULE_TAG);
+    if (NewRules == NULL) {
+        QuicTraceEvent(
+            AllocFailure,
+            "Allocation of '%s' failed. (%llu bytes)",
+            "XDP_RULE",
+            NewSize);
+        CxPlatLockRelease(&Interface->RuleLock);
+        return;
+    }
+
+    //
+    // Copy the existing rules (if any) and place the new rule at the end.
+    //
+    if (Interface->RuleCount > 0) {
+        memcpy(NewRules, Interface->Rules, OldSize);
+    }
+    NewRules[Interface->RuleCount] = *NewRule;
+    Interface->RuleCount++;
+
+    //
+    // Swap in the new array before re-plumbing, since UpdateRules reads
+    // Interface->Rules and Interface->RuleCount.
+    //
+    if (Interface->Rules != NULL) {
+        CxPlatFree(Interface->Rules, RULE_TAG);
+    }
+    Interface->Rules = NewRules;
+
+    CxPlatDpRawInterfaceUpdateRules(Interface);
+
+    CxPlatLockRelease(&Interface->RuleLock);
+
+#pragma warning(pop)
+}
+
+//
+// Removes the first rule whose match type and pattern equal those of *Rule,
+// compacts the rule array and re-plumbs the RX programs. Does nothing if no
+// rule matches. Runs under Interface->RuleLock.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawInterfaceRemoveRule(
+    _In_ XDP_INTERFACE* Interface,
+    _In_ const XDP_RULE* Rule
+    )
+{
+    CxPlatLockAcquire(&Interface->RuleLock);
+
+    for (uint8_t i = 0; i < Interface->RuleCount; i++) {
+        if (Interface->Rules[i].Match != Rule->Match) {
+            continue;
+        }
+
+        if (Rule->Match == XDP_MATCH_UDP_DST) {
+            if (Rule->Pattern.Port != Interface->Rules[i].Pattern.Port) {
+                continue;
+            }
+        } else if (Rule->Match == XDP_MATCH_IPV4_UDP_TUPLE) {
+            if (Rule->Pattern.Tuple.DestinationPort != Interface->Rules[i].Pattern.Tuple.DestinationPort ||
+                Rule->Pattern.Tuple.SourcePort != Interface->Rules[i].Pattern.Tuple.SourcePort ||
+                memcmp(&Rule->Pattern.Tuple.DestinationAddress.Ipv4, &Interface->Rules[i].Pattern.Tuple.DestinationAddress.Ipv4, sizeof(IN_ADDR)) != 0 ||
+                memcmp(&Rule->Pattern.Tuple.SourceAddress.Ipv4, &Interface->Rules[i].Pattern.Tuple.SourceAddress.Ipv4, sizeof(IN_ADDR)) != 0) {
+                continue;
+            }
+        } else if (Rule->Match == XDP_MATCH_IPV6_UDP_TUPLE) {
+            if (Rule->Pattern.Tuple.DestinationPort != Interface->Rules[i].Pattern.Tuple.DestinationPort ||
+                Rule->Pattern.Tuple.SourcePort != Interface->Rules[i].Pattern.Tuple.SourcePort ||
+                memcmp(&Rule->Pattern.Tuple.DestinationAddress.Ipv6, &Interface->Rules[i].Pattern.Tuple.DestinationAddress.Ipv6, sizeof(IN6_ADDR)) != 0 ||
+                memcmp(&Rule->Pattern.Tuple.SourceAddress.Ipv6, &Interface->Rules[i].Pattern.Tuple.SourceAddress.Ipv6, sizeof(IN6_ADDR)) != 0) {
+                continue;
+            }
+        } else {
+            CXPLAT_FRE_ASSERT(FALSE); // Should not be possible!
+        }
+
+        if (i < Interface->RuleCount - 1) {
+            memmove(&Interface->Rules[i], &Interface->Rules[i + 1], sizeof(XDP_RULE) * (Interface->RuleCount - i - 1));
+        }
+        Interface->RuleCount--;
+        CxPlatDpRawInterfaceUpdateRules(Interface);
+        break;
+    }
+
+    CxPlatLockRelease(&Interface->RuleLock);
+}
+
+//
+// Initializes the XDP datapath. Reads the XDP configuration, builds the CPU
+// table, enumerates the system's adapters with GetAdaptersAddresses and calls
+// CxPlatDpRawInterfaceInitialize for every adapter that is an Ethernet adapter
+// in the "up" state with a MAC address of ETH_MAC_ADDR_LEN bytes. Adapters
+// whose initialization fails are traced and skipped.
+//
+// Returns QUIC_STATUS_SUCCESS once at least one interface is set up and the
+// datapath has been registered with the worker; QUIC_STATUS_NOT_FOUND when no
+// interface could be set up; QUIC_STATUS_OUT_OF_MEMORY or the converted
+// GetAdaptersAddresses error otherwise. On failure the datapath is torn down
+// with CxPlatDpRawUninitialize before returning.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+QUIC_STATUS
+CxPlatDpRawInitialize(
+    _Inout_ CXPLAT_DATAPATH* Datapath,
+    _In_ uint32_t ClientRecvContextLength
+    )
+{
+    XDP_DATAPATH* Xdp = (XDP_DATAPATH*)Datapath;
+    QUIC_STATUS Status;
+
+    CxPlatXdpReadConfig(Xdp);
+    CxPlatDpRawGenerateCpuTable(Datapath);
+    CxPlatListInitializeHead(&Xdp->Interfaces);
+
+    PIP_ADAPTER_ADDRESSES Adapters = NULL;
+    ULONG Error;
+    ULONG AdaptersBufferSize = 15000; // 15 KB buffer for GAA to start with.
+    ULONG Iterations = 0;
+    ULONG flags = // skip info that we don't need.
+        GAA_FLAG_INCLUDE_PREFIX |
+        GAA_FLAG_SKIP_UNICAST |
+        GAA_FLAG_SKIP_ANYCAST |
+        GAA_FLAG_SKIP_MULTICAST |
+        GAA_FLAG_SKIP_DNS_SERVER |
+        GAA_FLAG_SKIP_DNS_INFO;
+
+    //
+    // GetAdaptersAddresses updates AdaptersBufferSize when the buffer is too
+    // small, so each retry allocates the size requested by the previous call.
+    //
+    do {
+        Adapters = (IP_ADAPTER_ADDRESSES*)CxPlatAlloc(AdaptersBufferSize, ADAPTER_TAG);
+        if (Adapters == NULL) {
+            QuicTraceEvent(
+                AllocFailure,
+                "Allocation of '%s' failed. (%llu bytes)",
+                "XDP interface",
+                AdaptersBufferSize);
+            Status = QUIC_STATUS_OUT_OF_MEMORY;
+            goto Error;
+        }
+
+        Error =
+            GetAdaptersAddresses(AF_UNSPEC, flags, NULL, Adapters, &AdaptersBufferSize);
+        if (Error == ERROR_BUFFER_OVERFLOW) {
+            CxPlatFree(Adapters, ADAPTER_TAG);
+            Adapters = NULL;
+        } else {
+            break;
+        }
+
+        Iterations++;
+    } while ((Error == ERROR_BUFFER_OVERFLOW) && (Iterations < 3)); // retry up to 3 times.
+
+    if (Error == NO_ERROR) {
+        for (PIP_ADAPTER_ADDRESSES Adapter = Adapters; Adapter != NULL; Adapter = Adapter->Next) {
+            if (Adapter->IfType == IF_TYPE_ETHERNET_CSMACD &&
+                Adapter->OperStatus == IfOperStatusUp &&
+                Adapter->PhysicalAddressLength == ETH_MAC_ADDR_LEN) {
+                XDP_INTERFACE* Interface = CxPlatAlloc(sizeof(XDP_INTERFACE), IF_TAG);
+                if (Interface == NULL) {
+                    QuicTraceEvent(
+                        AllocFailure,
+                        "Allocation of '%s' failed. (%llu bytes)",
+                        "XDP interface",
+                        sizeof(*Interface));
+                    Status = QUIC_STATUS_OUT_OF_MEMORY;
+                    goto Error;
+                }
+
+                CxPlatZeroMemory(Interface, sizeof(*Interface));
+                Interface->IfIndex = Adapter->IfIndex;
+                memcpy(
+                    Interface->PhysicalAddress, Adapter->PhysicalAddress,
+                    sizeof(Interface->PhysicalAddress));
+
+                //
+                // A failure on one adapter is not fatal: trace it, drop the
+                // interface and keep looking at the remaining adapters.
+                //
+                Status =
+                    CxPlatDpRawInterfaceInitialize(
+                        Xdp, Interface, ClientRecvContextLength);
+                if (QUIC_FAILED(Status)) {
+                    QuicTraceEvent(
+                        LibraryErrorStatus,
+                        "[ lib] ERROR, %u, %s.",
+                        Status,
+                        "CxPlatDpRawInterfaceInitialize");
+                    CxPlatFree(Interface, IF_TAG);
+                    continue;
+                }
+#if DEBUG
+                printf("Bound XDP to interface %u (%wS)\n", Adapter->IfIndex, Adapter->Description);
+#endif
+                CxPlatListInsertTail(&Xdp->Interfaces, &Interface->Link);
+            }
+        }
+    } else {
+        //
+        // Name the API that actually failed in the trace.
+        //
+        Status = HRESULT_FROM_WIN32(Error);
+        QuicTraceEvent(
+            LibraryErrorStatus,
+            "[ lib] ERROR, %u, %s.",
+            Status,
+            "GetAdaptersAddresses");
+        goto Error;
+    }
+
+    if (CxPlatListIsEmpty(&Xdp->Interfaces)) {
+        QuicTraceEvent(
+            LibraryError,
+            "[ lib] ERROR, %s.",
+            "no XDP capable interface");
+        Status = QUIC_STATUS_NOT_FOUND;
+        goto Error;
+    }
+
+    Xdp->Running = TRUE;
+    CxPlatEventInitialize(&Xdp->CompletionEvent, TRUE, FALSE);
+    CxPlatWorkerRegisterDataPath(Xdp->Cpu, Xdp);
+    Status = QUIC_STATUS_SUCCESS;
+
+Error:
+
+    //
+    // The adapter list is only needed during enumeration (each interface keeps
+    // its own copy of the index and MAC address), so release it on every path,
+    // success included. Previously this buffer was never freed.
+    //
+    if (Adapters != NULL) {
+        CxPlatFree(Adapters, ADAPTER_TAG);
+    }
+
+    if (QUIC_FAILED(Status)) {
+        CxPlatDpRawUninitialize(Datapath);
+    }
+
+    return Status;
+}
+
+//
+// Tears down the XDP datapath. If the datapath is running, clears the Running
+// flag and waits for CompletionEvent (set by CxPlatDataPathRunEC once it
+// observes Running == FALSE) before uninitializing the event. Then
+// uninitializes and frees every interface on the Interfaces list.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawUninitialize(
+    _In_ CXPLAT_DATAPATH* Datapath
+    )
+{
+    XDP_DATAPATH* Xdp = (XDP_DATAPATH*)Datapath;
+
+    if (Xdp->Running) {
+        Xdp->Running = FALSE;
+        CxPlatEventWaitForever(Xdp->CompletionEvent);
+        CxPlatEventUninitialize(Xdp->CompletionEvent);
+    }
+
+    while (!CxPlatListIsEmpty(&Xdp->Interfaces)) {
+        XDP_INTERFACE* Interface =
+            CONTAINING_RECORD(CxPlatListRemoveHead(&Xdp->Interfaces), XDP_INTERFACE, Link);
+        CxPlatDpRawInterfaceUninitialize(Interface);
+        CxPlatFree(Interface, IF_TAG);
+    }
+}
+
+//
+// Adds (IsCreated == TRUE) or removes (IsCreated == FALSE) the XDP redirect
+// rule for Socket on every interface of the datapath. Wildcard sockets use a
+// rule that matches only the UDP destination port; all other sockets use a
+// full UDP tuple rule, IPv4 or IPv6 depending on the local address family.
+//
+// NOTE(review): the ports are read through the Ipv4 member even for IPv6
+// sockets - presumably the port field overlays in the address union; confirm.
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawPlumbRulesOnSocket(
+    _In_ CXPLAT_SOCKET* Socket,
+    _In_ BOOLEAN IsCreated
+    )
+{
+    XDP_DATAPATH* Xdp = (XDP_DATAPATH*)Socket->Datapath;
+
+    if (Socket->Wildcard) {
+        const XDP_RULE Rule = {
+            .Match = XDP_MATCH_UDP_DST,
+            .Pattern.Port = Socket->LocalAddress.Ipv4.sin_port,
+            .Action = XDP_PROGRAM_ACTION_REDIRECT,
+            .Redirect.TargetType = XDP_REDIRECT_TARGET_TYPE_XSK,
+            .Redirect.Target = NULL,
+        };
+
+        CXPLAT_LIST_ENTRY* Entry;
+        for (Entry = Xdp->Interfaces.Flink; Entry != &Xdp->Interfaces; Entry = Entry->Flink) {
+            XDP_INTERFACE* Interface = CONTAINING_RECORD(Entry, XDP_INTERFACE, Link);
+            if (IsCreated) {
+                CxPlatDpRawInterfaceAddRule(Interface, &Rule);
+            } else {
+                CxPlatDpRawInterfaceRemoveRule(Interface, &Rule);
+            }
+        }
+
+    } else {
+
+        XDP_RULE Rule = {
+            .Pattern.Tuple.SourcePort = Socket->RemoteAddress.Ipv4.sin_port,
+            .Pattern.Tuple.DestinationPort = Socket->LocalAddress.Ipv4.sin_port,
+            .Action = XDP_PROGRAM_ACTION_REDIRECT,
+            .Redirect.TargetType = XDP_REDIRECT_TARGET_TYPE_XSK,
+            .Redirect.Target = NULL,
+        };
+
+        if (Socket->LocalAddress.si_family == QUIC_ADDRESS_FAMILY_INET) {
+            Rule.Match = XDP_MATCH_IPV4_UDP_TUPLE;
+            Rule.Pattern.Tuple.SourceAddress.Ipv4 = Socket->RemoteAddress.Ipv4.sin_addr;
+            Rule.Pattern.Tuple.DestinationAddress.Ipv4 = Socket->LocalAddress.Ipv4.sin_addr;
+        } else {
+            Rule.Match = XDP_MATCH_IPV6_UDP_TUPLE;
+            Rule.Pattern.Tuple.SourceAddress.Ipv6 = Socket->RemoteAddress.Ipv6.sin6_addr;
+            Rule.Pattern.Tuple.DestinationAddress.Ipv6 = Socket->LocalAddress.Ipv6.sin6_addr;
+        }
+
+        //
+        // TODO - Optimization: apply only to the correct interface.
+        //
+
+        CXPLAT_LIST_ENTRY* Entry;
+        for (Entry = Xdp->Interfaces.Flink; Entry != &Xdp->Interfaces; Entry = Entry->Flink) {
+            XDP_INTERFACE* Interface = CONTAINING_RECORD(Entry, XDP_INTERFACE, Link);
+            if (IsCreated) {
+                CxPlatDpRawInterfaceAddRule(Interface, &Rule);
+            } else {
+                CxPlatDpRawInterfaceRemoveRule(Interface, &Rule);
+            }
+        }
+    }
+}
+
+//
+// Selects the queue a route sends on. Currently always the interface's first
+// queue (index 0).
+//
+_IRQL_requires_max_(PASSIVE_LEVEL)
+void
+CxPlatDpRawAssignQueue(
+    _In_ const CXPLAT_INTERFACE* _Interface,
+    _Inout_ CXPLAT_ROUTE* Route
+    )
+{
+    const XDP_INTERFACE* Interface = (const XDP_INTERFACE*)_Interface;
+    Route->Queue = &Interface->Queues[0];
+}
+
+//
+// Returns the interface that owns the given queue. Queue is an opaque pointer
+// to an XDP_QUEUE.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+const CXPLAT_INTERFACE*
+CxPlatDpRawGetInterfaceFromQueue(
+    _In_ const void* Queue
+    )
+{
+    return (const CXPLAT_INTERFACE*)((XDP_QUEUE*)Queue)->Interface;
+}
+
+//
+// Receive processing for one queue of an interface: consumes up to
+// RX_BATCH_SIZE descriptors from the RX ring, parses each frame, refills the
+// fill ring with free buffers and indicates the successfully parsed packets
+// with CxPlatDpRawRxEthernet.
+//
+static
+void
+CxPlatXdpRx(
+    _In_ XDP_DATAPATH* Xdp,
+    _In_ uint8_t QueueId,
+    _In_ XDP_INTERFACE* Interface
+    )
+{
+    CXPLAT_RECV_DATA* Buffers[RX_BATCH_SIZE];
+    uint32_t RxIndex;
+    uint32_t FillIndex;
+    uint32_t ProdCount = 0;
+    uint32_t PacketCount = 0;
+    XDP_QUEUE* Queue = &Interface->Queues[QueueId];
+    const uint32_t BuffersCount = XskRingConsumerReserve(&Queue->RxRing, RX_BATCH_SIZE, &RxIndex);
+
+    for (uint32_t i = 0; i < BuffersCount; i++) {
+        XSK_BUFFER_DESCRIPTOR* Buffer = XskRingGetElement(&Queue->RxRing, RxIndex++);
+        XDP_RX_PACKET* Packet =
+            (XDP_RX_PACKET*)(Queue->RxBuffers + XskDescriptorGetAddress(Buffer->address));
+        uint8_t* FrameBuffer = (uint8_t*)Packet + XskDescriptorGetOffset(Buffer->address);
+
+        CxPlatZeroMemory(Packet, sizeof(XDP_RX_PACKET));
+        Packet->Route = &Packet->RouteStorage;
+        Packet->RouteStorage.Queue = Queue;
+
+        CxPlatDpRawParseEthernet(
+            (CXPLAT_DATAPATH*)Xdp,
+            (CXPLAT_RECV_DATA*)Packet,
+            FrameBuffer,
+            (uint16_t)Buffer->length);
+
+        //
+        // Packet->Buffer is still NULL (from the zeroing above) unless the
+        // parser accepted the frame; rejected packets are recycled right away.
+        //
+        if (Packet->Buffer) {
+            Packet->Allocated = TRUE;
+            Packet->Queue = Queue;
+            Buffers[PacketCount++] = (CXPLAT_RECV_DATA*)Packet;
+        } else {
+            CxPlatListPushEntry(&Queue->WorkerRxPool, (CXPLAT_SLIST_ENTRY*)Packet);
+        }
+    }
+
+    if (BuffersCount > 0) {
+        XskRingConsumerRelease(&Queue->RxRing, BuffersCount);
+    }
+
+    //
+    // Refill the RX fill ring. Free buffers are taken from WorkerRxPool; when
+    // that list is empty, everything currently in RxPool is moved over with a
+    // single InterlockedFlushSList. Stop early if no free buffer is left.
+    //
+    uint32_t FillAvailable = XskRingProducerReserve(&Queue->RxFillRing, MAXUINT32, &FillIndex);
+    while (FillAvailable-- > 0) {
+        if (Queue->WorkerRxPool.Next == NULL) {
+            Queue->WorkerRxPool.Next = (CXPLAT_SLIST_ENTRY*)InterlockedFlushSList(&Queue->RxPool);
+        }
+
+        XDP_RX_PACKET* Packet = (XDP_RX_PACKET*)CxPlatListPopEntry(&Queue->WorkerRxPool);
+        if (Packet == NULL) {
+            break;
+        }
+
+        //
+        // The fill descriptor is the buffer's byte offset from RxBuffers.
+        //
+        uint64_t* FillDesc = XskRingGetElement(&Queue->RxFillRing, FillIndex++);
+        *FillDesc = (uint8_t*)Packet - Queue->RxBuffers;
+        ProdCount++;
+    }
+
+    //
+    // Only the descriptors actually written are submitted.
+    //
+    if (ProdCount > 0) {
+        XskRingProducerSubmit(&Queue->RxFillRing, ProdCount);
+    }
+
+    if (PacketCount > 0) {
+        CxPlatDpRawRxEthernet((CXPLAT_DATAPATH*)Xdp, Buffers, (uint16_t)PacketCount);
+    }
+
+    //
+    // Report a ring error only once per queue; Queue->Error latches.
+    //
+    if (XskRingError(&Queue->RxRing) && !Queue->Error) {
+        XSK_ERROR ErrorStatus;
+        QUIC_STATUS XskStatus;
+        uint32_t ErrorSize = sizeof(ErrorStatus);
+        XskStatus = XskGetSockopt(Queue->RxXsk, XSK_SOCKOPT_RX_ERROR, &ErrorStatus, &ErrorSize);
+        printf("RX ring error: 0x%x\n", SUCCEEDED(XskStatus) ? ErrorStatus : XskStatus);
+        Queue->Error = TRUE;
+    }
+}
+
+//
+// Returns a chain of receive packets (linked through Next) to the RxPool of
+// the queue each packet belongs to. Consecutive packets that share a pool are
+// linked into a local list and pushed with one InterlockedPushListSList call;
+// a new batch is started whenever the pool changes. A NULL chain is a no-op.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawRxFree(
+    _In_opt_ const CXPLAT_RECV_DATA* PacketChain
+    )
+{
+    uint32_t Count = 0;
+    SLIST_ENTRY* Head = NULL;
+    SLIST_ENTRY** Tail = &Head;
+    SLIST_HEADER* Pool = NULL;
+
+    while (PacketChain) {
+        const XDP_RX_PACKET* Packet = (XDP_RX_PACKET*)PacketChain;
+        //
+        // Advance first: the packet's leading bytes are reused as the
+        // SLIST_ENTRY link below.
+        //
+        PacketChain = PacketChain->Next;
+        // Packet->Allocated = FALSE; (other data paths don't clear this flag?)
+
+        if (Pool != &Packet->Queue->RxPool) {
+            //
+            // Pool changed: flush the batch collected for the previous pool.
+            //
+            if (Count > 0) {
+                InterlockedPushListSList(
+                    Pool, Head, CONTAINING_RECORD(Tail, SLIST_ENTRY, Next), Count);
+                Head = NULL;
+                Tail = &Head;
+                Count = 0;
+            }
+
+            Pool = &Packet->Queue->RxPool;
+        }
+
+        //
+        // Append to the local batch; Tail always points at the Next field of
+        // the last entry (or at Head while the batch is empty).
+        //
+        *Tail = (SLIST_ENTRY*)Packet;
+        Tail = &((SLIST_ENTRY*)Packet)->Next;
+        Count++;
+    }
+
+    if (Count > 0) {
+        InterlockedPushListSList(Pool, Head, CONTAINING_RECORD(Tail, SLIST_ENTRY, Next), Count);
+    }
+}
+
+//
+// Allocates a send packet by popping one from the TxPool of the route's queue.
+// Returns NULL when the pool is empty. The returned data buffer starts inside
+// the packet's FrameBuffer, after the space reserved for the headers of the
+// route's address family, and is MaxPacketSize bytes long.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+CXPLAT_SEND_DATA*
+CxPlatDpRawTxAlloc(
+    _In_ CXPLAT_DATAPATH* Datapath,
+    _In_ CXPLAT_ECN_TYPE ECN, // unused currently
+    _In_ uint16_t MaxPacketSize,
+    _Inout_ CXPLAT_ROUTE* Route
+    )
+{
+    QUIC_ADDRESS_FAMILY Family = QuicAddrGetFamily(&Route->RemoteAddress);
+    XDP_QUEUE* Queue = Route->Queue;
+    XDP_TX_PACKET* Packet = (XDP_TX_PACKET*)InterlockedPopEntrySList(&Queue->TxPool);
+
+    UNREFERENCED_PARAMETER(ECN);
+    UNREFERENCED_PARAMETER(Datapath);
+
+    if (Packet) {
+        HEADER_BACKFILL HeaderBackfill = CxPlatDpRawCalculateHeaderBackFill(Family); // TODO - Cache in Route?
+        CXPLAT_DBG_ASSERT(MaxPacketSize <= sizeof(Packet->FrameBuffer) - HeaderBackfill.AllLayer);
+        Packet->Queue = Queue;
+        Packet->Buffer.Length = MaxPacketSize;
+        Packet->Buffer.Buffer = &Packet->FrameBuffer[HeaderBackfill.AllLayer];
+    }
+
+    return (CXPLAT_SEND_DATA*)Packet;
+}
+
+//
+// Returns a send packet to the TxPool of the queue it was allocated from.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxFree(
+    _In_ CXPLAT_SEND_DATA* SendData
+    )
+{
+    XDP_TX_PACKET* Packet = (XDP_TX_PACKET*)SendData;
+    InterlockedPushEntrySList(&Packet->Queue->TxPool, (PSLIST_ENTRY)Packet);
+}
+
+//
+// Queues a send packet for transmission by appending it to the TxQueue of its
+// queue under TxLock. CxPlatXdpTx later moves the queued packets onto the TX
+// ring.
+//
+_IRQL_requires_max_(DISPATCH_LEVEL)
+void
+CxPlatDpRawTxEnqueue(
+    _In_ CXPLAT_SEND_DATA* SendData
+    )
+{
+    XDP_TX_PACKET* Packet = (XDP_TX_PACKET*)SendData;
+
+    CxPlatLockAcquire(&Packet->Queue->TxLock);
+    CxPlatListInsertTail(&Packet->Queue->TxQueue, &Packet->Link);
+    CxPlatLockRelease(&Packet->Queue->TxLock);
+}
+
+//
+// Transmit processing for one queue of an interface: moves queued packets onto
+// the TX ring, notifies the socket when required, and returns completed
+// packets to TxPool.
+//
+static
+void
+CxPlatXdpTx(
+    _In_ XDP_DATAPATH* Xdp,
+    _In_ uint8_t QueueId,
+    _In_ XDP_INTERFACE* Interface
+    )
+{
+    uint32_t ProdCount = 0;
+    uint32_t CompCount = 0;
+    SLIST_ENTRY* TxCompleteHead = NULL;
+    SLIST_ENTRY** TxCompleteTail = &TxCompleteHead;
+    XDP_QUEUE* Queue = &Interface->Queues[QueueId];
+
+    //
+    // Only take TxLock when WorkerTxQueue is drained and an unlocked peek
+    // shows that TxQueue is not empty.
+    //
+    if (CxPlatListIsEmpty(&Queue->WorkerTxQueue) &&
+        ReadPointerNoFence(&Queue->TxQueue.Flink) != &Queue->TxQueue) {
+        CxPlatLockAcquire(&Queue->TxLock);
+        CxPlatListMoveItems(&Queue->TxQueue, &Queue->WorkerTxQueue);
+        CxPlatLockRelease(&Queue->TxLock);
+    }
+
+    uint32_t TxIndex;
+    uint32_t TxAvailable = XskRingProducerReserve(&Queue->TxRing, MAXUINT32, &TxIndex);
+    while (TxAvailable-- > 0 && !CxPlatListIsEmpty(&Queue->WorkerTxQueue)) {
+        XSK_BUFFER_DESCRIPTOR* Buffer = XskRingGetElement(&Queue->TxRing, TxIndex++);
+        CXPLAT_LIST_ENTRY* Entry = CxPlatListRemoveHead(&Queue->WorkerTxQueue);
+        XDP_TX_PACKET* Packet = CONTAINING_RECORD(Entry, XDP_TX_PACKET, Link);
+
+        //
+        // Descriptor address is the packet's byte offset from TxBuffers, with
+        // the offset of FrameBuffer inside the packet stored alongside it.
+        //
+        Buffer->address = (uint8_t*)Packet - Queue->TxBuffers;
+        XskDescriptorSetOffset(&Buffer->address, FIELD_OFFSET(XDP_TX_PACKET, FrameBuffer));
+        Buffer->length = Packet->Buffer.Length;
+        ProdCount++;
+    }
+
+    //
+    // Publish the descriptors written above and poke the socket if configured
+    // to always do so or if the ring reports that a poke is needed.
+    //
+    if (ProdCount > 0) {
+        XskRingProducerSubmit(&Queue->TxRing, ProdCount);
+        if (Xdp->TxAlwaysPoke || XskRingProducerNeedPoke(&Queue->TxRing)) {
+            uint32_t OutFlags;
+            QUIC_STATUS Status = XskNotifySocket(Queue->TxXsk, XSK_NOTIFY_POKE_TX, 0, &OutFlags);
+            CXPLAT_DBG_ASSERT(QUIC_SUCCEEDED(Status));
+            UNREFERENCED_PARAMETER(Status);
+        }
+    }
+
+    //
+    // Collect completed sends into a local list; each completion descriptor is
+    // a byte offset from TxBuffers identifying the packet.
+    //
+    uint32_t CompIndex;
+    uint32_t CompAvailable =
+        XskRingConsumerReserve(&Queue->TxCompletionRing, MAXUINT32, &CompIndex);
+    while (CompAvailable-- > 0) {
+        uint64_t* CompDesc = XskRingGetElement(&Queue->TxCompletionRing, CompIndex++);
+        XDP_TX_PACKET* Packet = (XDP_TX_PACKET*)(Queue->TxBuffers + *CompDesc);
+        *TxCompleteTail = (PSLIST_ENTRY)Packet;
+        TxCompleteTail = &((PSLIST_ENTRY)Packet)->Next;
+        CompCount++;
+    }
+
+    //
+    // Release the consumed completions and return all completed packets to
+    // TxPool with a single push.
+    //
+    if (CompCount > 0) {
+        XskRingConsumerRelease(&Queue->TxCompletionRing, CompCount);
+        InterlockedPushListSList(
+            &Queue->TxPool, TxCompleteHead, CONTAINING_RECORD(TxCompleteTail, SLIST_ENTRY, Next),
+            CompCount);
+    }
+
+    //
+    // Report a ring error only once per queue; Queue->Error latches.
+    //
+    if (XskRingError(&Queue->TxRing) && !Queue->Error) {
+        XSK_ERROR ErrorStatus;
+        QUIC_STATUS XskStatus;
+        uint32_t ErrorSize = sizeof(ErrorStatus);
+        XskStatus = XskGetSockopt(Queue->TxXsk, XSK_SOCKOPT_TX_ERROR, &ErrorStatus, &ErrorSize);
+        printf("TX ring error: 0x%x\n", SUCCEEDED(XskStatus) ? ErrorStatus : XskStatus);
+        Queue->Error = TRUE;
+    }
+}
+
+//
+// Wake request for the datapath; intentionally does nothing.
+//
+void
+CxPlatDataPathWake(
+    _In_ void* Context
+    )
+{
+    // No-op - XDP never sleeps!
+    UNREFERENCED_PARAMETER(Context);
+}
+
+//
+// Runs one pass of the datapath. *Context holds the XDP_DATAPATH pointer.
+// If the datapath is no longer running, *Context is cleared and
+// CompletionEvent is set (CxPlatDpRawUninitialize waits on it). Otherwise RX
+// and then TX processing is run for every queue of every interface.
+// CurThreadId and WaitTime are unused.
+//
+void
+CxPlatDataPathRunEC(
+    _In_ void** Context,
+    _In_ CXPLAT_THREAD_ID CurThreadId,
+    _In_ uint32_t WaitTime
+    )
+{
+    XDP_DATAPATH* Xdp = *(XDP_DATAPATH**)Context;
+
+    UNREFERENCED_PARAMETER(CurThreadId);
+    UNREFERENCED_PARAMETER(WaitTime);
+
+    if (!Xdp->Running) {
+        *Context = NULL;
+        CxPlatEventSet(Xdp->CompletionEvent);
+        return;
+    }
+
+    CXPLAT_LIST_ENTRY* Entry;
+    for (Entry = Xdp->Interfaces.Flink; Entry != &Xdp->Interfaces; Entry = Entry->Flink) {
+        XDP_INTERFACE* Interface = CONTAINING_RECORD(Entry, XDP_INTERFACE, Link);
+        //
+        // NOTE(review): QueueId is a uint8_t while the other loops over
+        // QueueCount in this file use a uint32_t index. If QueueCount can
+        // exceed 255 this loop would never terminate - confirm the type and
+        // limit of QueueCount.
+        //
+        for (uint8_t QueueId = 0; QueueId < Interface->QueueCount; QueueId++) {
+            CxPlatXdpRx(Xdp, QueueId, Interface);
+            CxPlatXdpTx(Xdp, QueueId, Interface);
+        }
+    }
+}
diff --git a/src/platform/hashtable.c b/src/platform/hashtable.c
index 45fdcea8a..fac9a62a1 100644
--- a/src/platform/hashtable.c
+++ b/src/platform/hashtable.c
@@ -365,7 +365,7 @@ Return Value:
 static
 void
 CxPlatPopulateContext(
-    _In_ CXPLAT_HASHTABLE* HashTable,
+    _In_ const CXPLAT_HASHTABLE* HashTable,
     _Out_ CXPLAT_HASHTABLE_LOOKUP_CONTEXT* Context,
     _In_ uint64_t Signature
     )
@@ -804,7 +804,7 @@ Arguments:
 _Must_inspect_result_
 CXPLAT_HASHTABLE_ENTRY*
 CxPlatHashtableLookup(
-    _In_ CXPLAT_HASHTABLE* HashTable,
+    _In_ const CXPLAT_HASHTABLE* HashTable,
     _In_ uint64_t Signature,
     _Out_opt_ CXPLAT_HASHTABLE_LOOKUP_CONTEXT* Context
     )
@@ -870,7 +870,7 @@ Return Value:
 _Must_inspect_result_
 CXPLAT_HASHTABLE_ENTRY*
 CxPlatHashtableLookupNext(
-    _In_ CXPLAT_HASHTABLE* HashTable,
+    _In_ const CXPLAT_HASHTABLE* HashTable,
     _Inout_ CXPLAT_HASHTABLE_LOOKUP_CONTEXT* Context
     )
 /*++
diff --git a/submodules/googletest b/submodules/googletest
index 0e402173c..c9461a9b5 160000
--- a/submodules/googletest
+++ b/submodules/googletest
@@ -1 +1 @@
-Subproject commit 0e402173c97aea7a00749e825b194bfede4f2e45
+Subproject commit c9461a9b55ba954df0489bab6420eb297bed846b