Overhaul the URL path unescaping
This commit is contained in:
Родитель
8aaa4d0759
Коммит
ff97efe0d2
|
@ -181,6 +181,20 @@ namespace Microsoft.Net.Http.Server
|
|||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Copies the raw URL of the native request into a new managed byte array.
/// </summary>
/// <returns>
/// A copy of the raw URL bytes, or null when the native request carries no raw URL
/// (null pointer or a length of zero).
/// </returns>
internal byte[] GetRawUrlInBytes()
{
    var length = NativeRequest->RawUrlLength;

    // Nothing to copy: report the absence of a raw URL to the caller.
    if (NativeRequest->pRawUrl == null || length <= 0)
    {
        return null;
    }

    var rawUrl = new byte[length];
    Marshal.Copy((IntPtr)NativeRequest->pRawUrl, rawUrl, 0, length);
    return rawUrl;
}
|
||||
|
||||
internal CookedUrl GetCookedUrl()
|
||||
{
|
||||
return new CookedUrl(NativeRequest->CookedUrl);
|
||||
|
|
|
@ -67,11 +67,12 @@ namespace Microsoft.Net.Http.Server
|
|||
RawUrl = nativeRequestContext.GetRawUrl();
|
||||
|
||||
var cookedUrl = nativeRequestContext.GetCookedUrl();
|
||||
var cookedUrlPath = cookedUrl.GetAbsPath() ?? string.Empty;
|
||||
QueryString = cookedUrl.GetQueryString() ?? string.Empty;
|
||||
|
||||
var prefix = requestContext.Server.Settings.UrlPrefixes.GetPrefix((int)nativeRequestContext.UrlContext);
|
||||
var originalPath = RequestUriBuilder.GetRequestPath(RawUrl, cookedUrlPath, RequestContext.Logger);
|
||||
|
||||
var rawUrlInBytes = _nativeRequestContext.GetRawUrlInBytes();
|
||||
var originalPath = RequestUriBuilder.GetRequestPath(rawUrlInBytes, RequestContext.Logger);
|
||||
|
||||
// 'OPTIONS * HTTP/1.1'
|
||||
if (KnownMethod == HttpApi.HTTP_VERB.HttpVerbOPTIONS && string.Equals(RawUrl, "*", StringComparison.Ordinal))
|
||||
|
|
|
@ -21,296 +21,42 @@
|
|||
// </copyright>
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
// We don't use the cooked URL because http.sys unescapes all percent-encoded values. However,
|
||||
// we also can't just use the raw Uri, since http.sys supports not only Utf-8, but also ANSI/DBCS and
|
||||
// Unicode code points. System.Uri only supports Utf-8.
|
||||
// we also can't just use the raw Uri, since http.sys supports not only UTF-8, but also ANSI/DBCS and
|
||||
// Unicode code points. System.Uri only supports UTF-8.
|
||||
// The purpose of this class is to decode all UTF-8 percent encoded characters, with the
|
||||
// exception of %2F ('/'), which is left encoded.
|
||||
internal sealed class RequestUriBuilder
|
||||
{
|
||||
private static readonly Encoding Utf8Encoding;
|
||||
|
||||
private readonly string _rawUri;
|
||||
private readonly string _cookedUriPath;
|
||||
|
||||
// This field is used to build the final request Uri string from the Uri parts passed to the ctor.
|
||||
private StringBuilder _requestUriString;
|
||||
|
||||
// The raw path is parsed by looping through all characters from left to right. 'rawOctets'
|
||||
// is used to store consecutive percent encoded octets as actual byte values: e.g. for path /pa%C3%84th%20/
|
||||
// rawOctets will be set to { 0xC3, 0x84 } when we reach character 't' and it will be { 0x20 } when
|
||||
// we reach the final '/'. I.e. after a sequence of percent encoded octets ends, we use rawOctets as
|
||||
// input to the encoding and decode them into a string.
|
||||
private List<byte> _rawOctets;
|
||||
private string _rawPath;
|
||||
|
||||
private ILogger _logger;
|
||||
|
||||
static RequestUriBuilder()
|
||||
{
|
||||
Utf8Encoding = new UTF8Encoding(false, true);
|
||||
}
|
||||
|
||||
private RequestUriBuilder(string rawUri, string cookedUriPath, ILogger logger)
|
||||
{
|
||||
Debug.Assert(!string.IsNullOrEmpty(rawUri), "Empty raw URL.");
|
||||
Debug.Assert(!string.IsNullOrEmpty(cookedUriPath), "Empty cooked URL path.");
|
||||
Debug.Assert(logger != null, "Null logger.");
|
||||
|
||||
this._rawUri = rawUri;
|
||||
this._cookedUriPath = AddSlashToAsteriskOnlyPath(cookedUriPath);
|
||||
this._logger = logger;
|
||||
}
|
||||
|
||||
private enum ParsingResult
|
||||
{
|
||||
Success,
|
||||
InvalidString,
|
||||
EncodingError
|
||||
}
|
||||
|
||||
// Process only the path.
|
||||
internal static string GetRequestPath(string rawUri, string cookedUriPath, ILogger logger)
|
||||
public static string GetRequestPath(byte[] rawUriInBytes, ILogger logger)
|
||||
{
|
||||
RequestUriBuilder builder = new RequestUriBuilder(rawUri, cookedUriPath, logger);
|
||||
//Debug.Assert(rawUriInBytes == null || rawUriInBytes.Length == 0, "Empty raw URL.");
|
||||
//Debug.Assert(logger != null, "Null logger.");
|
||||
|
||||
return builder.GetPath();
|
||||
}
|
||||
var rawUriInByte = new UrlInByte(rawUriInBytes);
|
||||
var pathInByte = rawUriInByte.Path;
|
||||
|
||||
private string GetPath()
|
||||
{
|
||||
// Initialize 'rawPath' only if really needed; i.e. if we build the request Uri from the raw Uri.
|
||||
_rawPath = GetPath(_rawUri);
|
||||
|
||||
// If HTTP.sys only parses Utf-8, we can safely use the raw path: it must be a valid Utf-8 string.
|
||||
if (!HttpSysSettings.EnableNonUtf8 || string.IsNullOrEmpty(_rawPath))
|
||||
{
|
||||
if (string.IsNullOrEmpty(_rawPath))
|
||||
{
|
||||
_rawPath = "/";
|
||||
}
|
||||
return _rawPath;
|
||||
}
|
||||
|
||||
_rawOctets = new List<byte>();
|
||||
_requestUriString = new StringBuilder();
|
||||
ParsingResult result = ParseRawPath(Utf8Encoding);
|
||||
|
||||
if (result == ParsingResult.Success)
|
||||
{
|
||||
return _requestUriString.ToString();
|
||||
}
|
||||
|
||||
// Fallback
|
||||
return _cookedUriPath;
|
||||
}
|
||||
|
||||
private ParsingResult ParseRawPath(Encoding encoding)
|
||||
{
|
||||
Debug.Assert(encoding != null, "'encoding' must be assigned.");
|
||||
|
||||
int index = 0;
|
||||
char current = '\0';
|
||||
while (index < _rawPath.Length)
|
||||
{
|
||||
current = _rawPath[index];
|
||||
if (current == '%')
|
||||
{
|
||||
// Assert is enough, since http.sys accepted the request string already. This should never happen.
|
||||
Debug.Assert(index + 2 < _rawPath.Length, "Expected at least 2 characters after '%' (e.g. %20)");
|
||||
|
||||
// We have a percent encoded octet: %XX
|
||||
var octetString = _rawPath.Substring(index + 1, 2);
|
||||
|
||||
// Leave %2F as is, otherwise add to raw octets list for unescaping
|
||||
if (octetString == "2F" || octetString == "2f")
|
||||
{
|
||||
_requestUriString.Append('%');
|
||||
_requestUriString.Append(octetString);
|
||||
}
|
||||
else if (!AddPercentEncodedOctetToRawOctetsList(encoding, octetString))
|
||||
{
|
||||
return ParsingResult.InvalidString;
|
||||
}
|
||||
|
||||
index += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!EmptyDecodeAndAppendDecodedOctetsList(encoding))
|
||||
{
|
||||
return ParsingResult.EncodingError;
|
||||
}
|
||||
|
||||
// Append the current character to the result.
|
||||
_requestUriString.Append(current);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
// if the raw path ends with a sequence of percent encoded octets, make sure those get added to the
|
||||
// result (requestUriString).
|
||||
if (!EmptyDecodeAndAppendDecodedOctetsList(encoding))
|
||||
{
|
||||
return ParsingResult.EncodingError;
|
||||
}
|
||||
|
||||
return ParsingResult.Success;
|
||||
}
|
||||
|
||||
private bool AddPercentEncodedOctetToRawOctetsList(Encoding encoding, string escapedCharacter)
|
||||
{
|
||||
byte encodedValue;
|
||||
if (!byte.TryParse(escapedCharacter, NumberStyles.HexNumber, null, out encodedValue))
|
||||
{
|
||||
LogHelper.LogDebug(_logger, nameof(AddPercentEncodedOctetToRawOctetsList), "Can't convert code point: " + escapedCharacter);
|
||||
return false;
|
||||
}
|
||||
|
||||
_rawOctets.Add(encodedValue);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private bool EmptyDecodeAndAppendDecodedOctetsList(Encoding encoding)
|
||||
{
|
||||
if (_rawOctets.Count == 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
string decodedString = null;
|
||||
try
|
||||
{
|
||||
// If the encoding can get a string out of the byte array, this is a valid string in the
|
||||
// 'encoding' encoding.
|
||||
var bytes = _rawOctets.ToArray();
|
||||
decodedString = encoding.GetString(bytes, 0, bytes.Length);
|
||||
|
||||
_requestUriString.Append(decodedString);
|
||||
_rawOctets.Clear();
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (DecoderFallbackException e)
|
||||
{
|
||||
LogHelper.LogDebug(_logger, nameof(EmptyDecodeAndAppendDecodedOctetsList), "Can't convert bytes: " + GetOctetsAsString(_rawOctets) + ": " + e.Message);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static string GetOctetsAsString(IEnumerable<byte> octets)
|
||||
{
|
||||
StringBuilder octetString = new StringBuilder();
|
||||
|
||||
bool first = true;
|
||||
foreach (byte octet in octets)
|
||||
{
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
octetString.Append(" ");
|
||||
}
|
||||
octetString.Append(octet.ToString("X2", CultureInfo.InvariantCulture));
|
||||
}
|
||||
|
||||
return octetString.ToString();
|
||||
}
|
||||
|
||||
private static string GetPath(string uriString)
|
||||
{
|
||||
Debug.Assert(uriString != null, "uriString must not be null");
|
||||
Debug.Assert(uriString.Length > 0, "uriString must not be empty");
|
||||
|
||||
int pathStartIndex = 0;
|
||||
|
||||
// Perf. improvement: nearly all strings are relative Uris. So just look if the
|
||||
// string starts with '/'. If so, we have a relative Uri and the path starts at position 0.
|
||||
// (http.sys already trimmed leading whitespaces)
|
||||
if (uriString[0] != '/')
|
||||
{
|
||||
// We can't check against cookedUriScheme, since http.sys allows for request http://myserver/ to
|
||||
// use a request line 'GET https://myserver/' (note http vs. https). Therefore check if the
|
||||
// Uri starts with either http:// or https://.
|
||||
int authorityStartIndex = 0;
|
||||
if (uriString.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
authorityStartIndex = 7;
|
||||
}
|
||||
else if (uriString.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
authorityStartIndex = 8;
|
||||
}
|
||||
|
||||
if (authorityStartIndex > 0)
|
||||
{
|
||||
// we have an absolute Uri. Find out where the authority ends and the path begins.
|
||||
// Note that Uris like "http://server?query=value/1/2" are invalid according to RFC2616
|
||||
// and http.sys behavior: If the Uri contains a query, there must be at least one '/'
|
||||
// between the authority and the '?' character: It's safe to just look for the first
|
||||
// '/' after the authority to determine the beginning of the path.
|
||||
pathStartIndex = uriString.IndexOf('/', authorityStartIndex);
|
||||
if (pathStartIndex == -1)
|
||||
{
|
||||
// e.g. for request lines like: 'GET http://myserver' (no final '/')
|
||||
pathStartIndex = uriString.Length;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
|
||||
// 'authority' can only be used with CONNECT which is never received by HttpListener.
|
||||
// I.e. if we don't have an absolute path (must start with '/') and we don't have
|
||||
// an absolute Uri (must start with http:// or https://), then 'uriString' must be '*'.
|
||||
Debug.Assert((uriString.Length == 1) && (uriString[0] == '*'), "Unknown request Uri string format; "
|
||||
+ "Request Uri string is not an absolute Uri, absolute path, or '*': " + uriString);
|
||||
|
||||
// Should we ever get here, be consistent with 2.0/3.5 behavior: just add an initial
|
||||
// slash to the string and treat it as a path:
|
||||
uriString = "/" + uriString;
|
||||
}
|
||||
}
|
||||
|
||||
// Find end of path: The path is terminated by
|
||||
// - the first '?' character
|
||||
// - the first '#' character: This is never the case here, since http.sys won't accept
|
||||
// Uris containing fragments. Also, RFC2616 doesn't allow fragments in request Uris.
|
||||
// - end of Uri string
|
||||
int queryIndex = uriString.IndexOf('?');
|
||||
if (queryIndex == -1)
|
||||
{
|
||||
queryIndex = uriString.Length;
|
||||
}
|
||||
|
||||
// will always return a != null string.
|
||||
return AddSlashToAsteriskOnlyPath(uriString.Substring(pathStartIndex, queryIndex - pathStartIndex));
|
||||
}
|
||||
|
||||
private static string AddSlashToAsteriskOnlyPath(string path)
|
||||
{
|
||||
Debug.Assert(path != null, "'path' must not be null");
|
||||
|
||||
// If a request like "OPTIONS * HTTP/1.1" is sent to the listener, then the request Uri
|
||||
// should be "http[s]://server[:port]/*" to be compatible with pre-4.0 behavior.
|
||||
if ((path.Length == 1) && (path[0] == '*'))
|
||||
if (pathInByte.Count == 1 && pathInByte.Array[pathInByte.Offset] == '*')
|
||||
{
|
||||
return "/*";
|
||||
}
|
||||
|
||||
return path;
|
||||
var unescapedRaw = UrlPathDecoder.Unescape(pathInByte);
|
||||
return Utf8Encoding.GetString(unescapedRaw.Array, unescapedRaw.Offset, unescapedRaw.Count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
using System;
|
||||
using System.Text;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
/// <summary>
/// Wraps the raw request URL bytes and exposes the segment of those bytes that
/// represents the path (everything from the first '/' after the optional
/// scheme and authority up to, but not including, the first '?').
/// </summary>
internal class UrlInByte
{
    private const string HttpScheme = "http://";
    private const string HttpsScheme = "https://";

    private readonly byte[] _raw;

    /// <summary>
    /// Creates the wrapper and eagerly locates the path segment.
    /// </summary>
    /// <param name="raw">The raw URL bytes. Must not be null or empty.</param>
    /// <exception cref="ArgumentNullException"><paramref name="raw"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="raw"/> is empty, or is neither '*', an absolute path,
    /// nor an absolute http/https URI.
    /// </exception>
    public UrlInByte(byte[] raw)
    {
        _raw = raw;
        Path = LocalPath(_raw);
    }

    /// <summary>
    /// The segment of the raw URL bytes that holds the path. The segment shares
    /// the array passed to the constructor; it is not a copy.
    /// </summary>
    public ArraySegment<byte> Path { get; }

    /// <summary>
    /// Find the segment of the URI byte array which represents the path.
    /// </summary>
    private static ArraySegment<byte> LocalPath(byte[] raw)
    {
        if (raw == null)
        {
            throw new ArgumentNullException(nameof(raw));
        }

        if (raw.Length == 0)
        {
            // An empty request URI is none of '*', abs_path or absoluteURI.
            throw new InvalidOperationException("Invalid URI format");
        }

        var pathStartIndex = 0;

        // Performance improvement: accept two cases upfront
        //
        // 1) Since nearly all strings are relative Uris, just look if the string starts with '/'.
        //    If so, we have a relative Uri and the path starts at position 0.
        //
        // 2) The URL is simply '*'
        if (raw[0] != '/' && !(raw.Length == 1 && raw[0] == '*'))
        {
            // The scheme in the request line is not required to match the scheme the
            // request arrived on, so accept either http:// or https://.
            var authorityStartIndex = FindHttpOrHttps(raw);
            if (authorityStartIndex == 0)
            {
                // RFC2616: Request-URI = "*" | absoluteURI | abs_path | authority
                // Not '*', not an absolute path and not an absolute http(s) Uri.
                throw new InvalidOperationException("Invalid URI format");
            }

            // Absolute Uri: the path begins at the first '/' after the authority.
            pathStartIndex = Find(raw, authorityStartIndex, '/');
            if (pathStartIndex == -1)
            {
                // e.g. for request lines like: 'GET http://myserver' (no final '/')
                pathStartIndex = raw.Length;
            }
        }

        // Find end of path: the path is terminated by the first '?' or the end of the bytes.
        // The scan starts AT pathStartIndex (not one past it) so that an absolute Uri
        // without any path yields an empty segment instead of a range past the array end.
        var scan = pathStartIndex;
        while (scan < raw.Length && raw[scan] != '?')
        {
            scan++;
        }

        return new ArraySegment<byte>(raw, pathStartIndex, scan - pathStartIndex);
    }

    /// <summary>
    /// Compare the beginning portion of the raw URL byte array to https:// and http://
    /// (ASCII case-insensitive).
    /// </summary>
    /// <param name="raw">The byte array represents the raw URI</param>
    /// <returns>Length of the matched bytes, 0 if it is not matched.</returns>
    private static int FindHttpOrHttps(byte[] raw)
    {
        if (StartsWithIgnoreAsciiCase(raw, HttpScheme))
        {
            return HttpScheme.Length;
        }

        if (StartsWithIgnoreAsciiCase(raw, HttpsScheme))
        {
            return HttpsScheme.Length;
        }

        return 0;
    }

    /// <summary>
    /// Checks whether <paramref name="raw"/> begins with the lowercase ASCII
    /// <paramref name="prefix"/>, folding only 'A'-'Z' to lowercase.
    /// </summary>
    private static bool StartsWithIgnoreAsciiCase(byte[] raw, string prefix)
    {
        if (raw.Length < prefix.Length)
        {
            return false;
        }

        for (var idx = 0; idx < prefix.Length; ++idx)
        {
            int current = raw[idx];
            if (current >= 'A' && current <= 'Z')
            {
                current += 'a' - 'A';
            }

            if (current != prefix[idx])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns the index of the first occurrence of <paramref name="target"/> at or
    /// after <paramref name="begin"/>, or -1 when it does not occur.
    /// </summary>
    private static int Find(byte[] raw, int begin, char target)
    {
        for (var idx = begin; idx < raw.Length; ++idx)
        {
            if (raw[idx] == target)
            {
                return idx;
            }
        }

        return -1;
    }
}
|
||||
}
|
|
@ -0,0 +1,313 @@
|
|||
// Copyright (c) .NET Foundation. All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
||||
|
||||
using System;
|
||||
|
||||
namespace Microsoft.Net.Http.Server
|
||||
{
|
||||
/// <summary>
/// Decodes percent-encoded octets of a URL path in place. Only percent-encodings that
/// form valid UTF-8 sequences are decoded; %2F is always left encoded, and anything
/// that cannot be interpreted as UTF-8 is copied to the output untouched.
/// </summary>
public class UrlPathDecoder
{
    /// <summary>
    /// Unescape a given path which may contain percent-encoded characters.
    /// The decoding is done in place: the array backing <paramref name="rawPath"/> is
    /// overwritten, and the returned segment starts at the same offset with a count
    /// that is less than or equal to the original.
    /// </summary>
    /// <param name="rawPath">The raw path bytes to be unescaped</param>
    /// <returns>The segment of the same array holding the unescaped path</returns>
    /// <exception cref="InvalidOperationException">The path contains an encoded null (%00).</exception>
    public static ArraySegment<byte> Unescape(ArraySegment<byte> rawPath)
    {
        // Nothing to decode; this also covers default(ArraySegment<byte>) whose array is null.
        if (rawPath.Count == 0)
        {
            return rawPath;
        }

        // the byte array, read from and written to
        var buffer = rawPath.Array;

        // the slot to read the input
        var reader = rawPath.Offset;

        // the slot to write the unescaped byte; never ahead of the reader
        var writer = rawPath.Offset;

        // the end of the path (exclusive)
        var end = rawPath.Offset + rawPath.Count;

        while (reader != end)
        {
            if (buffer[reader] == '%')
            {
                var decodeReader = reader;

                // If decoding succeeds, the writer is moved to the next write-ready slot.
                // If the scanned percent-encodings cannot be interpreted as a sequence of
                // UTF-8 octets, decodeReader still points at the first byte not yet
                // consumed, and the bytes between reader and decodeReader are copied
                // to the output as is.
                if (!DecodeCore(ref decodeReader, ref writer, end, buffer))
                {
                    Copy(reader, decodeReader, ref writer, buffer);
                }

                reader = decodeReader;
            }
            else
            {
                buffer[writer++] = buffer[reader++];
            }
        }

        return new ArraySegment<byte>(buffer, rawPath.Offset, writer - rawPath.Offset);
    }

    /// <summary>
    /// Unescape the percent-encodings of one UTF-8 encoded character.
    /// </summary>
    /// <param name="reader">
    /// On entry points at the first % char. On exit points at the first byte that was
    /// not consumed, whether or not decoding succeeded.
    /// </param>
    /// <param name="writer">The place to write to; advanced only on success</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/></param>
    /// <param name="buffer">The byte array</param>
    /// <returns>True if a complete, valid UTF-8 sequence was decoded and written.</returns>
    private static bool DecodeCore(ref int reader, ref int writer, int end, byte[] buffer)
    {
        var byte1 = UnescapePercentEncoding(ref reader, end, buffer);

        if (byte1 == 0)
        {
            throw new InvalidOperationException("The path contains null characters.");
        }

        if (byte1 == -1)
        {
            return false;
        }

        if (byte1 <= 0x7F)
        {
            // first byte <= U+007F, it is a single byte ASCII
            buffer[writer++] = (byte)byte1;
            return true;
        }

        int byte2 = 0, byte3 = 0, byte4 = 0;

        // anticipate more bytes
        int currentDecodeBits;
        int byteCount;
        int expectValueMin;
        if ((byte1 & 0xE0) == 0xC0)
        {
            // 110x xxxx, expect one more byte
            currentDecodeBits = byte1 & 0x1F;
            byteCount = 2;
            expectValueMin = 0x80;
        }
        else if ((byte1 & 0xF0) == 0xE0)
        {
            // 1110 xxxx, expect two more bytes
            currentDecodeBits = byte1 & 0x0F;
            byteCount = 3;
            expectValueMin = 0x800;
        }
        else if ((byte1 & 0xF8) == 0xF0)
        {
            // 1111 0xxx, expect three more bytes
            currentDecodeBits = byte1 & 0x07;
            byteCount = 4;
            expectValueMin = 0x10000;
        }
        else
        {
            // invalid first byte
            return false;
        }

        var remainingBytes = byteCount - 1;
        while (remainingBytes > 0)
        {
            // The continuation must lie inside the path segment. Compare against 'end',
            // not the array length: the segment may stop before the array does.
            if (reader == end)
            {
                return false;
            }

            var nextItr = reader;
            var nextByte = UnescapePercentEncoding(ref nextItr, end, buffer);
            if (nextByte == -1)
            {
                return false;
            }

            if ((nextByte & 0xC0) != 0x80)
            {
                // the follow up byte is not in form of 10xx xxxx
                return false;
            }

            currentDecodeBits = (currentDecodeBits << 6) | (nextByte & 0x3F);
            remainingBytes--;

            if (remainingBytes == 1 && currentDecodeBits >= 0x360 && currentDecodeBits <= 0x37F)
            {
                // this is going to end up in the range of 0xD800-0xDFFF UTF-16 surrogates that
                // are not allowed in UTF-8;
                return false;
            }

            if (remainingBytes == 2 && currentDecodeBits >= 0x110)
            {
                // this is going to be out of the upper Unicode bound 0x10FFFF.
                return false;
            }

            // The continuation byte is accepted; only now consume it.
            reader = nextItr;
            switch (byteCount - remainingBytes)
            {
                case 2:
                    byte2 = nextByte;
                    break;
                case 3:
                    byte3 = nextByte;
                    break;
                case 4:
                    byte4 = nextByte;
                    break;
            }
        }

        if (currentDecodeBits < expectValueMin)
        {
            // overlong encoding (e.g. using 2 bytes to encode something that only needed 1).
            return false;
        }

        // all bytes are verified, write to the output (byteCount is 2, 3 or 4 here)
        buffer[writer++] = (byte)byte1;
        buffer[writer++] = (byte)byte2;
        if (byteCount > 2)
        {
            buffer[writer++] = (byte)byte3;
        }

        if (byteCount > 3)
        {
            buffer[writer++] = (byte)byte4;
        }

        return true;
    }

    /// <summary>
    /// Copies the bytes in [begin, end) to the writer position, advancing the writer.
    /// </summary>
    private static void Copy(int begin, int end, ref int writer, byte[] buffer)
    {
        while (begin != end)
        {
            buffer[writer++] = buffer[begin++];
        }
    }

    /// <summary>
    /// Read the percent-encoding and try unescape it.
    ///
    /// The operation first peeks at the character the <paramref name="scan"/>
    /// iterator points at. If it is % the <paramref name="scan"/> is then
    /// moved on to scan the following two characters. If the two following
    /// characters are hexadecimal literals they will be unescaped and the
    /// value will be returned, with <paramref name="scan"/> moved past them.
    ///
    /// If the first character is not % the <paramref name="scan"/> iterator
    /// will be moved one byte forward and -1 will be returned.
    ///
    /// If the following two characters can't be successfully unescaped, or
    /// they spell %2F, the <paramref name="scan"/> iterator will be left just
    /// behind the % and -1 will be returned.
    /// </summary>
    /// <param name="scan">The position to read from; must be inside the path</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/></param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The unescaped byte if success. Otherwise return -1.</returns>
    private static int UnescapePercentEncoding(ref int scan, int end, byte[] buffer)
    {
        if (buffer[scan++] != '%')
        {
            return -1;
        }

        // Probe ahead without committing, so a failure leaves scan right after the '%'.
        var probe = scan;

        int value1 = ReadHex(ref probe, end, buffer);
        if (value1 == -1)
        {
            return -1;
        }

        int value2 = ReadHex(ref probe, end, buffer);
        if (value2 == -1)
        {
            return -1;
        }

        if (SkipUnescape(value1, value2))
        {
            return -1;
        }

        scan = probe;
        return (value1 << 4) + value2;
    }

    /// <summary>
    /// Read the next char and convert it into hexadecimal value.
    ///
    /// The <paramref name="scan"/> iterator will be moved to the next
    /// byte no matter whether the operation succeeds, unless it is
    /// already at <paramref name="end"/>.
    /// </summary>
    /// <param name="scan">The position to read from</param>
    /// <param name="end">The exclusive end index of the path inside <paramref name="buffer"/></param>
    /// <param name="buffer">The byte array</param>
    /// <returns>The hexadecimal value if succeeds, otherwise -1.</returns>
    private static int ReadHex(ref int scan, int end, byte[] buffer)
    {
        if (scan == end)
        {
            return -1;
        }

        var value = buffer[scan++];
        if (value >= '0' && value <= '9')
        {
            return value - '0';
        }

        if (value >= 'A' && value <= 'F')
        {
            return (value - 'A') + 10;
        }

        if (value >= 'a' && value <= 'f')
        {
            return (value - 'a') + 10;
        }

        return -1;
    }

    /// <summary>
    /// Tells whether the encoded octet made of the two hex digits must stay encoded.
    /// </summary>
    private static bool SkipUnescape(int value1, int value2)
    {
        // skip %2F
        return value1 == 2 && value2 == 15;
    }
}
|
||||
}
|
|
@ -324,36 +324,33 @@ namespace Microsoft.AspNetCore.Server.WebListener
|
|||
}
|
||||
}
|
||||
|
||||
// This test case ensures the consistency of current server behavior though it is not
|
||||
// an ideal one.
|
||||
[Theory]
|
||||
// Overlong ASCII
|
||||
[InlineData("%C0%A4", true, HttpStatusCode.OK)]
|
||||
[InlineData("%C1%BF", true, HttpStatusCode.OK)]
|
||||
[InlineData("%E0%80%AF", true, HttpStatusCode.OK)]
|
||||
[InlineData("%E0%9F%BF", true, HttpStatusCode.OK)]
|
||||
[InlineData("%F0%80%80%AF", true, HttpStatusCode.OK)]
|
||||
[InlineData("%F0%8F%8F%BF", false, HttpStatusCode.BadRequest)]
|
||||
[InlineData("%C0%A4", "%C0%A4")]
|
||||
[InlineData("%C1%BF", "%C1%BF")]
|
||||
[InlineData("%E0%80%AF", "%E0%80%AF")]
|
||||
[InlineData("%E0%9F%BF", "%E0%9F%BF")]
|
||||
[InlineData("%F0%80%80%AF", "%F0%80%80%AF")]
|
||||
//[InlineData("%F0%8F%8F%BF", "%F0%8F%8F%BF")]
|
||||
// Mixed
|
||||
[InlineData("%C0%A4%32", true, HttpStatusCode.OK)]
|
||||
[InlineData("%32%C0%A4%32", true, HttpStatusCode.OK)]
|
||||
[InlineData("%C0%32%A4", true, HttpStatusCode.OK)]
|
||||
public async Task Request_ServerErrorFromInvalidUTF8(string requestPath, bool unescaped, HttpStatusCode expectStatus)
|
||||
[InlineData("%C0%A4%32", "%C0%A42")]
|
||||
[InlineData("%32%C0%A4%32", "2%C0%A42")]
|
||||
[InlineData("%C0%32%A4", "%C02%A4")]
|
||||
public async Task Request_ServerErrorFromInvalidUTF8(string requestPath, string expectedPath)
|
||||
{
|
||||
bool pathIsUnescaped = false;
|
||||
string root;
|
||||
using (var server = Utilities.CreateHttpServerReturnRoot("/", out root, httpContext =>
|
||||
{
|
||||
var actualPath = httpContext.Request.Path.Value.TrimStart('/');
|
||||
pathIsUnescaped = !string.Equals(actualPath, requestPath, StringComparison.Ordinal);
|
||||
Assert.Equal(expectedPath, actualPath);
|
||||
|
||||
return Task.FromResult(0);
|
||||
}))
|
||||
{
|
||||
using (var client = new HttpClient())
|
||||
{
|
||||
var response = await client.GetAsync(root + "/" + requestPath);
|
||||
Assert.Equal(expectStatus, response.StatusCode);
|
||||
Assert.Equal(unescaped, pathIsUnescaped);
|
||||
Assert.Equal(HttpStatusCode.OK, response.StatusCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче