From 7fdbbc3e3cd82c50b169333b55ec166624e309f2 Mon Sep 17 00:00:00 2001 From: Neil Deakin Date: Thu, 5 May 2022 19:46:45 +0000 Subject: [PATCH] Bug 1746052, improve file sanitization by filtering and collpasing more whitespace characters and truncating files to 255 bytes, r=jfkthame Differential Revision: https://phabricator.services.mozilla.com/D138736 --- netwerk/mime/nsIMIMEService.idl | 2 +- .../exthandler/nsExternalHelperAppService.cpp | 180 ++++++++++++++---- 2 files changed, 146 insertions(+), 36 deletions(-) diff --git a/netwerk/mime/nsIMIMEService.idl b/netwerk/mime/nsIMIMEService.idl index f931074cf1c3..712073446dc9 100644 --- a/netwerk/mime/nsIMIMEService.idl +++ b/netwerk/mime/nsIMIMEService.idl @@ -153,7 +153,7 @@ interface nsIMIMEService : nsISupports { * - Whitespace and periods are removed from the beginning and end. * - Unless VALIDATE_DONT_COLLAPSE_WHITESPACE is specified, multiple * consecutive whitespace characters are collapsed to a single space - * character (' '). + * character, either ' ' or an ideographic space 0x3000 if present. * - Unless VALIDATE_DONT_TRUNCATE is specified, the filename is truncated * to a maximum length, preserving the extension if possible. * diff --git a/uriloader/exthandler/nsExternalHelperAppService.cpp b/uriloader/exthandler/nsExternalHelperAppService.cpp index 0b691f52faab..0afa589780dd 100644 --- a/uriloader/exthandler/nsExternalHelperAppService.cpp +++ b/uriloader/exthandler/nsExternalHelperAppService.cpp @@ -51,6 +51,8 @@ #include "nsOSHelperAppService.h" #include "nsOSHelperAppServiceChild.h" #include "nsContentSecurityUtils.h" +#include "nsUTF8Utils.h" +#include "nsUnicodeProperties.h" // used to access our datastore of user-configured helper applications #include "nsIHandlerService.h" @@ -3443,58 +3445,166 @@ void nsExternalHelperAppService::SanitizeFileName(nsAString& aFileName, uint32_t aFlags) { nsAutoString fileName(aFileName); + // Replace characters fileName.ReplaceChar(KNOWN_PATH_SEPARATORS, '_'); fileName.ReplaceChar(FILE_ILLEGAL_CHARACTERS, ' '); fileName.StripChar(char16_t(0)); - // Remove unsafe bidi characters which might have spoofing implications (bug - // 511521). - const char16_t unsafeBidiCharacters[] = { - char16_t(0x061c), // Arabic Letter Mark - char16_t(0x200e), // Left-to-Right Mark - char16_t(0x200f), // Right-to-Left Mark - char16_t(0x202a), // Left-to-Right Embedding - char16_t(0x202b), // Right-to-Left Embedding - char16_t(0x202c), // Pop Directional Formatting - char16_t(0x202d), // Left-to-Right Override - char16_t(0x202e), // Right-to-Left Override - char16_t(0x2066), // Left-to-Right Isolate - char16_t(0x2067), // Right-to-Left Isolate - char16_t(0x2068), // First Strong Isolate - char16_t(0x2069), // Pop Directional Isolate - char16_t(0)}; - fileName.ReplaceChar(unsafeBidiCharacters, '_'); + const char16_t *startStr, *endStr; + fileName.BeginReading(startStr); + fileName.EndReading(endStr); - // Trim whitespace, periods and vowel separators from the beginning and - // end of the filename. Periods are removed to avoid creating hidden files. - fileName.Trim(" .\f\n\r\t\v", true, true); + // True if multiple consecutive whitespace characters should + // be replaced by single space ' '. + bool collapseWhitespace = !(aFlags & VALIDATE_DONT_COLLAPSE_WHITESPACE); - // Collapse duplicate whitespace. - if (!(aFlags & VALIDATE_DONT_COLLAPSE_WHITESPACE)) { - fileName.CompressWhitespace(); + // The maximum filename length differs based on the platform: + // Windows (FAT/NTFS) stores filenames as a maximum of 255 UTF-16 code units. + // Mac (APFS) stores filenames with a maximum 255 of UTF-8 code units. + // Linux (ext3/ext4...) stores filenames with a maximum 255 bytes. + // So here we just use the maximum of 255 bytes. + uint32_t maxBytes = 0; // 0 means don't truncate at a maximum size. + if (!(aFlags & VALIDATE_DONT_TRUNCATE)) { + maxBytes = 255 - aExtension.Length() - 1; + } + + // True if the last character added was whitespace. + bool lastWasWhitespace = false; + + // True if the filename is too long and must be truncated. + bool longFileName = false; + + // Length of the filename that fits into the maximum size excluding the + // extension and period. + int32_t longFileNameEnd = -1; + + // Index of the last character added that was not a character that can be + // trimmed off of the end of the string. Trimmable characters are whitespace, + // periods and the vowel separator u'\u180e'. If all the characters after this + // point are trimmable characters, truncate the string to this point after + // iterating over the filename. + int32_t lastNonTrimmable = -1; + + // The number of bytes that the string would occupy if encoded in UTF-8. + uint32_t bytesLength = 0; + + // This algorithm iterates over each character in the string and appends it + // or a replacement character if needed to outFileName. + nsAutoString outFileName; + while (startStr < endStr) { + bool err = false; + char32_t nextChar = UTF16CharEnumerator::NextChar(&startStr, endStr, &err); + if (err) { + break; + } + + if (nextChar == char16_t(0)) { + continue; + } + + auto unicodeCategory = unicode::GetGeneralCategory(nextChar); + if (unicodeCategory == HB_UNICODE_GENERAL_CATEGORY_CONTROL || + unicodeCategory == HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR || + unicodeCategory == HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR) { + // Skip over any control characters and separators. + continue; + } + + if (maxBytes) { + // UTF16CharEnumerator already converts surrogate pairs, so we can use + // a simple computation of byte length here. + bytesLength += nextChar < 0x80 ? 1 + : nextChar < 0x800 ? 2 + : nextChar < 0x10000 ? 3 + : 4; + if (bytesLength > maxBytes) { + if (longFileNameEnd == -1) { + longFileNameEnd = int32_t(outFileName.Length()); + } + if (bytesLength > 255) { + longFileName = true; + break; + } + } + } + + if (unicodeCategory == HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR || + nextChar == u'\ufeff') { + // Trim out any whitespace characters at the beginning of the filename, + // and only add whitespace in the middle of the filename if the last + // character was not whitespace or if we are not collapsing whitespace. + if (!outFileName.IsEmpty() && + (!lastWasWhitespace || !collapseWhitespace)) { + // Allow the ideographic space if it is present, otherwise replace with + // ' '. + if (nextChar != u'\u3000') { + nextChar = ' '; + } + lastWasWhitespace = true; + } else { + lastWasWhitespace = true; + continue; + } + } else { + lastWasWhitespace = false; + if (nextChar == '.' || nextChar == u'\u180e') { + // Don't add any periods or vowel separators at the beginning of the + // string. Note also that lastNonTrimmable is not adjusted in this + // case, because periods and vowel separators are included in the + // set of characters to trim at the end of the filename. + if (outFileName.IsEmpty()) { + continue; + } + } else { + if (unicodeCategory == HB_UNICODE_GENERAL_CATEGORY_FORMAT) { + // Replace formatting characters with an underscore. + nextChar = '_'; + } + + lastNonTrimmable = int32_t(outFileName.Length()) + 1; + } + } + + AppendUCS4ToUTF16(nextChar, outFileName); + } + + // There are two ways in which the filename should be truncated: + // - If the filename was too long, truncate the name at the length + // of the filename minus the space needed for the extension and period. + // This position is indicated by longFileNameEnd. + // - lastNonTrimmable will indicate the last character that was not + // whitespace, a period, or a vowel separator at the end of the + // the string, so the string should be truncated there as well. + // If both apply, use the earliest position. + if (lastNonTrimmable >= 0) { + outFileName.Truncate(longFileName + ? std::min(longFileNameEnd, lastNonTrimmable) + : lastNonTrimmable); } // If the filename is too long, truncate it, but preserve the desired // extension. - if (!(aFlags & VALIDATE_DONT_TRUNCATE) && - fileName.Length() > kDefaultMaxFileNameLength) { + if (!maxBytes && !(aFlags & VALIDATE_DONT_TRUNCATE) && + outFileName.Length() > kDefaultMaxFileNameLength) { // This is extremely unlikely, but if the extension is larger than the // maximum size, just get rid of it. if (aExtension.Length() >= kDefaultMaxFileNameLength) { - fileName.Truncate(kDefaultMaxFileNameLength - 1); + outFileName.Truncate(kDefaultMaxFileNameLength - 1); } else { - fileName.Truncate(kDefaultMaxFileNameLength - aExtension.Length() - 1); - if (!fileName.IsEmpty()) { - if (fileName.Last() != '.') { - fileName.AppendLiteral("."); - } - - fileName.Append(NS_ConvertUTF8toUTF16(aExtension)); - } + outFileName.Truncate(kDefaultMaxFileNameLength - aExtension.Length() - 1); + longFileName = true; } } - aFileName = fileName; + if (longFileName && !outFileName.IsEmpty()) { + if (outFileName.Last() != '.') { + outFileName.AppendLiteral("."); + } + + outFileName.Append(NS_ConvertUTF8toUTF16(aExtension)); + } + + aFileName = outFileName; } nsExternalHelperAppService::ModifyExtensionType