From c76bfd0158a17aa0c1ed33af772dc42d3e6c4255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Bargull?= Date: Thu, 13 Sep 2018 01:34:06 -0700 Subject: [PATCH] Bug 1490609: Add JS_EncodeStringToASCII to CharacterEncoding.h. r=Waldo --- js/public/CharacterEncoding.h | 19 +++++++++++ js/src/builtin/TestingFunctions.cpp | 45 +++++++++++++------------ js/src/builtin/intl/CommonFunctions.cpp | 29 +++++++--------- js/src/jsapi.cpp | 9 +++++ js/src/jsnum.cpp | 2 +- js/src/proxy/ScriptedProxyHandler.cpp | 4 +-- js/src/vm/Debugger.cpp | 12 ++++++- js/src/vm/StringType.cpp | 36 +++++++++++++++++--- js/src/vm/StringType.h | 14 ++++++++ 9 files changed, 123 insertions(+), 47 deletions(-) diff --git a/js/public/CharacterEncoding.h b/js/public/CharacterEncoding.h index a1e006fc6228..7861e9f63fc1 100644 --- a/js/public/CharacterEncoding.h +++ b/js/public/CharacterEncoding.h @@ -368,4 +368,23 @@ JS_EncodeStringToLatin1(JSContext* cx, JSString* str); extern JS_PUBLIC_API(JS::UniqueChars) JS_EncodeStringToUTF8(JSContext* cx, JS::Handle str); +/** + * DEPRECATED + * + * Same behavior as JS_EncodeStringToLatin1(), but encode into an ASCII string. + * + * This function asserts in debug mode that the input string contains only + * ASCII characters. + * + * The returned string is also subject to misinterpretation if |str| contains + * any nulls (which are faithfully transcribed into the returned string, but + * which will implicitly truncate the string if it's passed to functions that + * expect null-terminated strings). + * + * Avoid using this function if possible, because we'll remove it once we can + * devise a better API for the task. + */ +extern JS_PUBLIC_API(JS::UniqueChars) +JS_EncodeStringToASCII(JSContext* cx, JSString* str); + #endif /* js_CharacterEncoding_h */ diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp index f9e81cf13c88..0fedba40302c 100644 --- a/js/src/builtin/TestingFunctions.cpp +++ b/js/src/builtin/TestingFunctions.cpp @@ -5016,7 +5016,17 @@ SetTimeZone(JSContext* cx, unsigned argc, Value* vp) }; if (args[0].isString() && !args[0].toString()->empty()) { - UniqueChars timeZone = JS_EncodeStringToLatin1(cx, args[0].toString()); + RootedLinearString str(cx, args[0].toString()->ensureLinear(cx)); + if (!str) { + return false; + } + + if (!StringIsAscii(str)) { + ReportUsageErrorASCII(cx, callee, "First argument contains non-ASCII characters"); + return false; + } + + UniqueChars timeZone = JS_EncodeStringToASCII(cx, str); if (!timeZone) { return false; } @@ -5081,38 +5091,31 @@ SetDefaultLocale(JSContext* cx, unsigned argc, Value* vp) } if (args[0].isString() && !args[0].toString()->empty()) { - auto containsOnlyValidBCP47Characters = [](auto* chars, size_t length) { - return mozilla::IsAsciiAlpha(chars[0]) && - std::all_of(chars, chars + length, [](auto c) { - return mozilla::IsAsciiAlphanumeric(c) || c == '-'; - }); - }; - RootedLinearString str(cx, args[0].toString()->ensureLinear(cx)); if (!str) { return false; } - bool hasValidChars; - { - JS::AutoCheckCannotGC nogc; - - size_t length = str->length(); - hasValidChars = str->hasLatin1Chars() - ? containsOnlyValidBCP47Characters(str->latin1Chars(nogc), length) - : containsOnlyValidBCP47Characters(str->twoByteChars(nogc), length); - } - - if (!hasValidChars) { - ReportUsageErrorASCII(cx, callee, "First argument should be BCP47 language tag"); + if (!StringIsAscii(str)) { + ReportUsageErrorASCII(cx, callee, "First argument contains non-ASCII characters"); return false; } - UniqueChars locale = JS_EncodeStringToLatin1(cx, str); + UniqueChars locale = JS_EncodeStringToASCII(cx, str); if (!locale) { return false; } + bool containsOnlyValidBCP47Characters = mozilla::IsAsciiAlpha(locale[0]) && + std::all_of(locale.get(), locale.get() + str->length(), [](auto c) { + return mozilla::IsAsciiAlphanumeric(c) || c == '-'; + }); + + if (!containsOnlyValidBCP47Characters) { + ReportUsageErrorASCII(cx, callee, "First argument should be a BCP47 language tag"); + return false; + } + if (!JS_SetDefaultLocale(cx->runtime(), locale.get())) { ReportOutOfMemory(cx); return false; diff --git a/js/src/builtin/intl/CommonFunctions.cpp b/js/src/builtin/intl/CommonFunctions.cpp index ed8812f6820f..b5cac47e7efb 100644 --- a/js/src/builtin/intl/CommonFunctions.cpp +++ b/js/src/builtin/intl/CommonFunctions.cpp @@ -91,27 +91,22 @@ js::intl::ReportInternalError(JSContext* cx) js::UniqueChars js::intl::EncodeLocale(JSContext* cx, JSString* locale) { -#ifdef DEBUG - auto containsOnlyValidBCP47Chars = [](auto* chars, size_t length) { - return length > 0 && - mozilla::IsAsciiAlpha(chars[0]) && - std::all_of(chars, chars + length, [](auto c) { - return mozilla::IsAsciiAlphanumeric(c) || c == '-'; - }); - }; + MOZ_ASSERT(locale->length() > 0); - if (JSLinearString* linear = locale->ensureLinear(cx)) { - JS::AutoCheckCannotGC nogc; - MOZ_ASSERT(linear->hasLatin1Chars() - ? containsOnlyValidBCP47Chars(linear->latin1Chars(nogc), linear->length()) - : containsOnlyValidBCP47Chars(linear->twoByteChars(nogc), linear->length())); - } else { - // Ignore OOM when only performing a debug assertion. - cx->recoverFromOutOfMemory(); + js::UniqueChars chars = EncodeAscii(cx, locale); + +#ifdef DEBUG + // Ensure the returned value contains only valid BCP 47 characters. + // (Lambdas can't be placed inside MOZ_ASSERT, so move the checks in an + // #ifdef block.) + if (chars) { + auto alnumOrDash = [](char c) { return mozilla::IsAsciiAlphanumeric(c) || c == '-'; }; + MOZ_ASSERT(mozilla::IsAsciiAlpha(chars[0])); + MOZ_ASSERT(std::all_of(chars.get(), chars.get() + locale->length(), alnumOrDash)); } #endif - return EncodeLatin1(cx, locale); + return chars; } bool diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp index 9cf13ae27805..3c2b530f6867 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -6267,6 +6267,15 @@ JS_DecodeBytes(JSContext* cx, const char* src, size_t srclen, char16_t* dst, siz return true; } +JS_PUBLIC_API(JS::UniqueChars) +JS_EncodeStringToASCII(JSContext* cx, JSString* str) +{ + AssertHeapIsIdle(); + CHECK_THREAD(cx); + + return js::EncodeAscii(cx, str); +} + JS_PUBLIC_API(JS::UniqueChars) JS_EncodeStringToLatin1(JSContext* cx, JSString* str) { diff --git a/js/src/jsnum.cpp b/js/src/jsnum.cpp index 8512096b1667..5743f71f83f2 100644 --- a/js/src/jsnum.cpp +++ b/js/src/jsnum.cpp @@ -842,7 +842,7 @@ num_toLocaleString_impl(JSContext* cx, const CallArgs& args) * Create the string, move back to bytes to make string twiddling * a bit easier and so we can insert platform charset seperators. */ - UniqueChars numBytes = JS_EncodeStringToLatin1(cx, str); + UniqueChars numBytes = EncodeAscii(cx, str); if (!numBytes) { return false; } diff --git a/js/src/proxy/ScriptedProxyHandler.cpp b/js/src/proxy/ScriptedProxyHandler.cpp index b5f99f9f9709..ca67f42884e8 100644 --- a/js/src/proxy/ScriptedProxyHandler.cpp +++ b/js/src/proxy/ScriptedProxyHandler.cpp @@ -187,12 +187,12 @@ GetProxyTrap(JSContext* cx, HandleObject handler, HandlePropertyName name, Mutab // Step 4. if (!IsCallable(func)) { - UniqueChars bytes = EncodeLatin1(cx, name); + UniqueChars bytes = EncodeAscii(cx, name); if (!bytes) { return false; } - JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr, JSMSG_BAD_TRAP, bytes.get()); + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_TRAP, bytes.get()); return false; } diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp index dfc2f2bb9c73..79149e107e0f 100644 --- a/js/src/vm/Debugger.cpp +++ b/js/src/vm/Debugger.cpp @@ -5069,6 +5069,16 @@ class MOZ_STACK_CLASS Debugger::ObjectQuery "neither undefined nor a string"); return false; } + JSLinearString* str = cls.toString()->ensureLinear(cx); + if (!str) { + return false; + } + if (!StringIsAscii(str)) { + JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_UNEXPECTED_TYPE, + "query object's 'class' property", + "not a string containing only ASCII characters"); + return false; + } className = cls; } return true; @@ -5197,7 +5207,7 @@ class MOZ_STACK_CLASS Debugger::ObjectQuery */ bool prepareQuery() { if (className.isString()) { - classNameCString = JS_EncodeStringToLatin1(cx, className.toString()); + classNameCString = JS_EncodeStringToASCII(cx, className.toString()); if (!classNameCString) { return false; } diff --git a/js/src/vm/StringType.cpp b/js/src/vm/StringType.cpp index b10af0fc0360..72e3d8156cfe 100644 --- a/js/src/vm/StringType.cpp +++ b/js/src/vm/StringType.cpp @@ -17,6 +17,8 @@ #include "mozilla/TypeTraits.h" #include "mozilla/Unused.h" +#include + #include "jsfriendapi.h" #include "frontend/BytecodeCompiler.h" @@ -1038,15 +1040,27 @@ js::CompareAtoms(JSAtom* atom1, JSAtom* atom2) return CompareStringsImpl(atom1, atom2); } +bool +js::StringIsAscii(JSLinearString* str) +{ + auto containsOnlyAsciiCharacters = [](const auto* chars, size_t length) { + return std::all_of(chars, chars + length, [](auto c) { + return mozilla::IsAscii(c); + }); + }; + + JS::AutoCheckCannotGC nogc; + return str->hasLatin1Chars() + ? containsOnlyAsciiCharacters(str->latin1Chars(nogc), str->length()) + : containsOnlyAsciiCharacters(str->twoByteChars(nogc), str->length()); +} + bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) { + MOZ_ASSERT(JS::StringIsASCII(asciiBytes)); + size_t length = strlen(asciiBytes); -#ifdef DEBUG - for (size_t i = 0; i != length; ++i) { - MOZ_ASSERT(unsigned(asciiBytes[i]) <= 127); - } -#endif if (length != str->length()) { return false; } @@ -2194,6 +2208,18 @@ js::EncodeLatin1(JSContext* cx, JSString* str) return UniqueChars(reinterpret_cast(buf)); } +UniqueChars +js::EncodeAscii(JSContext* cx, JSString* str) +{ + JSLinearString* linear = str->ensureLinear(cx); + if (!linear) { + return nullptr; + } + + MOZ_ASSERT(StringIsAscii(linear)); + return EncodeLatin1(cx, linear); +} + UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id, IdToPrintableBehavior behavior) { diff --git a/js/src/vm/StringType.h b/js/src/vm/StringType.h index 5bf1948e2763..479abb084490 100644 --- a/js/src/vm/StringType.h +++ b/js/src/vm/StringType.h @@ -1659,6 +1659,12 @@ CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result); extern int32_t CompareAtoms(JSAtom* atom1, JSAtom* atom2); +/** + * Return true if the string contains only ASCII characters. + */ +extern bool +StringIsAscii(JSLinearString* str); + /* * Return true if the string matches the given sequence of ASCII bytes. */ @@ -1688,6 +1694,14 @@ SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengt /*** Conversions *********************************************************************************/ +/* + * Convert a string to a printable C string. + * + * Asserts if the input contains any non-ASCII characters. + */ +UniqueChars +EncodeAscii(JSContext* cx, JSString* str); + /* * Convert a string to a printable C string. */