/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
|
|
#ifndef mozilla_dom_encodingutils_h_
|
|
|
|
#define mozilla_dom_encodingutils_h_
|
|
|
|
|
|
|
|
#include "nsDataHashtable.h"
|
|
|
|
#include "nsString.h"
|
|
|
|
|
2013-11-26 11:31:52 +04:00
|
|
|
class nsIUnicodeDecoder;
|
|
|
|
class nsIUnicodeEncoder;
|
|
|
|
|
2012-09-28 14:19:18 +04:00
|
|
|
namespace mozilla {
|
|
|
|
namespace dom {
|
|
|
|
|
|
|
|
class EncodingUtils
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Implements get an encoding algorithm from Encoding spec.
|
2012-10-10 14:32:37 +04:00
|
|
|
* http://encoding.spec.whatwg.org/#concept-encoding-get
|
2012-09-28 14:19:18 +04:00
|
|
|
* Given a label, this function returns the corresponding encoding or a
|
|
|
|
* false.
|
2012-11-08 03:04:22 +04:00
|
|
|
* The returned name may not be lowercased due to compatibility with
|
|
|
|
* our internal implementations.
|
2012-09-28 14:19:18 +04:00
|
|
|
*
|
|
|
|
* @param aLabel, incoming label describing charset to be decoded.
|
2014-05-08 13:32:00 +04:00
|
|
|
* @param aOutEncoding, returning corresponding encoding for label.
|
2012-09-28 14:19:18 +04:00
|
|
|
* @return false if no encoding was found for label.
|
|
|
|
* true if valid encoding found.
|
|
|
|
*/
|
2012-11-08 03:04:22 +04:00
|
|
|
static bool FindEncodingForLabel(const nsACString& aLabel,
|
|
|
|
nsACString& aOutEncoding);
|
|
|
|
|
2012-09-28 14:19:18 +04:00
|
|
|
static bool FindEncodingForLabel(const nsAString& aLabel,
|
2012-11-08 03:04:22 +04:00
|
|
|
nsACString& aOutEncoding)
|
|
|
|
{
|
|
|
|
return FindEncodingForLabel(NS_ConvertUTF16toUTF8(aLabel), aOutEncoding);
|
|
|
|
}
|
2012-09-28 14:19:18 +04:00
|
|
|
|
2014-05-08 13:32:00 +04:00
|
|
|
/**
|
|
|
|
* Like FindEncodingForLabel() except labels that map to "replacement"
|
|
|
|
* are treated as unknown.
|
|
|
|
*
|
|
|
|
* @param aLabel, incoming label describing charset to be decoded.
|
|
|
|
* @param aOutEncoding, returning corresponding encoding for label.
|
|
|
|
* @return false if no encoding was found for label.
|
|
|
|
* true if valid encoding found.
|
|
|
|
*/
|
|
|
|
static bool FindEncodingForLabelNoReplacement(const nsACString& aLabel,
|
|
|
|
nsACString& aOutEncoding);
|
|
|
|
|
|
|
|
static bool FindEncodingForLabelNoReplacement(const nsAString& aLabel,
|
|
|
|
nsACString& aOutEncoding)
|
|
|
|
{
|
|
|
|
return FindEncodingForLabelNoReplacement(NS_ConvertUTF16toUTF8(aLabel),
|
|
|
|
aOutEncoding);
|
|
|
|
}
|
|
|
|
|
2012-09-28 14:19:18 +04:00
|
|
|
/**
|
|
|
|
* Remove any leading and trailing space characters, following the
|
|
|
|
* definition of space characters from Encoding spec.
|
2012-10-10 14:32:37 +04:00
|
|
|
* http://encoding.spec.whatwg.org/#terminology
|
2012-09-28 14:19:18 +04:00
|
|
|
* Note that nsAString::StripWhitespace() doesn't exactly match the
|
|
|
|
* definition. It also removes all matching chars in the string,
|
|
|
|
* not just leading and trailing.
|
|
|
|
*
|
|
|
|
* @param aString, string to be trimmed.
|
|
|
|
*/
|
2012-11-08 03:04:22 +04:00
|
|
|
template<class T>
|
|
|
|
static void TrimSpaceCharacters(T& aString)
|
2012-09-28 14:19:18 +04:00
|
|
|
{
|
|
|
|
aString.Trim(" \t\n\f\r");
|
|
|
|
}
|
|
|
|
|
2013-03-04 22:09:11 +04:00
|
|
|
/**
|
|
|
|
* Check is the encoding is ASCII-compatible in the sense that Basic Latin
|
|
|
|
* encodes to ASCII bytes. (The reverse may not be true!)
|
|
|
|
*
|
|
|
|
* @param aPreferredName a preferred encoding label
|
|
|
|
* @return whether the encoding is ASCII-compatible
|
|
|
|
*/
|
|
|
|
static bool IsAsciiCompatible(const nsACString& aPreferredName);
|
|
|
|
|
2013-11-26 11:31:52 +04:00
|
|
|
/**
|
|
|
|
* Instantiates a decoder for an encoding. The input must be a
|
|
|
|
* Gecko-canonical encoding name.
|
|
|
|
* @param aEncoding a Gecko-canonical encoding name
|
|
|
|
* @return a decoder
|
|
|
|
*/
|
|
|
|
static already_AddRefed<nsIUnicodeDecoder>
|
|
|
|
DecoderForEncoding(const char* aEncoding)
|
|
|
|
{
|
|
|
|
nsDependentCString encoding(aEncoding);
|
|
|
|
return DecoderForEncoding(encoding);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Instantiates a decoder for an encoding. The input must be a
|
|
|
|
* Gecko-canonical encoding name
|
|
|
|
* @param aEncoding a Gecko-canonical encoding name
|
|
|
|
* @return a decoder
|
|
|
|
*/
|
|
|
|
static already_AddRefed<nsIUnicodeDecoder>
|
|
|
|
DecoderForEncoding(const nsACString& aEncoding);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Instantiates an encoder for an encoding. The input must be a
|
|
|
|
* Gecko-canonical encoding name.
|
|
|
|
* @param aEncoding a Gecko-canonical encoding name
|
|
|
|
* @return an encoder
|
|
|
|
*/
|
|
|
|
static already_AddRefed<nsIUnicodeEncoder>
|
|
|
|
EncoderForEncoding(const char* aEncoding)
|
|
|
|
{
|
|
|
|
nsDependentCString encoding(aEncoding);
|
|
|
|
return EncoderForEncoding(encoding);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Instantiates an encoder for an encoding. The input must be a
|
|
|
|
* Gecko-canonical encoding name.
|
|
|
|
* @param aEncoding a Gecko-canonical encoding name
|
|
|
|
* @return an encoder
|
|
|
|
*/
|
|
|
|
static already_AddRefed<nsIUnicodeEncoder>
|
|
|
|
EncoderForEncoding(const nsACString& aEncoding);
|
|
|
|
|
2014-05-08 13:32:00 +04:00
|
|
|
/**
|
|
|
|
* Finds a Gecko language group string (e.g. x-western) for a Gecko-canonical
|
|
|
|
* encoding name.
|
|
|
|
*
|
|
|
|
* @param aEncoding, incoming label describing charset to be decoded.
|
|
|
|
* @param aOutGroup, returning corresponding language group.
|
|
|
|
*/
|
|
|
|
static void LangGroupForEncoding(const nsACString& aEncoding,
|
|
|
|
nsACString& aOutGroup);
|
|
|
|
|
2012-11-08 03:04:22 +04:00
|
|
|
private:
|
2015-01-07 02:35:02 +03:00
|
|
|
EncodingUtils() = delete;
|
2012-09-28 14:19:18 +04:00
|
|
|
};
|
|
|
|
|
2015-07-13 18:25:42 +03:00
|
|
|
} // namespace dom
|
|
|
|
} // namespace mozilla
|
2012-09-28 14:19:18 +04:00
|
|
|
|
|
|
|
#endif // mozilla_dom_encodingutils_h_
|