Refine the conversion of entities during serialization, b=169590, r=nhotta, sr=jst

This commit is contained in:
rbs%maths.uq.edu.au 2002-12-13 22:12:10 +00:00
Родитель 974ade6725
Коммит c7e2c76219
12 изменённых файлов: 70 добавлений и 27 удалений

Просмотреть файл

@ -140,9 +140,11 @@ public:
// Convert links, image src, and script src to absolute URLs when possible
OutputAbsoluteLinks = 128,
// Encode entities when outputting to a string.
// E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
OutputEncodeEntities = 256,
// Attempt to encode entities standardized at W3C (HTML, MathML, etc).
// This is a catch-all flag for documents with mixed contents. Beware of
// interoperability issues. See below for other flags which are more
// likely to do what you want.
OutputEncodeW3CEntities = 256,
// LineBreak processing: we can do either platform line breaks,
// CR, LF, or CRLF. If neither of these flags is set, then we
@ -160,7 +162,19 @@ public:
// Don't allow any formatting nodes (e.g. <br>, <b>) inside a <pre>.
// This is used primarily by mail.
OutputNoFormattingInPre = 8192
OutputNoFormattingInPre = 8192,
// Encode entities when outputting to a string.
// E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
// The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
// with older products that don't support &alpha; and friends.
// The Latin1 entity set additionally includes 8bit accented letters
// between 128 and 255.
// The HTML entity set additionally includes accented letters, greek
// letters, and other special markup symbols as defined in HTML4.
OutputEncodeBasicEntities = 16384,
OutputEncodeLatin1Entities = 32768,
OutputEncodeHTMLEntities = 65536
};
/**

Просмотреть файл

@ -135,15 +135,6 @@ nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
mPreLevel = 0;
mCharSet = aCharSet;
mIsLatin1 = PR_FALSE;
if (aCharSet) {
const PRUnichar *charset;
aCharSet->GetUnicode(&charset);
if (NS_LITERAL_STRING("ISO-8859-1").Equals(charset)) {
mIsLatin1 = PR_TRUE;
}
}
return NS_OK;
}
@ -811,7 +802,9 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
}
if (aTranslateEntities && !mInCDATA) {
if (mFlags & nsIDocumentEncoder::OutputEncodeEntities) {
if (mFlags & nsIDocumentEncoder::OutputEncodeBasicEntities ||
mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities ||
mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities) {
nsIParserService* parserService =
nsContentUtils::GetParserServiceWeakRef();
@ -851,7 +844,10 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
else if ((val <= kGTVal) && (entityTable[val][0] != 0)) {
entityText = entityTable[val];
break;
} else if (mIsLatin1 && val > 127 && val < 256) {
} else if (val > 127 &&
((val < 256 &&
mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
if (!entityReplacement.IsEmpty()) {

Просмотреть файл

@ -135,7 +135,6 @@ class nsHTMLContentSerializer : public nsXMLContentSerializer {
* whatsoever.
*/
PRPackedBool mInCDATA;
PRPackedBool mIsLatin1;
PRInt32 mMaxColumn;

Просмотреть файл

@ -878,7 +878,7 @@ nsGenericHTMLElement::GetInnerHTML(nsAString& aInnerHTML)
NS_ENSURE_TRUE(docEncoder, NS_ERROR_FAILURE);
docEncoder->Init(doc, NS_LITERAL_STRING("text/html"),
nsIDocumentEncoder::OutputEncodeEntities);
nsIDocumentEncoder::OutputEncodeBasicEntities);
nsCOMPtr<nsIDOMRange> range(new nsRange);
NS_ENSURE_TRUE(range, NS_ERROR_OUT_OF_MEMORY);

Просмотреть файл

@ -986,7 +986,10 @@ function OutputFileWithPersistAPI(editorDoc, aDestinationLocation, aRelatedFiles
// returns output flags based on mimetype, wrapCol and prefs
function GetOutputFlags(aMimeType, aWrapColumn)
{
var outputFlags = webPersist.ENCODE_FLAGS_ENCODE_ENTITIES;
var editor = GetCurrentEditor();
var outputFlags = (editor && editor.documentCharacterSet == "ISO-8859-1")
? webPersist.ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES
: webPersist.ENCODE_FLAGS_ENCODE_BASIC_ENTITIES;
if (aMimeType == "text/plain")
{
// When saving in "text/plain" format, always do formatting

Просмотреть файл

@ -1575,7 +1575,9 @@ function SetEditMode(mode)
}
// Get the entire document's source string
var flags = 256; // OutputEncodeEntities;
var flags = (editor.documentCharacterSet == "ISO-8859-1")
? 32768 // OutputEncodeLatin1Entities
: 16384; // OutputEncodeBasicEntities
try {
var prettyPrint = gPrefs.getBoolPref("editor.prettyprint");

Просмотреть файл

@ -109,7 +109,6 @@ function onAccept()
var str;
try {
// 1 = OutputSelectionOnly, 1024 = OutputLFLineBreak
// 256 = OutputEncodeEntities
str = editor.outputToString("text/html", 1+1024);
} catch (e) {}
if (!str)

Просмотреть файл

@ -145,8 +145,15 @@ interface nsIWebBrowserPersist : nsISupports
const unsigned long ENCODE_FLAGS_FORMAT_FLOWED = 64;
/** Convert links to absolute links where possible. */
const unsigned long ENCODE_FLAGS_ABSOLUTE_LINKS = 128;
/** Encode entities, e.g. output &nbsp; instead of character code 0xa0. */
const unsigned long ENCODE_FLAGS_ENCODE_ENTITIES = 256;
/**
* Attempt to encode entities standardized at W3C (HTML, MathML, etc).
* This is a catch-all flag for documents with mixed contents. Beware of
* interoperability issues. See below for other flags which are more
* likely to do what you want.
*/
const unsigned long ENCODE_FLAGS_ENCODE_W3C_ENTITIES = 256;
/**
* Output with carriage return line breaks. May also be combined with
* ENCODE_FLAGS_LF_LINEBREAKS and if neither is specified, the platform
@ -164,6 +171,23 @@ interface nsIWebBrowserPersist : nsISupports
/** For plaintext output. Output the content of noframes elements. */
const unsigned long ENCODE_FLAGS_NOFRAMES_CONTENT = 4096;
/**
* Encode basic entities, e.g. output &nbsp; instead of character code 0xa0.
* The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
* with older products that don't support &alpha; and friends.
*/
const unsigned long ENCODE_FLAGS_ENCODE_BASIC_ENTITIES = 8192;
/**
* Encode Latin1 entities. This includes the basic set and
* accented letters between 128 and 255.
*/
const unsigned long ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES = 16384;
/**
* Encode HTML4 entities. This includes the basic set, accented
* letters, greek letters and certain special markup symbols.
*/
const unsigned long ENCODE_FLAGS_ENCODE_HTML_ENTITIES = 32768;
/**
* Save the specified DOM document to file and optionally all linked files
* (e.g. images, CSS, JS & subframes). Do not call this method until the

Просмотреть файл

@ -395,8 +395,14 @@ NS_IMETHODIMP nsWebBrowserPersist::SaveDocument(
mEncodingFlags |= nsIDocumentEncoder::OutputFormatFlowed;
if (aEncodingFlags & ENCODE_FLAGS_ABSOLUTE_LINKS)
mEncodingFlags |= nsIDocumentEncoder::OutputAbsoluteLinks;
if (aEncodingFlags & ENCODE_FLAGS_ENCODE_ENTITIES)
mEncodingFlags |= nsIDocumentEncoder::OutputEncodeEntities;
if (aEncodingFlags & ENCODE_FLAGS_ENCODE_BASIC_ENTITIES)
mEncodingFlags |= nsIDocumentEncoder::OutputEncodeBasicEntities;
if (aEncodingFlags & ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES)
mEncodingFlags |= nsIDocumentEncoder::OutputEncodeLatin1Entities;
if (aEncodingFlags & ENCODE_FLAGS_ENCODE_HTML_ENTITIES)
mEncodingFlags |= nsIDocumentEncoder::OutputEncodeHTMLEntities;
if (aEncodingFlags & ENCODE_FLAGS_ENCODE_W3C_ENTITIES)
mEncodingFlags |= nsIDocumentEncoder::OutputEncodeW3CEntities;
if (aEncodingFlags & ENCODE_FLAGS_CR_LINEBREAKS)
mEncodingFlags |= nsIDocumentEncoder::OutputCRLineBreak;
if (aEncodingFlags & ENCODE_FLAGS_LF_LINEBREAKS)

Просмотреть файл

@ -100,7 +100,7 @@ static nsresult SetUpEncoder(nsIDOMNode *aRoot, const char* aCharset, nsIDocumen
}
// This method will fail if no document
rv = encoder->Init(document,NS_LITERAL_STRING("text/xml"),nsIDocumentEncoder::OutputEncodeEntities);
rv = encoder->Init(document,NS_LITERAL_STRING("text/xml"),nsIDocumentEncoder::OutputEncodeBasicEntities);
if (NS_FAILED(rv))
return rv;

Просмотреть файл

@ -2590,7 +2590,7 @@ NS_IMETHODIMP nsPluginInstanceOwner::GetTagText(const char* *result)
nsCOMPtr<nsIDocumentEncoder> docEncoder(do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv));
if (NS_FAILED(rv))
return rv;
rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeEntities);
rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeBasicEntities);
if (NS_FAILED(rv))
return rv;

Просмотреть файл

@ -2590,7 +2590,7 @@ NS_IMETHODIMP nsPluginInstanceOwner::GetTagText(const char* *result)
nsCOMPtr<nsIDocumentEncoder> docEncoder(do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/html", &rv));
if (NS_FAILED(rv))
return rv;
rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeEntities);
rv = docEncoder->Init(document, NS_LITERAL_STRING("text/html"), nsIDocumentEncoder::OutputEncodeBasicEntities);
if (NS_FAILED(rv))
return rv;