зеркало из https://github.com/mozilla/gecko-dev.git
Patch from sayrer@gmail.com Change text search method, handle html/xhtml, trim whitespace bug=321036 r=annie.sullivan@gmail.com
This commit is contained in:
Родитель
ce64f51e07
Коммит
670ce54d8a
|
@ -72,6 +72,7 @@ REQUIRES = xpcom \
|
|||
xpconnect \
|
||||
js \
|
||||
nkcache \
|
||||
widget \
|
||||
$(NULL)
|
||||
|
||||
LOCAL_INCLUDES = -I$(srcdir)/../../build
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
*
|
||||
* Contributor(s):
|
||||
* Annie Sullivan <annie.sullivan@gmail.com> (modified to work with places)
|
||||
* Robert Sayre <sayrer@gmail.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
|
@ -70,6 +71,17 @@
|
|||
#include "nsIDOMCharacterData.h"
|
||||
#include "nsIDOMNodeList.h"
|
||||
#include "nsIDOM3Node.h"
|
||||
#include "nsIDOMDocumentTraversal.h"
|
||||
#include "nsIDOMTreeWalker.h"
|
||||
#include "nsIDOMNodeFilter.h"
|
||||
#include "nsIDOMDocumentFragment.h"
|
||||
#include "nsIParser.h"
|
||||
#include "nsParserCIID.h"
|
||||
#include "nsIFragmentContentSink.h"
|
||||
#include "nsIContentSink.h"
|
||||
#include "nsIDocument.h"
|
||||
|
||||
static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
|
||||
|
||||
// These are defined in nsLivemarkService.cpp
|
||||
extern nsIRDFResource *kLMRDF_type;
|
||||
|
@ -135,7 +147,8 @@ protected:
|
|||
// helpers
|
||||
NS_METHOD HandleRDFItem (nsIRDFDataSource *aDS, nsIRDFResource *itemResource,
|
||||
nsIRDFResource *aLinkResource, nsIRDFResource *aTitleResource);
|
||||
NS_METHOD FindTextInNode (nsIDOMNode *aParentNode, nsAString &aString);
|
||||
NS_METHOD FindTextInChildNodes (nsIDOMNode *aNode, nsAString &aString);
|
||||
NS_METHOD ParseHTMLFragment(nsAString &aFragString, nsIDocument* aTargetDocument, nsIDOMNode **outNode);
|
||||
|
||||
PRBool IsLinkValid(const PRUnichar *aURI);
|
||||
|
||||
|
@ -412,44 +425,79 @@ nsLivemarkLoadListener::TryParseAsRDF ()
|
|||
return NS_OK;
|
||||
}
|
||||
|
||||
//
|
||||
// find the first node below aParentNode that is a TEXT_NODE,
|
||||
// and return it
|
||||
//
|
||||
|
||||
nsresult
|
||||
nsLivemarkLoadListener::FindTextInNode (nsIDOMNode *aParentNode, nsAString &aString)
|
||||
nsLivemarkLoadListener::ParseHTMLFragment(nsAString &aFragString,
|
||||
nsIDocument* aTargetDocument,
|
||||
nsIDOMNode **outNode)
|
||||
{
|
||||
NS_ENSURE_ARG(aParentNode);
|
||||
|
||||
NS_ENSURE_ARG(aTargetDocument);
|
||||
|
||||
nsresult rv;
|
||||
|
||||
nsCOMPtr<nsIParser> parser;
|
||||
parser = do_CreateInstance(kCParserCID, &rv);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// create the html fragment sink
|
||||
nsCOMPtr<nsIContentSink> sink;
|
||||
sink = do_CreateInstance(NS_HTMLFRAGMENTSINK2_CONTRACTID);
|
||||
NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
|
||||
nsCOMPtr<nsIFragmentContentSink> fragSink(do_QueryInterface(sink));
|
||||
NS_ENSURE_TRUE(fragSink, NS_ERROR_FAILURE);
|
||||
|
||||
fragSink->SetTargetDocument(aTargetDocument);
|
||||
|
||||
// parse the fragment
|
||||
parser->SetContentSink(sink);
|
||||
parser->Parse(aFragString, (void*)0, NS_LITERAL_CSTRING("text/html"), PR_FALSE, PR_TRUE, eDTDMode_fragment);
|
||||
// get the fragment node
|
||||
nsCOMPtr<nsIDOMDocumentFragment> contextfrag;
|
||||
rv = fragSink->GetFragment(getter_AddRefs(contextfrag));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
rv = CallQueryInterface (contextfrag, outNode);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
nsCOMPtr<nsIDOMNode> childNode;
|
||||
rv = aParentNode->GetFirstChild(getter_AddRefs(childNode));
|
||||
if (!childNode) return NS_OK;
|
||||
// find all of the text and CDATA nodes below aNode and return them as a string
|
||||
nsresult
|
||||
nsLivemarkLoadListener::FindTextInChildNodes (nsIDOMNode *aNode, nsAString &aString)
|
||||
{
|
||||
NS_ENSURE_ARG(aNode);
|
||||
|
||||
nsresult rv;
|
||||
|
||||
nsCOMPtr<nsIDOMDocument> aDoc;
|
||||
aNode->GetOwnerDocument(getter_AddRefs(aDoc));
|
||||
nsCOMPtr<nsIDOMDocumentTraversal> trav = do_QueryInterface(aDoc, &rv);
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
PRUint16 nodeType = 0;
|
||||
do {
|
||||
rv = childNode->GetNodeType(&nodeType);
|
||||
if (nodeType == nsIDOMNode::TEXT_NODE ||
|
||||
nodeType == nsIDOMNode::CDATA_SECTION_NODE)
|
||||
break;
|
||||
|
||||
nsCOMPtr<nsIDOMNode> temp;
|
||||
rv = childNode->GetNextSibling(getter_AddRefs(temp));
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
childNode = temp;
|
||||
} while (childNode);
|
||||
|
||||
if (nodeType == nsIDOMNode::TEXT_NODE ||
|
||||
nodeType == nsIDOMNode::CDATA_SECTION_NODE)
|
||||
{
|
||||
nsCOMPtr<nsIDOMCharacterData> charTextNode = do_QueryInterface(childNode);
|
||||
if (charTextNode)
|
||||
return charTextNode->GetData(aString);
|
||||
nsCOMPtr<nsIDOMTreeWalker> walker;
|
||||
rv = trav->CreateTreeWalker(aNode,
|
||||
nsIDOMNodeFilter::SHOW_TEXT | nsIDOMNodeFilter::SHOW_CDATA_SECTION,
|
||||
nsnull, PR_TRUE, getter_AddRefs(walker));
|
||||
if (NS_FAILED(rv)) return rv;
|
||||
|
||||
nsCOMPtr<nsIDOMNode> currentNode;
|
||||
walker->GetCurrentNode(getter_AddRefs(currentNode));
|
||||
nsCOMPtr<nsIDOMCharacterData> charTextNode;
|
||||
nsAutoString tempString;
|
||||
while (currentNode) {
|
||||
charTextNode = do_QueryInterface(currentNode);
|
||||
if (charTextNode) {
|
||||
charTextNode->GetData(tempString);
|
||||
aString.Append(tempString);
|
||||
}
|
||||
walker->NextNode(getter_AddRefs(currentNode));
|
||||
}
|
||||
|
||||
if (aString.IsEmpty()) {
|
||||
return NS_ERROR_FAILURE;
|
||||
} else {
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
return NS_ERROR_FAILURE;
|
||||
}
|
||||
|
||||
nsresult
|
||||
|
@ -620,12 +668,59 @@ nsLivemarkLoadListener::TryParseAsSimpleRSS ()
|
|||
rv = childNode->GetNodeName (childNname);
|
||||
|
||||
if (childNname.Equals(NS_LITERAL_STRING("title"))) {
|
||||
rv = FindTextInNode (childNode, titleStr);
|
||||
if (isAtom) {
|
||||
/* Atom titles can contain HTML, so we need to find and remove it */
|
||||
nsCOMPtr<nsIDOMElement> titleElem = do_QueryInterface(childNode);
|
||||
if (!titleElem) break; // out of while(childNode) loop
|
||||
|
||||
nsAutoString titleMode; // Atom 0.3 only
|
||||
nsAutoString titleType;
|
||||
titleElem->GetAttribute(NS_LITERAL_STRING("type"), titleType);
|
||||
titleElem->GetAttribute(NS_LITERAL_STRING("mode"), titleMode);
|
||||
|
||||
/* No one does this in <title> except standards pedants making test feeds,
|
||||
* Atom 0.3 is deprecated, and RFC 4287 doesn't allow it
|
||||
*/
|
||||
if (titleMode.EqualsLiteral("base64")) {
|
||||
break; // out of while(childNode) loop
|
||||
}
|
||||
|
||||
/* Cover Atom 0.3 and RFC 4287 together */
|
||||
if (titleType.EqualsLiteral("text") ||
|
||||
titleType.EqualsLiteral("text/plain") ||
|
||||
titleType.IsEmpty())
|
||||
{
|
||||
rv = FindTextInChildNodes(childNode, titleStr);
|
||||
} else if (titleType.EqualsLiteral("html") || // RFC 4287
|
||||
(titleType.EqualsLiteral("text/html") && // Atom 0.3
|
||||
!titleMode.EqualsLiteral("xml")) ||
|
||||
titleMode.EqualsLiteral("escaped")) // Atom 0.3
|
||||
{
|
||||
nsAutoString escapedHTMLStr;
|
||||
rv = FindTextInChildNodes(childNode, escapedHTMLStr);
|
||||
if (NS_FAILED(rv)) break; // out of while(childNode) loop
|
||||
|
||||
nsCOMPtr<nsIDOMNode> newNode;
|
||||
nsCOMPtr<nsIDocument> doc = do_QueryInterface(xmldoc);
|
||||
ParseHTMLFragment(escapedHTMLStr, doc, getter_AddRefs(newNode));
|
||||
rv = FindTextInChildNodes(newNode, titleStr);
|
||||
|
||||
}else if (titleType.EqualsLiteral("xhtml") || // RFC 4287
|
||||
titleType.EqualsLiteral("application/xhtml") || // Atom 0.3
|
||||
titleMode.EqualsLiteral("xml")) // Atom 0.3
|
||||
{
|
||||
rv = FindTextInChildNodes(childNode, titleStr);
|
||||
} else {
|
||||
break; // out of while(childNode) loop
|
||||
}
|
||||
} else {
|
||||
rv = FindTextInChildNodes(childNode, titleStr);
|
||||
}
|
||||
if (NS_FAILED(rv)) break;
|
||||
} else if (childNname.Equals(NS_LITERAL_STRING("pubDate")) ||
|
||||
childNname.Equals(NS_LITERAL_STRING("updated")))
|
||||
{
|
||||
rv = FindTextInNode (childNode, dateStr);
|
||||
rv = FindTextInChildNodes (childNode, dateStr);
|
||||
if (NS_FAILED(rv)) break;
|
||||
} else if (!isAtom && childNname.Equals(NS_LITERAL_STRING("guid"))) {
|
||||
nsCOMPtr<nsIDOMElement> linkElem = do_QueryInterface(childNode);
|
||||
|
@ -636,7 +731,7 @@ nsLivemarkLoadListener::TryParseAsSimpleRSS ()
|
|||
// Ignore failures. isPermaLink defaults to true.
|
||||
if (!isPermaLink.Equals(NS_LITERAL_STRING("false"))) {
|
||||
// in node's TEXT
|
||||
rv = FindTextInNode (childNode, linkStr);
|
||||
rv = FindTextInChildNodes (childNode, linkStr);
|
||||
if (NS_FAILED(rv)) break;
|
||||
}
|
||||
} else if (childNname.Equals(NS_LITERAL_STRING("link"))) {
|
||||
|
@ -672,7 +767,7 @@ nsLivemarkLoadListener::TryParseAsSimpleRSS ()
|
|||
}
|
||||
} else if (linkStr.IsEmpty()) {
|
||||
// in node's TEXT
|
||||
rv = FindTextInNode (childNode, linkStr);
|
||||
rv = FindTextInChildNodes (childNode, linkStr);
|
||||
if (NS_FAILED(rv)) break;
|
||||
}
|
||||
}
|
||||
|
@ -687,6 +782,11 @@ nsLivemarkLoadListener::TryParseAsSimpleRSS ()
|
|||
if (!childNode || NS_FAILED(rv)) break;
|
||||
}
|
||||
|
||||
// Clean up whitespace
|
||||
titleStr.CompressWhitespace();
|
||||
linkStr.Trim("\b\t\r\n ");
|
||||
dateStr.CompressWhitespace();
|
||||
|
||||
if (titleStr.IsEmpty() && !dateStr.IsEmpty())
|
||||
titleStr.Assign(dateStr);
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче