зеркало из https://github.com/mozilla/gecko-dev.git
Bug 971043 - Implement getTranslationNodes function to retrieve nodes from webpage that contains meaningful text for translation. r=smaug
This commit is contained in:
Родитель
ce7b44032d
Коммит
4260890571
|
@ -200,6 +200,7 @@ public:
|
|||
virtual nsresult AppendText(const char16_t* aBuffer, uint32_t aLength,
|
||||
bool aNotify) MOZ_OVERRIDE;
|
||||
virtual bool TextIsOnlyWhitespace() MOZ_OVERRIDE;
|
||||
virtual bool HasTextForTranslation() MOZ_OVERRIDE;
|
||||
virtual void AppendTextTo(nsAString& aResult) MOZ_OVERRIDE;
|
||||
virtual bool AppendTextTo(nsAString& aResult,
|
||||
const mozilla::fallible_t&) MOZ_OVERRIDE NS_WARN_UNUSED_RESULT;
|
||||
|
|
|
@ -39,8 +39,8 @@ enum nsLinkState {
|
|||
|
||||
// IID for the nsIContent interface
|
||||
#define NS_ICONTENT_IID \
|
||||
{ 0xafa52dfb, 0x9d92, 0x4592, \
|
||||
{ 0xa1, 0xd2, 0x08, 0xc4, 0x92, 0x89, 0x7f, 0xce } }
|
||||
{ 0x1329e5b7, 0x4bcd, 0x450c, \
|
||||
{ 0xa2, 0x3a, 0x98, 0xc5, 0x85, 0xcd, 0x73, 0xf9 } }
|
||||
|
||||
/**
|
||||
* A node of content in a document's content model. This interface
|
||||
|
@ -533,6 +533,14 @@ public:
|
|||
*/
|
||||
virtual bool TextIsOnlyWhitespace() = 0;
|
||||
|
||||
/**
|
||||
* Method to see if the text node contains data that is useful
|
||||
* for a translation: i.e., it consists of more than just whitespace,
|
||||
* digits and punctuation.
|
||||
* NOTE: Always returns false for elements.
|
||||
*/
|
||||
virtual bool HasTextForTranslation() = 0;
|
||||
|
||||
/**
|
||||
* Append the text content to aResult.
|
||||
* NOTE: This asserts and returns for elements
|
||||
|
|
|
@ -1919,6 +1919,12 @@ FragmentOrElement::TextIsOnlyWhitespace()
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
FragmentOrElement::HasTextForTranslation()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
FragmentOrElement::AppendTextTo(nsAString& aResult)
|
||||
{
|
||||
|
|
|
@ -990,6 +990,41 @@ nsGenericDOMDataNode::TextIsOnlyWhitespace()
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
nsGenericDOMDataNode::HasTextForTranslation()
|
||||
{
|
||||
if (mText.Is2b()) {
|
||||
// The fragment contains non-8bit characters which means there
|
||||
// was at least one "interesting" character to trigger non-8bit.
|
||||
return true;
|
||||
}
|
||||
|
||||
if (HasFlag(NS_CACHED_TEXT_IS_ONLY_WHITESPACE) &&
|
||||
HasFlag(NS_TEXT_IS_ONLY_WHITESPACE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* cp = mText.Get1b();
|
||||
const char* end = cp + mText.GetLength();
|
||||
|
||||
unsigned char ch;
|
||||
for (; cp < end; cp++) {
|
||||
ch = *cp;
|
||||
|
||||
// These are the characters that are letters
|
||||
// in the first 256 UTF-8 codepoints.
|
||||
if ((ch >= 'a' && ch <= 'z') ||
|
||||
(ch >= 'A' && ch <= 'Z') ||
|
||||
(ch >= 192 && ch <= 214) ||
|
||||
(ch >= 216 && ch <= 246) ||
|
||||
(ch >= 248)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
nsGenericDOMDataNode::AppendTextTo(nsAString& aResult)
|
||||
{
|
||||
|
|
|
@ -145,6 +145,7 @@ public:
|
|||
virtual nsresult AppendText(const char16_t* aBuffer, uint32_t aLength,
|
||||
bool aNotify) MOZ_OVERRIDE;
|
||||
virtual bool TextIsOnlyWhitespace() MOZ_OVERRIDE;
|
||||
virtual bool HasTextForTranslation() MOZ_OVERRIDE;
|
||||
virtual void AppendTextTo(nsAString& aResult) MOZ_OVERRIDE;
|
||||
virtual bool AppendTextTo(nsAString& aResult,
|
||||
const mozilla::fallible_t&) MOZ_OVERRIDE NS_WARN_UNUSED_RESULT;
|
||||
|
|
|
@ -87,6 +87,7 @@
|
|||
#include "nsIInterfaceRequestorUtils.h"
|
||||
#include "GeckoProfiler.h"
|
||||
#include "mozilla/Preferences.h"
|
||||
#include "nsIContentIterator.h"
|
||||
|
||||
#ifdef XP_WIN
|
||||
#undef GetClassName
|
||||
|
@ -1597,6 +1598,91 @@ nsDOMWindowUtils::NodesFromRect(float aX, float aY,
|
|||
aIgnoreRootScrollFrame, aFlushLayout, aReturn);
|
||||
}
|
||||
|
||||
/**
 * Collect the HTML elements below aRoot that directly contain at least one
 * child with meaningful text for translation, flagging each one as a
 * translation root or not.
 *
 * Only callable from chrome. aRoot must belong to this window's document,
 * otherwise NS_ERROR_DOM_WRONG_DOCUMENT_ERR is returned. At most 15000
 * nodes are collected.
 *
 * @param aRoot   subtree root to scan; the root itself is never reported.
 * @param aRetVal receives the resulting list (already addrefed).
 */
NS_IMETHODIMP
nsDOMWindowUtils::GetTranslationNodes(nsIDOMNode* aRoot,
                                      nsITranslationNodeList** aRetVal)
{
  if (!nsContentUtils::IsCallerChrome()) {
    return NS_ERROR_DOM_SECURITY_ERR;
  }

  NS_ENSURE_ARG_POINTER(aRetVal);
  nsCOMPtr<nsIContent> root = do_QueryInterface(aRoot);
  NS_ENSURE_STATE(root);
  nsCOMPtr<nsIDocument> doc = GetDocument();
  NS_ENSURE_STATE(doc);

  if (root->OwnerDoc() != doc) {
    return NS_ERROR_DOM_WRONG_DOCUMENT_ERR;
  }

  // Remembers every element already accepted, so that an inline element can
  // check whether its parent made it into the list.
  nsTHashtable<nsPtrHashKey<nsIContent>> acceptedNodes(1000);
  nsRefPtr<nsTranslationNodeList> list = new nsTranslationNodeList;

  uint32_t remaining = 15000;

  // Iteration starts with GetNextNode(root) because we explicitly want to
  // skip the root itself as a translation node.
  nsIContent* current = root;
  while (remaining > 0) {
    current = current->GetNextNode(root);
    if (!current) {
      break;
    }

    if (!current->IsHTML()) {
      continue;
    }

    // Skip elements that usually contain non-translatable text content.
    nsIAtom* tag = current->Tag();
    const bool isSkippedTag = tag == nsGkAtoms::script ||
                              tag == nsGkAtoms::iframe ||
                              tag == nsGkAtoms::frameset ||
                              tag == nsGkAtoms::frame ||
                              tag == nsGkAtoms::code ||
                              tag == nsGkAtoms::noscript ||
                              tag == nsGkAtoms::style;
    if (isSkippedTag) {
      continue;
    }

    // An element is a translation node if at least one of its direct
    // children has meaningful data for translation.
    bool hasTranslatableChild = false;
    for (nsIContent* child = current->GetFirstChild();
         child;
         child = child->GetNextSibling()) {
      if (child->HasTextForTranslation()) {
        hasTranslatableChild = true;
        break;
      }
    }
    if (!hasTranslatableChild) {
      continue;
    }

    acceptedNodes.PutEntry(current);

    // Block frames are always translation roots.
    nsIFrame* frame = current->GetPrimaryFrame();
    bool isTranslationRoot =
      frame && frame->IsFrameOfType(nsIFrame::eBlockFrame);

    if (!isTranslationRoot) {
      // A non-block element can still be considered a translation root
      // if its parent didn't make it into the list of nodes to be
      // translated.
      nsIContent* parent = current->GetParent();
      const bool parentInList = parent && acceptedNodes.Contains(parent);
      isTranslationRoot = !parentInList;
    }

    list->AppendElement(current->AsDOMNode(), isTranslationRoot);
    --remaining;
  }

  *aRetVal = list.forget().take();
  return NS_OK;
}
|
||||
|
||||
static TemporaryRef<DataSourceSurface>
|
||||
CanvasToDataSourceSurface(nsIDOMHTMLCanvasElement* aCanvas)
|
||||
{
|
||||
|
@ -3883,3 +3969,40 @@ nsDOMWindowUtils::SetAudioVolume(float aVolume)
|
|||
|
||||
return window->SetAudioVolume(aVolume);
|
||||
}
|
||||
|
||||
NS_INTERFACE_MAP_BEGIN(nsTranslationNodeList)
|
||||
NS_INTERFACE_MAP_ENTRY(nsISupports)
|
||||
NS_INTERFACE_MAP_ENTRY(nsITranslationNodeList)
|
||||
NS_INTERFACE_MAP_END
|
||||
|
||||
NS_IMPL_ADDREF(nsTranslationNodeList)
|
||||
NS_IMPL_RELEASE(nsTranslationNodeList)
|
||||
|
||||
/**
 * Hand out the node stored at aIndex, addrefed for the caller.
 * Whatever SafeElementAt yields for aIndex is stored in *aRetVal; the
 * addref is skipped when that value is null.
 */
NS_IMETHODIMP
nsTranslationNodeList::Item(uint32_t aIndex, nsIDOMNode** aRetVal)
{
  NS_ENSURE_ARG_POINTER(aRetVal);

  nsIDOMNode* node = mNodes.SafeElementAt(aIndex);
  *aRetVal = node;
  NS_IF_ADDREF(node);

  return NS_OK;
}
|
||||
|
||||
/**
 * Report whether the node at aIndex was recorded as a translation root.
 * Indices at or beyond the list length yield false rather than an error.
 */
NS_IMETHODIMP
nsTranslationNodeList::IsTranslationRootAtIndex(uint32_t aIndex, bool* aRetVal)
{
  NS_ENSURE_ARG_POINTER(aRetVal);

  // Short-circuit keeps ElementAt from being called with a bad index.
  *aRetVal = (aIndex < mLength) && mNodeIsRoot.ElementAt(aIndex);
  return NS_OK;
}
|
||||
|
||||
/**
 * Return the number of nodes that have been appended to this list.
 */
NS_IMETHODIMP
nsTranslationNodeList::GetLength(uint32_t* aRetVal)
{
  NS_ENSURE_ARG_POINTER(aRetVal);

  *aRetVal = mLength;
  return NS_OK;
}
|
||||
|
|
|
@ -25,6 +25,32 @@ namespace mozilla {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Concrete nsITranslationNodeList: two parallel arrays holding each
 * collected node and whether it is a translation root, plus an element
 * count kept in step by AppendElement.
 */
class nsTranslationNodeList MOZ_FINAL : public nsITranslationNodeList
{
public:
  nsTranslationNodeList()
    : mLength(0)
  {
    // Reserve room up front for 1000 entries in each array.
    mNodes.SetCapacity(1000);
    mNodeIsRoot.SetCapacity(1000);
  }

  NS_DECL_ISUPPORTS
  NS_DECL_NSITRANSLATIONNODELIST

  // Record one node together with its translation-root flag.
  void AppendElement(nsIDOMNode* aElement, bool aIsRoot)
  {
    mNodes.AppendElement(aElement);
    mNodeIsRoot.AppendElement(aIsRoot);
    ++mLength;
  }

private:
  // mNodes[i] and mNodeIsRoot[i] describe the same entry.
  nsTArray<nsCOMPtr<nsIDOMNode> > mNodes;
  nsTArray<bool> mNodeIsRoot;
  uint32_t mLength;
};
|
||||
|
||||
class nsDOMWindowUtils MOZ_FINAL : public nsIDOMWindowUtils,
|
||||
public nsSupportsWeakReference
|
||||
{
|
||||
|
|
|
@ -25,6 +25,8 @@ support-files =
|
|||
[test_domwindowutils.html]
|
||||
[test_e4x_for_each.html]
|
||||
[test_error.html]
|
||||
[test_getTranslationNodes.html]
|
||||
[test_getTranslationNodes_limit.html]
|
||||
[test_gsp-qualified.html]
|
||||
[test_gsp-quirks.html]
|
||||
[test_gsp-standards.html]
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test for nsIDOMWindowUtils.getTranslationNodes</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
</head>
|
||||
<body onload="runTest()">
|
||||
<script type="application/javascript">
|
||||
var utils = SpecialPowers.wrap(window).
|
||||
QueryInterface(SpecialPowers.Ci.nsIInterfaceRequestor).
|
||||
getInterface(SpecialPowers.Ci.nsIDOMWindowUtils);
|
||||
|
||||
|
||||
/**
 * Run getTranslationNodes on one testcase container and compare the outcome
 * with the space-separated list in its "expected" attribute. Each entry is a
 * local name, suffixed with "[root]" when the node is a translation root.
 */
function testTranslationRoot(rootNode) {
  var translationNodes = utils.getTranslationNodes(rootNode);
  var expectedResult = rootNode.getAttribute("expected");

  is(translationNodes.length, expectedResult.split(" ").length,
     "Correct number of translation nodes for testcase " + rootNode.id);

  // Build the same "name[root]" notation from the returned list.
  var resultList = [];
  for (var i = 0; i < translationNodes.length; i++) {
    var name = translationNodes.item(i).localName;
    var suffix = translationNodes.isTranslationRootAtIndex(i) ? "[root]" : "";
    resultList.push(name + suffix);
  }

  is(resultList.length, translationNodes.length,
     "Correct number of translation nodes for testcase " + rootNode.id);

  is(resultList.join(" "), expectedResult,
     "Correct list of translation nodes for testcase " + rootNode.id);
}
|
||||
|
||||
/**
 * Entry point (body onload): checks every <div expected="..."> testcase,
 * then verifies that a node from another document is rejected with
 * WrongDocumentError.
 */
function runTest() {
  isnot(utils, null, "nsIDOMWindowUtils");

  var testcases = document.querySelectorAll("div[expected]");
  for (var index = 0; index < testcases.length; index++) {
    testTranslationRoot(testcases[index]);
  }

  // The iframe's content lives in a different document than this window's.
  var iframediv = document.getElementById("testiframe")
                          .contentDocument.querySelector("div");
  try {
    utils.getTranslationNodes(iframediv);
    ok(false, "Cannot use a node from a different document");
  } catch (e) {
    is(e.name, "WrongDocumentError", "Cannot use a node from a different document");
  }

  SimpleTest.finish();
}
|
||||
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
</script>
|
||||
|
||||
<!-- Test that an inline element inside a root is not a root -->
|
||||
<div id="testcase1"
|
||||
expected="div[root] span">
|
||||
<div>
|
||||
lorem ipsum <span>dolor</span> sit amet
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that a usually inline element becomes a root if it is
|
||||
displayed as a block -->
|
||||
<div id="testcase2"
|
||||
expected="div[root] span[root]">
|
||||
<div>
|
||||
lorem ipsum <span style="display: block;">dolor</span> sit amet
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that the content-less <div> is ignored and only the
|
||||
<p> with content is returned -->
|
||||
<div id="testcase3"
|
||||
expected="p[root]">
|
||||
<div>
|
||||
<p>lorem ipsum</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that an inline element whose parent is not a root
|
||||
becomes a root -->
|
||||
<div id="testcase4"
|
||||
expected="span[root]">
|
||||
<div>
|
||||
<span>lorem ipsum</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test siblings -->
|
||||
<div id="testcase5"
|
||||
expected="li[root] li[root]">
|
||||
<ul>
|
||||
<li>lorem</li>
|
||||
<li>ipsum</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Test <ul> with content outside li -->
|
||||
<div id="testcase6"
|
||||
expected="ul[root] li[root] li[root]">
|
||||
<ul>Lorem
|
||||
<li>lorem</li>
|
||||
<li>ipsum</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Test inline siblings -->
|
||||
<div id="testcase7"
|
||||
expected="ul[root] li li">
|
||||
<ul>Lorem
|
||||
<li style="display: inline">lorem</li>
|
||||
<li style="display: inline">ipsum</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Test inline siblings becoming roots -->
|
||||
<div id="testcase8"
|
||||
expected="li[root] li[root]">
|
||||
<ul>
|
||||
<li style="display: inline">lorem</li>
|
||||
<li style="display: inline">ipsum</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Test that nodes with only punctuation, whitespace
|
||||
or numbers are ignored -->
|
||||
<div id="testcase9"
|
||||
expected="li[root] li[root]">
|
||||
<ul>
|
||||
<li>lorem</li>
|
||||
<li>ipsum</li>
|
||||
<li>-.,;'/!@#$%^*()</li>
|
||||
<li>0123456789</li>
|
||||
<li>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<!-- Test paragraphs -->
|
||||
<div id="testcase10"
|
||||
expected="p[root] a b p[root] a b">
|
||||
<p>Lorem ipsum <a href="a.htm">dolor</a> sit <b>amet</b>, consetetur</p>
|
||||
<p>Lorem ipsum <a href="a.htm">dolor</a> sit <b>amet</b>, consetetur</p>
|
||||
</div>
|
||||
|
||||
<!-- Test that a display:none element is not ignored -->
|
||||
<div id="testcase11"
|
||||
expected="p[root] a b">
|
||||
<p>Lorem ipsum <a href="a.htm">dolor</a> sit <b style="display:none">amet</b>, consetetur</p>
|
||||
</div>
|
||||
|
||||
<!-- Test that deep nesting does not cause useless content to be returned -->
|
||||
<div id="testcase12"
|
||||
expected="p[root]">
|
||||
<div>
|
||||
<div>
|
||||
<div>
|
||||
<p>Lorem ipsum</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that deep nesting does not cause useless content to be returned -->
|
||||
<div id="testcase13"
|
||||
expected="div[root] p[root]">
|
||||
<div>Lorem ipsum
|
||||
<div>
|
||||
<div>
|
||||
<p>Lorem ipsum</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that non-html elements and elements that usually have non-translatable
|
||||
content are ignored -->
|
||||
<div id="testcase14"
|
||||
expected="div[root]">
|
||||
<div>
|
||||
Lorem Ipsum
|
||||
<noscript>Lorem Ipsum</noscript>
|
||||
<style>.dummyClass { color: blue; }</style>
|
||||
<script> /* script tag */ </script>
|
||||
<code> code </code>
|
||||
<iframe id="testiframe"
|
||||
src="data:text/html,<div>Lorem ipsum</div>">
|
||||
</iframe>
|
||||
<svg>lorem</svg>
|
||||
<math>ipsum</math>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Test that nesting of inline elements won't produce roots as long as
|
||||
the parents are in the list of translation nodes -->
|
||||
<div id="testcase15"
|
||||
expected="p[root] a b span em">
|
||||
<p>Lorem <a>ipsum <b>dolor <span>sit</span> amet</b></a>, <em>consetetur</em></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test for nsIDOMWindowUtils.getTranslationNodes</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
</head>
|
||||
<body onload="runTest()">
|
||||
<script type="application/javascript">
|
||||
var utils = SpecialPowers.wrap(window).
|
||||
QueryInterface(SpecialPowers.Ci.nsIInterfaceRequestor).
|
||||
getInterface(SpecialPowers.Ci.nsIDOMWindowUtils);
|
||||
|
||||
/**
 * Entry point (body onload): appends 16000 <b>a</b> elements to the body and
 * checks that getTranslationNodes returns only 15000 of them.
 */
function runTest() {
  isnot(utils, null, "nsIDOMWindowUtils");

  var remaining = 16000;
  while (remaining-- > 0) {
    var node = document.createElement("b");
    node.appendChild(document.createTextNode("a"));
    document.body.appendChild(node);
  }

  var translationRoots = utils.getTranslationNodes(document.body);
  is (translationRoots.length, 15000, "Translation nodes were limited to 15000 nodes.");

  SimpleTest.finish();
}
|
||||
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
|
@ -42,8 +42,9 @@ interface nsIURI;
|
|||
interface nsIDOMEventTarget;
|
||||
interface nsIRunnable;
|
||||
interface nsICompositionStringSynthesizer;
|
||||
interface nsITranslationNodeList;
|
||||
|
||||
[scriptable, uuid(f3148b3e-6db8-4a49-aa5c-de726449054d)]
|
||||
[scriptable, uuid(3d977df2-1c0e-4b61-bc21-c6ee757a9191)]
|
||||
interface nsIDOMWindowUtils : nsISupports {
|
||||
|
||||
/**
|
||||
|
@ -805,6 +806,16 @@ interface nsIDOMWindowUtils : nsISupports {
|
|||
in boolean aIgnoreRootScrollFrame,
|
||||
in boolean aFlushLayout);
|
||||
|
||||
|
||||
/**
|
||||
* Get a list of nodes that have meaningful textual content to
|
||||
* be translated. The implementation of this algorithm is in flux
|
||||
* as we experiment and refine which approach works best.
|
||||
*
|
||||
* This method requires chrome privileges.
|
||||
*/
|
||||
nsITranslationNodeList getTranslationNodes(in nsIDOMNode aRoot);
|
||||
|
||||
/**
|
||||
* Compare the two canvases, returning the number of differing pixels and
|
||||
* the maximum difference in a channel. This will throw an error if
|
||||
|
@ -1628,3 +1639,13 @@ interface nsIDOMWindowUtils : nsISupports {
|
|||
*/
|
||||
attribute float audioVolume;
|
||||
};
|
||||
|
||||
[scriptable, uuid(c694e359-7227-4392-a138-33c0cc1f15a6)]
|
||||
interface nsITranslationNodeList : nsISupports {
|
||||
readonly attribute unsigned long length;
|
||||
nsIDOMNode item(in unsigned long index);
|
||||
|
||||
// A translation root is a block element, or an inline element
|
||||
// whose parent is not a translation node.
|
||||
boolean isTranslationRootAtIndex(in unsigned long index);
|
||||
};
|
||||
|
|
Загрузка…
Ссылка в новой задаче