From de0ef149d60682bbeb7130ace5bdefeceb760d64 Mon Sep 17 00:00:00 2001 From: Felipe Gomes Date: Mon, 19 May 2014 17:31:23 -0300 Subject: [PATCH] Bug 976556 - Parse translation results back into the TranslationDocument and TranslationItem data structures. r=florian --- .../components/translation/BingTranslator.jsm | 43 ++++++- .../translation/TranslationDocument.jsm | 110 ++++++++++++++++++ 2 files changed, 151 insertions(+), 2 deletions(-) diff --git a/browser/components/translation/BingTranslator.jsm b/browser/components/translation/BingTranslator.jsm index 139e1293bf19..774e88140e86 100644 --- a/browser/components/translation/BingTranslator.jsm +++ b/browser/components/translation/BingTranslator.jsm @@ -122,8 +122,47 @@ this.BingTranslation.prototype = { } }, - _parseChunkResult() { - // note: this function is implemented in the patch from bug 976556 + /** + * This function parses the result returned by Bing's Http.svc API, + * which is an XML file that contains a number of elements. For our + * purposes, the only part of the response that matters + * are the <TranslatedText> nodes, which contain the translations of the + * items that were sent to be translated. + * + * @param bingRequest The request sent to the server. + * @returns boolean True only if every item of this chunk was parsed successfully. + */ + _parseChunkResult: function(bingRequest) { + let domParser = Cc["@mozilla.org/xmlextras/domparser;1"] + .createInstance(Ci.nsIDOMParser); + + let results; + try { + let doc = domParser.parseFromString(bingRequest.networkRequest + .response.body, "text/xml"); + results = doc.querySelectorAll("TranslatedText"); + } catch (e) { + return false; + } + + let len = results.length; + if (len != bingRequest.translationData.length) { + // This should never happen, but if the service returns a different number + // of items (from the number of items submitted), we can't use this chunk + // because all items would be paired incorrectly. 
+ return false; + } + + let error = false; + for (let i = 0; i < len; i++) { + try { + bingRequest.translationData[i][0].parseResult( + results[i].firstChild.nodeValue + ); + } catch (e) { error = true; } /* one bad item fails the chunk, but the remaining items are still parsed */ + } + + return !error; }, /** diff --git a/browser/components/translation/TranslationDocument.jsm b/browser/components/translation/TranslationDocument.jsm index 82a80fad0dc9..43a6b7dd68bb 100644 --- a/browser/components/translation/TranslationDocument.jsm +++ b/browser/components/translation/TranslationDocument.jsm @@ -167,6 +167,38 @@ this.TranslationDocument.prototype = { * This class represents an item for translation. It's basically our * wrapper class around a node returned by getTranslationNode, with * more data and structural information on it. + * + * At the end of the translation process, besides the properties below, + * a TranslationItem will contain two other properties: one called "original" + * and one called "translation". They are twin arrays: one reflects + * the structure of that node in its original state, and the other in its + * translated state. + * + * The "original" array is generated in the generateTextForItem function, + * and the "translation" array is generated when the translation results + * are parsed. + * + * They are both arrays, which contain a mix of strings and references to + * child TranslationItems. The references in both arrays point to the * same * + * TranslationItem object, but they might appear in different orders between the + * "original" and "translation" arrays. + * + * An example: + * + * English:
<div id="n1">Welcome to <b id="n2">Mozilla's</b> website</div>
+ * Portuguese:
<div id="n1">Bem vindo a pagina <b id="n2">da Mozilla</b></div>
+ * + * TranslationItem n1 = { + * id: 1, + * original: ["Welcome to", ptr to n2, "website"], + * translation: ["Bem vindo a pagina", ptr to n2] + * } + * + * TranslationItem n2 = { + * id: 2, + * original: ["Mozilla's"], + * translation: ["da Mozilla"] + * } */ function TranslationItem(node, id, isRoot) { this.nodeRef = node; @@ -185,5 +217,83 @@ TranslationItem.prototype = { : ''; return "[object TranslationItem: <" + this.nodeRef.localName + ">" + rootType + "]"; + }, + + /** + * This function will parse the result of the translation of one translation + * item. If this item was a simple root, all we sent was a plain-text version + * of it, so the result is also straightforward text. + * + * For non-simple roots, we sent a simplified HTML representation of that + * node, and we'll first parse that into an HTML doc and then call the + * parseResultNode helper function to parse it. + * + * While parsing, the result is stored in the "translation" field of the + * TranslationItem, which will be used to display the final translation when + * all items are finished. It is kept afterwards to allow back-and-forth + * switching between the "Show Original" and "Show Translation" functions. + * + * @param result A string with the textual result received from the server, + * which can be plain-text or a serialized HTML doc. + */ + parseResult: function(result) { + if (this.isSimpleRoot) { + this.translation = [result]; + return; + } + + let domParser = Cc["@mozilla.org/xmlextras/domparser;1"] + .createInstance(Ci.nsIDOMParser); + + let doc = domParser.parseFromString(result, "text/html"); + parseResultNode(this, doc.body.firstChild); + }, + + /** + * This function finds a direct child TranslationItem + * with the given id. + * @param id The id to look for, in the format "n#", i.e. "n" followed by the child's id + * @returns A TranslationItem with the given id, or null if + * it was not found. 
+ */ + getChildById: function(id) { + let foundChild = null; + for (let child of this.children) { /* only this item's direct children are searched */ + if (("n" + child.id) == id) { + foundChild = child; + break; + } + } + return foundChild; + } +}; + +/** + * Helper function to parse an HTML doc result. + * How it works: + * + * An example result string is: + * + *
<div id="n1">Hello <b id="n2">World</b> of Mozilla.</div>
+ * + * For an element node, we look at its id and find the corresponding + * TranslationItem that was associated with this node, and then we + * walk down it repeating the process. Non-text nodes matching no child item are skipped. + * + * For a text node, we simply add its text as a string. + */ +function parseResultNode(item, node) { + item.translation = []; /* any earlier result for this item is discarded */ + for (let child of node.childNodes) { + if (child.nodeType == TEXT_NODE) { + item.translation.push(child.nodeValue); + } else { + let translationItemChild = item.getChildById(child.id); + + if (translationItemChild) { + item.translation.push(translationItemChild); + parseResultNode(translationItemChild, child); + } + } } }