Bug 1759504 - Prototype a UAWidget for TextRecognition r=emilio,nordzilla

Note this is an experiment in getting functionality going for text recognition. It may be possible to do this more efficiently from the C++ side of things, but this will allow for rapid experimentation while working with UX. Differential Revision: https://phabricator.services.mozilla.com/D143422
2022-06-03 16:01:33 +00:00 · 2022-06-03 16:01:33 +00:00 · b0b5b0333b
--- a/dom/base/nsImageLoadingContent.cpp
+++ b/dom/base/nsImageLoadingContent.cpp
@ -1240,9 +1240,10 @@ already_AddRefed<Promise> nsImageLoadingContent::RecognizeCurrentImageText(
          return;
        }
        Element* el = ilc->AsContent()->AsElement();
-        el->AttachAndSetUAShadowRoot(Element::NotifyUAWidgetSetup::No);
+        el->AttachAndSetUAShadowRoot(Element::NotifyUAWidgetSetup::Yes);
        TextRecognition::FillShadow(*el->GetShadowRoot(),
                                    aValue.ResolveValue());
+        el->NotifyUAWidgetSetupOrChange();
        // TODO: Maybe resolve with the recognition results?
        domPromise->MaybeResolveWithUndefined();
      });
--- a/toolkit/actors/UAWidgetsChild.jsm
+++ b/toolkit/actors/UAWidgetsChild.jsm
@ -105,6 +105,9 @@ class UAWidgetsChild extends JSWindowActorChild {
        uri = "chrome://global/content/elements/marquee.js";
        widgetName = "MarqueeWidget";
        break;
+      case "img":
+        uri = "chrome://global/content/elements/textrecognition.js";
+        widgetName = "TextRecognitionWidget";
    }

    if (!uri || !widgetName) {
--- a/toolkit/content/jar.mn
+++ b/toolkit/content/jar.mn
@ -103,6 +103,7 @@ toolkit.jar:
   content/global/elements/stringbundle.js     (widgets/stringbundle.js)
   content/global/elements/tabbox.js           (widgets/tabbox.js)
   content/global/elements/text.js             (widgets/text.js)
+   content/global/elements/textrecognition.js  (widgets/textrecognition.js)
   content/global/elements/toolbarbutton.js    (widgets/toolbarbutton.js)
   content/global/elements/videocontrols.js    (widgets/videocontrols.js)
   content/global/elements/tree.js             (widgets/tree.js)
--- a/toolkit/content/widgets/textrecognition.js
+++ b/toolkit/content/widgets/textrecognition.js
@ -0,0 +1,366 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+// This is a UA widget. It runs in per-origin UA widget scope,
+// to be loaded by UAWidgetsChild.jsm.
+
+this.TextRecognitionWidget = class {
+  /**
+   * @param {ShadowRoot} shadowRoot
+   * @param {Record<string, string | boolean | number>} _prefs
+   */
+  constructor(shadowRoot, _prefs) {
+    /** @type {ShadowRoot} */
+    this.shadowRoot = shadowRoot;
+    /** @type {HTMLElement} */
+    this.element = shadowRoot.host;
+    /** @type {Document} */
+    this.document = this.element.ownerDocument;
+    /** @type {Window} */
+    this.window = this.document.defaultView;
+    /** @type {ResizeObserver} */
+    this.resizeObserver = null;
+    /** @type {Map<HTMLSpanElement, DOMRect} */
+    this.spanRects = new Map();
+    /** @type {boolean} */
+    this.isInitialized = false;
+    /** @type {null | number} */
+    this.lastCanvasStyleWidth = null;
+  }
+
+  /*
+   * Callback called by UAWidgets right after constructor.
+   */
+  onsetup() {
+    this.resizeObserver = new this.window.ResizeObserver(() => {
+      this.positionSpans();
+    });
+    this.resizeObserver.observe(this.element);
+  }
+
+  positionSpans() {
+    if (!this.shadowRoot.firstChild) {
+      return;
+    }
+    this.lazilyInitialize();
+
+    /** @type {HTMLDivElement} */
+    const div = this.shadowRoot.firstChild;
+    const canvas = div.querySelector("canvas");
+    const spans = div.querySelectorAll("span");
+
+    // TODO Bug 1770438 - The <img> element does not currently let child elements be
+    // sized relative to the size of the containing <img> element. It would be better
+    // to teach the <img> element how to do this. For the prototype, do the more expensive
+    // operation of getting the bounding client rect, and handle the positioning manually.
+    const imgRect = this.element.getBoundingClientRect();
+    div.style.width = imgRect.width + "px";
+    div.style.height = imgRect.height + "px";
+    canvas.style.width = imgRect.width + "px";
+    canvas.style.height = imgRect.height + "px";
+
+    // The ctx is only available when redrawing the canvas. This is operation is only
+    // done when necessary, as it can be expensive.
+    /** @type {null | CanvasRenderingContext2D} */
+    let ctx = null;
+
+    if (
+      // The canvas hasn't been drawn to yet.
+      this.lastCanvasStyleWidth === null ||
+      // Only redraw when the image has grown 25% larger. This percentage was chosen
+      // as it visually seemed to work well, with the canvas never appearing blurry
+      // when manually testing it.
+      imgRect.width > this.lastCanvasStyleWidth * 1.25
+    ) {
+      const dpr = this.window.devicePixelRatio;
+      canvas.width = imgRect.width * dpr;
+      canvas.height = imgRect.height * dpr;
+      this.lastCanvasStyleWidth = imgRect.width;
+
+      ctx = canvas.getContext("2d");
+      ctx.scale(dpr, dpr);
+      ctx.fillStyle = "#00000088";
+      ctx.fillRect(0, 0, imgRect.width, imgRect.height);
+
+      ctx.beginPath();
+    }
+
+    for (const span of spans) {
+      let spanRect = this.spanRects.get(span);
+      if (!spanRect) {
+        // This only needs to happen once.
+        spanRect = span.getBoundingClientRect();
+        this.spanRects.set(span, spanRect);
+      }
+
+      const points = span.dataset.points.split(",").map(p => Number(p));
+      // Use the points in the string, e.g.
+      // "0.0275349,0.14537,0.0275349,0.244662,0.176966,0.244565,0.176966,0.145273"
+      //  0         1       2         3        4        5        6        7
+      //  ^ bottomleft      ^ topleft          ^ topright        ^ bottomright
+      let [
+        bottomLeftX,
+        bottomLeftY,
+        topLeftX,
+        topLeftY,
+        topRightX,
+        topRightY,
+        bottomRightX,
+        bottomRightY,
+      ] = points;
+
+      // Invert the Y.
+      topLeftY = 1 - topLeftY;
+      topRightY = 1 - topRightY;
+      bottomLeftY = 1 - bottomLeftY;
+      bottomRightY = 1 - bottomRightY;
+
+      // Create a projection matrix to position the <span> relative to the bounds.
+      // prettier-ignore
+      const mat4 = projectPoints(
+        spanRect.width,               spanRect.height,
+        imgRect.width * topLeftX,     imgRect.height * topLeftY,
+        imgRect.width * topRightX,    imgRect.height * topRightY,
+        imgRect.width * bottomLeftX,  imgRect.height * bottomLeftY,
+        imgRect.width * bottomRightX, imgRect.height * bottomRightY
+      );
+
+      span.style.transform = "matrix3d(" + mat4.join(", ") + ")";
+
+      if (ctx) {
+        const inset = 3;
+        ctx.moveTo(
+          imgRect.width * bottomLeftX + inset,
+          imgRect.height * bottomLeftY - inset
+        );
+        ctx.lineTo(
+          imgRect.width * topLeftX + inset,
+          imgRect.height * topLeftY + inset
+        );
+        ctx.lineTo(
+          imgRect.width * topRightX - inset,
+          imgRect.height * topRightY + inset
+        );
+        ctx.lineTo(
+          imgRect.width * bottomRightX - inset,
+          imgRect.height * bottomRightY - inset
+        );
+        ctx.closePath();
+      }
+    }
+
+    if (ctx) {
+      // This composite operation will cut out the quads. The color is arbitrary.
+      ctx.globalCompositeOperation = "destination-out";
+      ctx.fillStyle = "#ffffff";
+      ctx.fill();
+
+      // Creating a round line will grow the selection slightly, and round the corners.
+      ctx.lineWidth = 10;
+      ctx.lineJoin = "round";
+      ctx.strokeStyle = "#ffffff";
+      ctx.stroke();
+    }
+  }
+
+  destructor() {
+    this.shadowRoot.firstChild.remove();
+    this.resizeObserver.disconnect();
+    this.spanRects.clear();
+  }
+
+  lazilyInitialize() {
+    if (this.isInitialized) {
+      return;
+    }
+    this.isInitialized = true;
+
+    const parser = new this.window.DOMParser();
+    let parserDoc = parser.parseFromString(
+      `<div class="textrecognition" xmlns="http://www.w3.org/1999/xhtml" role="none">
+        <link rel="stylesheet" href="chrome://global/skin/media/textrecognition.css" />
+        <canvas />
+        <!-- The spans will be reattached here -->
+      </div>`,
+      "application/xml"
+    );
+    if (
+      this.shadowRoot.children.length !== 1 ||
+      this.shadowRoot.firstChild.tagName !== "DIV"
+    ) {
+      throw new Error(
+        "Expected the shadowRoot to have a single div as the root element."
+      );
+    }
+
+    const spansDiv = this.shadowRoot.firstChild;
+    // Example layout of spansDiv:
+    // <div>
+    //   <span data-points="0.0275349,0.14537,0.0275349,0.244662,0.176966,0.244565,0.176966,0.145273">
+    //     Text that has been recognized
+    //   </span>
+    //   ...
+    // </div>
+    spansDiv.remove();
+
+    this.shadowRoot.importNodeAndAppendChildAt(
+      this.shadowRoot,
+      parserDoc.documentElement,
+      true /* deep */
+    );
+
+    this.shadowRoot.importNodeAndAppendChildAt(
+      this.shadowRoot.firstChild,
+      spansDiv,
+      true /* deep */
+    );
+  }
+};
+
+/**
+ * A three dimensional vector.
+ *
+ * @typedef {[number, number, number]} Vec3
+ */
+
+/**
+ * A 3x3 matrix.
+ *
+ * @typedef {[number, number, number,
+ *            number, number, number,
+ *            number, number, number]} Matrix3
+ */
+
+/**
+ * A 4x4 matrix.
+ *
+ * @typedef {[number, number, number, number,
+ *            number, number, number, number,
+ *            number, number, number, number,
+ *            number, number, number, number]} Matrix4
+ */
+
+/**
+ * Compute the adjugate matrix.
+ * https://en.wikipedia.org/wiki/Adjugate_matrix
+ *
+ * @param {Matrix3} m
+ * @returns {Matrix3}
+ */
+function computeAdjugate(m) {
+  // prettier-ignore
+  return [
+    m[4] * m[8] - m[5] * m[7],
+    m[2] * m[7] - m[1] * m[8],
+    m[1] * m[5] - m[2] * m[4],
+    m[5] * m[6] - m[3] * m[8],
+    m[0] * m[8] - m[2] * m[6],
+    m[2] * m[3] - m[0] * m[5],
+    m[3] * m[7] - m[4] * m[6],
+    m[1] * m[6] - m[0] * m[7],
+    m[0] * m[4] - m[1] * m[3],
+  ];
+}
+
+/**
+ * @param {Matrix3} a
+ * @param {Matrix3} b
+ * @returns {Matrix3}
+ */
+function multiplyMat3(a, b) {
+  let out = [];
+  for (let i = 0; i < 3; i++) {
+    for (let j = 0; j < 3; j++) {
+      let sum = 0;
+      for (let k = 0; k < 3; k++) {
+        sum += a[3 * i + k] * b[3 * k + j];
+      }
+      out[3 * i + j] = sum;
+    }
+  }
+  return out;
+}
+
+/**
+ * @param {Matrix3} m
+ * @param {Vec3} v
+ * @returns {Vec3}
+ */
+function multiplyMat3Vec3(m, v) {
+  // prettier-ignore
+  return [
+    m[0] * v[0] + m[1] * v[1] + m[2] * v[2],
+    m[3] * v[0] + m[4] * v[1] + m[5] * v[2],
+    m[6] * v[0] + m[7] * v[1] + m[8] * v[2],
+  ];
+}
+
+/**
+ * @returns {Matrix3}
+ */
+function basisToPoints(x1, y1, x2, y2, x3, y3, x4, y4) {
+  /** @type {Matrix3} */
+  let mat3 = [x1, x2, x3, y1, y2, y3, 1, 1, 1];
+  let vec3 = multiplyMat3Vec3(computeAdjugate(mat3), [x4, y4, 1]);
+  // prettier-ignore
+  return multiplyMat3(
+    mat3,
+    [
+      vec3[0], 0,       0,
+      0,       vec3[1], 0,
+      0,       0,       vec3[2]
+    ]
+  );
+}
+
+/**
+ * @type {(...Matrix4) => Matrix3}
+ */
+// prettier-ignore
+function general2DProjection(
+  x1s, y1s, x1d, y1d,
+  x2s, y2s, x2d, y2d,
+  x3s, y3s, x3d, y3d,
+  x4s, y4s, x4d, y4d
+) {
+  let s = basisToPoints(x1s, y1s, x2s, y2s, x3s, y3s, x4s, y4s);
+  let d = basisToPoints(x1d, y1d, x2d, y2d, x3d, y3d, x4d, y4d);
+  return multiplyMat3(d, computeAdjugate(s));
+}
+
+/**
+ * Given a width and height, compute a projection matrix to points 1-4.
+ *
+ * The points (x1,y1) through (x4, y4) use the following ordering:
+ *
+ *         w
+ *      ┌─────┐      project     1 ─────── 2
+ *    h │     │       -->        │        /
+ *      └─────┘                  │       /
+ *                               3 ──── 4
+ *
+ * @returns {Matrix4}
+ */
+function projectPoints(w, h, x1, y1, x2, y2, x3, y3, x4, y4) {
+  // prettier-ignore
+  const mat3 = general2DProjection(
+    0, 0, x1, y1,
+    w, 0, x2, y2,
+    0, h, x3, y3,
+    w, h, x4, y4
+  );
+
+  for (let i = 0; i < 9; i++) {
+    mat3[i] = mat3[i] / mat3[8];
+  }
+
+  // prettier-ignore
+  return [
+    mat3[0], mat3[3], 0, mat3[6],
+    mat3[1], mat3[4], 0, mat3[7],
+    0,       0,       1, 0,
+    mat3[2], mat3[5], 0, mat3[8],
+  ];
+}
--- a/toolkit/themes/shared/media/textrecognition.css
+++ b/toolkit/themes/shared/media/textrecognition.css
@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+@namespace url("http://www.w3.org/1999/xhtml");
+
+.textrecognition {
+  position: absolute;
+  overflow: clip;
+
+  /* Prevent unwanted style inheritance. */
+  font: normal normal normal 100%/normal sans-serif;
+  text-align: left;
+  white-space: normal;
+}
+
+canvas {
+  position: absolute;
+  user-select: none;
+  inset: 0;
+}
+
+span {
+  position: absolute;
+  transform-origin: 0 0;
+  color: transparent;
+}
--- a/toolkit/themes/shared/minimal-toolkit.jar.inc.mn
+++ b/toolkit/themes/shared/minimal-toolkit.jar.inc.mn
@ -36,3 +36,7 @@ toolkit.jar:
  skin/classic/global/media/closedCaptionButton-cc-on.svg  (../../shared/media/closedCaptionButton-cc-on.svg)
  skin/classic/global/media/fullscreenEnterButton.svg      (../../shared/media/fullscreenEnterButton.svg)
  skin/classic/global/media/fullscreenExitButton.svg       (../../shared/media/fullscreenExitButton.svg)
+
+# Text recognition widget
+
+  skin/classic/global/media/textrecognition.css            (../../shared/media/textrecognition.css)