Fix #158: Add 'attempt_extraction' and 'complete_extraction' probes
* Update 'method' extra key for 'complete_extraction' event to have values of 'fathom', 'css_selectors', 'open_graph' or 'none' to distinguish between the two fallback extraction methods to Fathom: CSS Selectors or Open Graph attributes. * Note: Since none of our five supported sites use Open Graph attributes currently for product information, we should not expect to see any successful extraction using that method for the MVP. * Important caveats for making conclusions using these probes as-is: * The coverage values that these probes will suggest will not be very accurate initially until we ensure we are extracting on only **product pages** for a supported site, rather than any page on the site (see Issues #225 and #181). * Successful extraction does not mean that the information extracted for the product is correct. It only means that _a_ value was extracted for each product feature (i.e. title, image and price) on the page.
This commit is contained in:
Родитель
6bf71516aa
Коммит
0bca7d982a
|
@ -140,7 +140,7 @@ Below is a sample ping for the `badge_toolbar_button` and `visit_supported_site`
|
|||
- `'badge_type'`: Indicates what, if any, badge was present on the browserAction toolbar button. One of 'add', 'price_alert', or 'none'. A value of 'unknown' is possible if the badge text is unrecognized.
|
||||
- `'extraction_id'`: A unique identifier to associate an extraction attempt to an extraction completion event for a given page.
|
||||
- `'is_bg_update'`: 'true' if the extraction is associated with a background price check; otherwise 'false'.
|
||||
- `method`: The extraction method that was successful, if any. One of: 'fathom', 'fallback' or 'neither'. A value of 'neither' means that extraction failed.
|
||||
- `method`: The extraction method that was successful, if any. One of: 'fathom', 'css_selectors', 'open_graph' or 'none'. A value of 'none' means that all extraction methods failed.
|
||||
- `'price'`: The price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only).
|
||||
- `'price_alert'`: 'true' if the product has an active Price Alert; otherwise 'false'.
|
||||
- `'price_last_high'`: The last high price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only).
|
||||
|
|
|
@ -8,9 +8,11 @@
|
|||
* been parsed but before all resources have been loaded.
|
||||
*/
|
||||
|
||||
import uuidv4 from 'uuid/v4';
|
||||
|
||||
import config from 'commerce/config/content';
|
||||
import extractProductWithFathom from 'commerce/extraction/fathom';
|
||||
import extractProductWithFallback from 'commerce/extraction/selector';
|
||||
import extractProductWithCSSSelectors from 'commerce/extraction/selector';
|
||||
import extractProductWithOpenGraph from 'commerce/extraction/open_graph';
|
||||
import {shouldExtract} from 'commerce/privacy';
|
||||
import recordEvent from 'commerce/telemetry/content';
|
||||
|
@ -20,25 +22,39 @@ import recordEvent from 'commerce/telemetry/content';
|
|||
* return either a valid ExtractedProduct, or null if a valid product could not
|
||||
* be found.
|
||||
*/
|
||||
const EXTRACTION_METHODS = [
|
||||
extractProductWithFathom,
|
||||
extractProductWithFallback,
|
||||
extractProductWithOpenGraph,
|
||||
];
|
||||
const EXTRACTION_METHODS = {
|
||||
fathom: extractProductWithFathom,
|
||||
css_selectors: extractProductWithCSSSelectors,
|
||||
open_graph: extractProductWithOpenGraph,
|
||||
};
|
||||
|
||||
/**
|
||||
* Perform product extraction, trying each method from EXTRACTION_METHODS in
|
||||
* order until one of them returns a truthy result.
|
||||
* @return {ExtractedProduct|null}
|
||||
*/
|
||||
function extractProduct() {
|
||||
for (const extract of EXTRACTION_METHODS) {
|
||||
function extractProduct(isBackgroundUpdate) {
|
||||
const baseExtra = {
|
||||
extraction_id: uuidv4(),
|
||||
is_bg_update: isBackgroundUpdate,
|
||||
};
|
||||
recordEvent('attempt_extraction', 'product_page', null, {
|
||||
...baseExtra,
|
||||
});
|
||||
for (const [method, extract] of Object.entries(EXTRACTION_METHODS)) {
|
||||
const extractedProduct = extract(window.document);
|
||||
if (extractedProduct) {
|
||||
recordEvent('complete_extraction', 'product_page', null, {
|
||||
...baseExtra,
|
||||
method,
|
||||
});
|
||||
return extractedProduct;
|
||||
}
|
||||
}
|
||||
|
||||
recordEvent('complete_extraction', 'product_page', null, {
|
||||
...baseExtra,
|
||||
method: 'none',
|
||||
});
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -58,8 +74,8 @@ async function sendProductToBackground(extractedProduct, sendTelemetry) {
|
|||
* Checks to see if any product information for the page was found,
|
||||
* and if so, sends it to the background script.
|
||||
*/
|
||||
async function attemptExtraction() {
|
||||
const extractedProduct = extractProduct();
|
||||
async function attemptExtraction(isBackgroundUpdate) {
|
||||
const extractedProduct = extractProduct(isBackgroundUpdate);
|
||||
if (extractedProduct) {
|
||||
await sendProductToBackground(extractedProduct);
|
||||
}
|
||||
|
@ -105,9 +121,9 @@ async function attemptExtraction() {
|
|||
}
|
||||
|
||||
// Extract immediately, and again if the readyState changes.
|
||||
let extractedProduct = await attemptExtraction();
|
||||
let extractedProduct = await attemptExtraction(isBackgroundUpdate);
|
||||
document.addEventListener('readystatechange', async () => {
|
||||
extractedProduct = await attemptExtraction();
|
||||
extractedProduct = await attemptExtraction(isBackgroundUpdate);
|
||||
});
|
||||
|
||||
// Messy workaround for bug 1493470: Resend product info to the background
|
||||
|
|
Загрузка…
Ссылка в новой задаче