Fix #158: Add 'attempt_extraction' and 'complete_extraction' probes

* Update the 'method' extra key for the 'complete_extraction' event to have a value of 'fathom', 'css_selectors', 'open_graph' or 'none', in order to distinguish between the two extraction methods used as fallbacks for Fathom: CSS selectors and Open Graph attributes.
  * Note: Since none of our five supported sites use Open Graph attributes currently for product information, we should not expect to see any successful extraction using that method for the MVP.
* Important caveats for making conclusions using these probes as-is:
  * The coverage values that these probes suggest will not be very accurate until we ensure we are extracting only on **product pages** for a supported site, rather than on any page on the site (see Issues #225 and #181).
  * Successful extraction does not mean that the information extracted for the product is correct. It only means that _a_ value was extracted for each product feature (i.e. title, image and price) on the page.
This commit is contained in:
Bianca Danforth 2018-11-01 11:02:17 -07:00
Родитель 6bf71516aa
Коммит 0bca7d982a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 2C96DD7DB2A2D72D
2 изменённых файлов: 30 добавлений и 14 удалений

Просмотреть файл

@ -140,7 +140,7 @@ Below is a sample ping for the `badge_toolbar_button` and `visit_supported_site`
- `'badge_type'`: Indicates what, if any, badge was present on the browserAction toolbar button. One of 'add', 'price_alert', or 'none'. A value of 'unknown' is possible if the badge text is unrecognized.
- `'extraction_id'`: A unique identifier to associate an extraction attempt to an extraction completion event for a given page.
- `'is_bg_update'`: 'true' if the extraction is associated with a background price check; otherwise 'false'.
- `method`: The extraction method that was successful, if any. One of: 'fathom', 'fallback' or 'neither'. A value of 'neither' means that extraction failed.
- `'method'`: The extraction method that was successful, if any. One of: 'fathom', 'css_selectors', 'open_graph' or 'none'. A value of 'none' means that all extraction methods failed.
- `'price'`: The price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only).
- `'price_alert'`: 'true' if the product has an active Price Alert; otherwise 'false'.
- `'price_last_high'`: The last high price of the product in subunits (e.g. a $10.00 product would have a value of `'1000'`). For the MVP, the units here are always cents (USD currency only).

Просмотреть файл

@ -8,9 +8,11 @@
* been parsed but before all resources have been loaded.
*/
import uuidv4 from 'uuid/v4';
import config from 'commerce/config/content';
import extractProductWithFathom from 'commerce/extraction/fathom';
import extractProductWithFallback from 'commerce/extraction/selector';
import extractProductWithCSSSelectors from 'commerce/extraction/selector';
import extractProductWithOpenGraph from 'commerce/extraction/open_graph';
import {shouldExtract} from 'commerce/privacy';
import recordEvent from 'commerce/telemetry/content';
@ -20,25 +22,39 @@ import recordEvent from 'commerce/telemetry/content';
* return either a valid ExtractedProduct, or null if a valid product could not
* be found.
*/
const EXTRACTION_METHODS = [
extractProductWithFathom,
extractProductWithFallback,
extractProductWithOpenGraph,
];
// Each key is the name reported as the 'method' extra key of the
// 'complete_extraction' telemetry event when its extractor succeeds.
// extractProduct() walks Object.entries() of this object, so the listing
// order here is the order in which the extractors are tried.
const EXTRACTION_METHODS = {
fathom: extractProductWithFathom,
css_selectors: extractProductWithCSSSelectors,
open_graph: extractProductWithOpenGraph,
};
/**
* Perform product extraction, trying each method from EXTRACTION_METHODS in
* order until one of them returns a truthy result.
*
* Records an 'attempt_extraction' telemetry event before any method runs and
* a 'complete_extraction' event once extraction finishes. Both events carry
* the same extraction_id and is_bg_update extra keys; the completion event
* additionally carries 'method', which is the key of the extractor that
* succeeded, or 'none' if every extractor failed.
*
* @param isBackgroundUpdate Value recorded as the 'is_bg_update' extra key.
*   NOTE(review): the type is not visible in this block; the README describes
*   the recorded value as 'true' or 'false' - confirm against the caller.
* @return {ExtractedProduct|null}
*/
function extractProduct() {
for (const extract of EXTRACTION_METHODS) {
function extractProduct(isBackgroundUpdate) {
// Extra keys shared by the attempt and completion events so that the two
// events for one extraction can be associated via extraction_id.
const baseExtra = {
extraction_id: uuidv4(),
is_bg_update: isBackgroundUpdate,
};
recordEvent('attempt_extraction', 'product_page', null, {
...baseExtra,
});
for (const [method, extract] of Object.entries(EXTRACTION_METHODS)) {
const extractedProduct = extract(window.document);
if (extractedProduct) {
// First truthy result wins: report which method produced it and stop.
recordEvent('complete_extraction', 'product_page', null, {
...baseExtra,
method,
});
return extractedProduct;
}
}
// No extractor returned a product; still record completion so that every
// attempt event has a matching completion event.
recordEvent('complete_extraction', 'product_page', null, {
...baseExtra,
method: 'none',
});
return null;
}
@ -58,8 +74,8 @@ async function sendProductToBackground(extractedProduct, sendTelemetry) {
* Checks to see if any product information for the page was found,
* and if so, sends it to the background script.
*/
async function attemptExtraction() {
const extractedProduct = extractProduct();
async function attemptExtraction(isBackgroundUpdate) {
const extractedProduct = extractProduct(isBackgroundUpdate);
if (extractedProduct) {
await sendProductToBackground(extractedProduct);
}
@ -105,9 +121,9 @@ async function attemptExtraction() {
}
// Extract immediately, and again if the readyState changes.
let extractedProduct = await attemptExtraction();
let extractedProduct = await attemptExtraction(isBackgroundUpdate);
document.addEventListener('readystatechange', async () => {
extractedProduct = await attemptExtraction();
extractedProduct = await attemptExtraction(isBackgroundUpdate);
});
// Messy workaround for bug 1493470: Resend product info to the background