Added cron job to fetch chromium histograms

This commit is contained in:
Francois Beaufort 2015-02-10 18:31:50 +01:00
Родитель ea2c4bcbae
Коммит 03dd210818
8 изменённых файлов: 118 добавлений и 1157 удалений

Просмотреть файл

@ -37,18 +37,18 @@
*
* Handler File
*
* visiting /metrics/admin calls YesterdayHandler which retrieves yesterday's data from the UMA Cloud Storage.
* visiting /cron/metrics calls YesterdayHandler which retrieves yesterday's data from the UMA Cloud Storage.
* This is how the cron job is updating - Daily grabbing the previous day's data
* The data is parsed and stored as:
* class StableInstance(webapp2.RequestHandler):
* propertyName = db.StringProperty();
* bucketID = db.IntegerProperty();
* date = db.DateTimeProperty();
* hits = db.IntegerProperty();
* totalFlushes = db.IntegerProperty();
* dayPercentage = db.FloatProperty();
* rollingPercentage = db.FloatProperty();
* class StableInstance(DictModel):
* property_name = db.StringProperty();
* bucket_id = db.IntegerProperty();
* date = db.DateProperty();
* day_percentage = db.FloatProperty();
* rolling_percentage = db.FloatProperty();
*
* visiting /cron/histograms calls HistogramsHandler which retrieves the FeatureObserver and
* MappedCSSProperties histograms from chromium.googlesource.com.
*
* ACTION REQUIRED: we will need to replace histogramID with the appropriate ID.
* This can be obtained from uma.googleplex.com/data/histograms/ids-chrome-histograms.txt
@ -58,20 +58,13 @@
*
** uma.py
*
* property_name provides a mapping for bucketID to human readable property name
*
**
** featurelevel.js
*
* Creates charts for the feature level page.
*
* drawVisualization()
* This function takes in the name of the property for which the graph is being drawn.
* (This should probably be changed to the proeprtyID/bucketID in the future.)
* (This should probably be changed to the propertyID/bucketID in the future.)
* We iterate through parsed data, building up a data object which we can pass to chart.draw()
* The desired form of data to pass to chart.draw() is:
* [[Date, Name, Percentage]

Просмотреть файл

@ -26,6 +26,7 @@ import os
import re
import sys
import webapp2
import xml.dom.minidom
# Appengine imports.
from google.appengine.api import files
@ -40,17 +41,21 @@ from google.appengine.ext.webapp import blobstore_handlers
import common
import models
import settings
import uma
# uma.googleplex.com/data/histograms/ids-chrome-histograms.txt
BIGSTORE_BUCKET = '/gs/uma-dashboards/'
BIGSTORE_RESTFUL_URI = 'https://uma-dashboards.storage.googleapis.com/'
HISTOGRAMS_URL = 'https://chromium.googlesource.com/chromium/src/+/master/' \
'tools/metrics/histograms/histograms.xml?format=TEXT'
CSSPROPERITES_BS_HISTOGRAM_ID = str(0xbfd59b316a6c31f1)
ANIMATIONPROPS_BS_HISTOGRAM_ID = str(0xbee14b73f4fdde73)
FEATURE_OBSERVER_BS_HISTOGRAM_ID = str(0x2e44945129413683)
PAGE_VISITS_BUCKET_ID = 52
# For fetching files from the production BigStore during development.
OAUTH2_CREDENTIALS_FILENAME = os.path.join(
settings.ROOT_DIR, 'scripts', 'oauth2.data')
@ -99,29 +104,28 @@ class YesterdayHandler(blobstore_handlers.BlobstoreDownloadHandler):
# For CSSPROPERITES_BS_HISTOGRAM_ID, bucket 1 is total pages visited for
# stack rank histogram. We're guaranteed to have it.
# For the FEATURE_OBSERVER_BS_HISTOGRAM_ID, the PageVisits bucket_id is 52
# See uma.py. The actual % is calculated from the count / this number.
# For the FEATURE_OBSERVER_BS_HISTOGRAM_ID, the PageVisits bucket_id is 52.
# The actual % is calculated from the count / this number.
# For ANIMATIONPROPS_BS_HISTOGRAM_ID, we have to calculate the total count.
if 1 in properties_dict and histogram_id == CSSPROPERITES_BS_HISTOGRAM_ID:
total_pages = properties_dict.get(1)
elif (uma.PAGE_VISITS_BUCKET_ID in properties_dict and
elif (PAGE_VISITS_BUCKET_ID in properties_dict and
histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID):
total_pages = properties_dict.get(uma.PAGE_VISITS_BUCKET_ID)
total_pages = properties_dict.get(PAGE_VISITS_BUCKET_ID)
# Don't include PageVisits results.
del properties_dict[uma.PAGE_VISITS_BUCKET_ID]
del properties_dict[PAGE_VISITS_BUCKET_ID]
else:
total_pages = sum(properties_dict.values())
property_map = models.CssPropertyHistogram.get_all()
if histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID:
property_map = models.FeatureObserverHistogram.get_all()
for bucket_id, num_hits in properties_dict.items():
# If the id is not in the map, use 'ERROR' for the name.
# TODO(ericbidelman): Non-matched bucket ids are likely new properties
# that have been added and need to be updated in uma.py. Find way to
# autofix these values with the appropriate property_name later.
property_map = uma.CSS_PROPERTY_BUCKETS
if histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID:
property_map = uma.FEATUREOBSERVER_BUCKETS
# that have been added and will be updated in cron/histograms.
property_name = property_map.get(bucket_id, 'ERROR')
query = model_class.all()
@ -216,6 +220,67 @@ class YesterdayHandler(blobstore_handlers.BlobstoreDownloadHandler):
return (result.content, result.status_code)
class HistogramsHandler(webapp2.RequestHandler):
  """Cron handler that copies chromium histogram enums into the datastore.

  Fetches histograms.xml from HISTOGRAMS_URL, looks for the <enum> elements
  whose name is a key of MODEL_CLASS, and stores one entity per <int> bucket
  found inside them.
  """

  # Maps an <enum name="..."> in histograms.xml to the model class that stores
  # its buckets.
  MODEL_CLASS = {
    'FeatureObserver': models.FeatureObserverHistogram,
    'MappedCSSProperties': models.CssPropertyHistogram,
  }

  def _SaveData(self, data, histogram_id):
    """Stores one histogram bucket unless it should be skipped.

    Args:
      data: dict with a 'bucket_id' (int or numeric string) and a
          'property_name' entry.
      histogram_id: name of the histogram enum; must be a key of MODEL_CLASS.

    Invalid input is logged and skipped so that one bad bucket does not abort
    the rest of the sync.
    """
    try:
      model_class = self.MODEL_CLASS[histogram_id]
    except KeyError:
      logging.error('Invalid Histogram id used: %s', histogram_id)
      return

    try:
      bucket_id = int(data['bucket_id'])
    except (TypeError, ValueError):
      logging.error('Invalid bucket id for histogram %s: %r',
                    histogram_id, data['bucket_id'])
      return

    property_name = data['property_name']
    if not property_name:
      # property_name is a required model property; an empty label cannot be
      # stored.
      logging.error('Missing label for bucket %s of histogram %s',
                    bucket_id, histogram_id)
      return

    # Bucket ID 1 is reserved for number of CSS Pages Visited. So don't add it.
    if model_class == models.CssPropertyHistogram and bucket_id == 1:
      return

    # The key name combines id and label, so a bucket whose label changes is
    # stored as a new entity; get_or_insert leaves an existing entity as is.
    key_name = '%s_%s' % (bucket_id, property_name)
    model_class.get_or_insert(key_name,
      bucket_id=bucket_id,
      property_name=property_name
    )

  def get(self):
    """Fetches histograms.xml and saves every bucket of the known enums."""
    import base64
    from xml.parsers.expat import ExpatError

    # Attempt to fetch the histograms.xml file from chromium.googlesource.com.
    result = urlfetch.fetch(HISTOGRAMS_URL)

    if result.status_code != 200:
      logging.error('Unable to retrieve chromium histograms.')
      return

    # The response body is base64-encoded XML.
    try:
      histograms_content = base64.b64decode(result.content)
      dom = xml.dom.minidom.parseString(histograms_content)
    except (TypeError, ValueError, ExpatError):
      logging.exception('Unable to decode or parse chromium histograms.')
      return

    # The histograms.xml file looks like this:
    #
    # ...
    # <enum name="FeatureObserver" type="int">
    #   <int value="0" label="PageDestruction"/>
    #   <int value="1" label="LegacyNotifications"/>

    browsed_histograms = set()
    for enum in dom.getElementsByTagName('enum'):
      # getAttribute returns '' for a missing attribute, which is never a
      # key of MODEL_CLASS, so nameless enums are simply skipped.
      histogram_id = enum.getAttribute('name')
      if histogram_id not in self.MODEL_CLASS:
        continue
      browsed_histograms.add(histogram_id)
      for child in enum.getElementsByTagName('int'):
        data = {
          'bucket_id': child.getAttribute('value'),
          'property_name': child.getAttribute('label')
        }
        self._SaveData(data, histogram_id)

    # Log an error if some histograms were not found.
    if len(browsed_histograms) != len(self.MODEL_CLASS):
      logging.error('Less histograms than expected were retrieved.')
class FeatureHandler(common.ContentHandler):
DEFAULT_URL = '/features'
@ -269,8 +334,6 @@ class FeatureHandler(common.ContentHandler):
elif feature_id and 'new' in path:
return self.redirect(self.ADD_NEW_URL)
feature = None
template_data = {
'feature_form': models.FeatureForm()
}
@ -425,6 +488,7 @@ class FeatureHandler(common.ContentHandler):
# URL routing table for this module. The '/(.*)' FeatureHandler patterns also
# match the /cron/ paths, so the specific cron routes are listed before them.
# NOTE(review): this relies on the first matching route winning - confirm.
app = webapp2.WSGIApplication([
('/cron/metrics', YesterdayHandler),
('/cron/histograms', HistogramsHandler),
('/(.*)/([0-9]*)', FeatureHandler),
('/(.*)', FeatureHandler),
], debug=settings.DEBUG)

Просмотреть файл

@ -46,7 +46,7 @@ handlers:
script: google.appengine.ext.admin.application
login: admin
- url: /cron/metrics
- url: /cron/.*
script: admin.app
login: admin # Prevents raw access to this handler. Cron runs as admin.

Просмотреть файл

@ -1,4 +1,7 @@
cron:
- description: retrieve from chromium.googlesource.com chromium histograms
url: /cron/histograms
schedule: every day 04:00
- description: retrieve from UMA Cloud Storage data gathered yesterday
url: /cron/metrics
schedule: every day 05:00

Просмотреть файл

@ -553,3 +553,24 @@ class AppUser(DictModel):
d = self.to_dict()
d['id'] = self.key().id()
return d
class HistogramModel(db.Model):
  """Container for a histogram.

  Each entity is one bucket: a numeric bucket id and its property name.
  """

  bucket_id = db.IntegerProperty(required=True)
  property_name = db.StringProperty(required=True)

  @classmethod
  def get_all(cls):
    """Returns a dict mapping bucket_id to property_name.

    Every stored entity of the class this is called on is included. If two
    entities share a bucket_id, the one fetched last wins.
    """
    return dict((bucket.bucket_id, bucket.property_name)
                for bucket in cls.all().fetch(None))
class CssPropertyHistogram(HistogramModel):
  """Histogram buckets for CSS properties; adds nothing to HistogramModel."""
class FeatureObserverHistogram(HistogramModel):
  """Histogram buckets for FeatureObserver; adds nothing to HistogramModel."""

Просмотреть файл

@ -4,12 +4,12 @@
# Copyright 2014 Google Inc. All Rights Reserved.
import models
import uma
def CorrectPropertyName(bucket_id):
if bucket_id in uma.CSS_PROPERTY_BUCKETS:
return uma.CSS_PROPERTY_BUCKETS[bucket_id]
allCssPropertyHistograms = models.CssPropertyHistogram.get_all()
if bucket_id in allCssPropertyHistograms:
return allCssPropertyHistograms[bucket_id]
return None
def FetchAllPropertiesWithError(bucket_id=None):

Просмотреть файл

@ -27,7 +27,6 @@ from google.appengine.api import users
import common
import models
import settings
import uma
def normalized_name(val):
@ -174,11 +173,11 @@ class MainHandler(common.ContentHandler, common.JSONHandler):
template_data['feature'] = feature
elif path.startswith('metrics/css/timeline'):
properties = sorted(uma.CSS_PROPERTY_BUCKETS.items(), key=lambda x:x[1])
properties = sorted(models.CssPropertyHistogram.get_all().iteritems(), key=lambda x:x[1])
template_data['CSS_PROPERTY_BUCKETS'] = json.dumps(
properties, separators=(',',':'))
elif path.startswith('metrics/feature/timeline'):
properties = sorted(uma.FEATUREOBSERVER_BUCKETS.items(), key=lambda x:x[1])
properties = sorted(models.FeatureObserverHistogram.get_all().iteritems(), key=lambda x:x[1])
template_data['FEATUREOBSERVER_BUCKETS'] = json.dumps(
properties, separators=(',',':'))

1119
uma.py

Разница между файлами не показана из-за своего большого размера Загрузить разницу