Added cron job to fetch chromium histograms
This commit is contained in:
Родитель
ea2c4bcbae
Коммит
03dd210818
27
README.txt
27
README.txt
|
@ -37,18 +37,18 @@
|
|||
*
|
||||
* Handler File
|
||||
*
|
||||
* visiting /metrics/admin calls YesterdayHandler which retrieves yesterday's data from the UMA Cloud Storage.
|
||||
* visiting /cron/metrics calls YesterdayHandler which retrieves yesterday's data from the UMA Cloud Storage.
|
||||
* This is how the cron job is updating - Daily grabbing the previous day's data
|
||||
* The data is parsed and stored as:
|
||||
* class StableInstance(webapp2.RequestHandler):
|
||||
* propertyName = db.StringProperty();
|
||||
* bucketID = db.IntegerProperty();
|
||||
* date = db.DateTimeProperty();
|
||||
* hits = db.IntegerProperty();
|
||||
* totalFlushes = db.IntegerProperty();
|
||||
* dayPercentage = db.FloatProperty();
|
||||
* rollingPercentage = db.FloatProperty();
|
||||
* class StableInstance(DictModel):
|
||||
* property_name = db.StringProperty();
|
||||
* bucket_id = db.IntegerProperty();
|
||||
* date = db.DateProperty();
|
||||
* day_percentage = db.FloatProperty();
|
||||
* rolling_percentage = db.FloatProperty();
|
||||
*
|
||||
* visiting /cron/histograms calls HistogramsHandler which retrieves FeatureObserver and
|
||||
* MappedCSSProperties histograms from chromium.googlesource.com.
|
||||
*
|
||||
* ACTION REQUIRED: we will need to replace histogramID with the appropriate ID.
|
||||
* This can be obtained from uma.googleplex.com/data/histograms/ids-chrome-histograms.txt
|
||||
|
@ -58,20 +58,13 @@
|
|||
*
|
||||
|
||||
|
||||
** uma.py
|
||||
*
|
||||
* property_name provides a mapping for bucketID to human readable property name
|
||||
*
|
||||
**
|
||||
|
||||
|
||||
** featurelevel.js
|
||||
*
|
||||
* Creates charts for the feature level page.
|
||||
*
|
||||
* drawVisualization()
|
||||
* This function takes in the name of the property for which the graph is being drawn.
|
||||
* (This should probably be changed to the proeprtyID/bucketID in the future.)
|
||||
* (This should probably be changed to the propertyID/bucketID in the future.)
|
||||
* We iterate through parsed data, building up a data object which we can pass to chart.draw()
|
||||
* The desired form of data to pass to chart.draw() is:
|
||||
* [[Date, Name, Percentage]
|
||||
|
|
92
admin.py
92
admin.py
|
@ -26,6 +26,7 @@ import os
|
|||
import re
|
||||
import sys
|
||||
import webapp2
|
||||
import xml.dom.minidom
|
||||
|
||||
# Appengine imports.
|
||||
from google.appengine.api import files
|
||||
|
@ -40,17 +41,21 @@ from google.appengine.ext.webapp import blobstore_handlers
|
|||
import common
|
||||
import models
|
||||
import settings
|
||||
import uma
|
||||
|
||||
|
||||
# uma.googleplex.com/data/histograms/ids-chrome-histograms.txt
|
||||
BIGSTORE_BUCKET = '/gs/uma-dashboards/'
|
||||
BIGSTORE_RESTFUL_URI = 'https://uma-dashboards.storage.googleapis.com/'
|
||||
|
||||
HISTOGRAMS_URL = 'https://chromium.googlesource.com/chromium/src/+/master/' \
|
||||
'tools/metrics/histograms/histograms.xml?format=TEXT'
|
||||
|
||||
CSSPROPERITES_BS_HISTOGRAM_ID = str(0xbfd59b316a6c31f1)
|
||||
ANIMATIONPROPS_BS_HISTOGRAM_ID = str(0xbee14b73f4fdde73)
|
||||
FEATURE_OBSERVER_BS_HISTOGRAM_ID = str(0x2e44945129413683)
|
||||
|
||||
PAGE_VISITS_BUCKET_ID = 52
|
||||
|
||||
# For fetching files from the production BigStore during development.
|
||||
OAUTH2_CREDENTIALS_FILENAME = os.path.join(
|
||||
settings.ROOT_DIR, 'scripts', 'oauth2.data')
|
||||
|
@ -99,29 +104,28 @@ class YesterdayHandler(blobstore_handlers.BlobstoreDownloadHandler):
|
|||
|
||||
# For CSSPROPERITES_BS_HISTOGRAM_ID, bucket 1 is total pages visited for
|
||||
# stack rank histogram. We're guaranteed to have it.
|
||||
# For the FEATURE_OBSERVER_BS_HISTOGRAM_ID, the PageVisits bucket_id is 52
|
||||
# See uma.py. The actual % is calculated from the count / this number.
|
||||
# For the FEATURE_OBSERVER_BS_HISTOGRAM_ID, the PageVisits bucket_id is 52.
|
||||
# The actual % is calculated from the count / this number.
|
||||
# For ANIMATIONPROPS_BS_HISTOGRAM_ID, we have to calculate the total count.
|
||||
if 1 in properties_dict and histogram_id == CSSPROPERITES_BS_HISTOGRAM_ID:
|
||||
total_pages = properties_dict.get(1)
|
||||
elif (uma.PAGE_VISITS_BUCKET_ID in properties_dict and
|
||||
elif (PAGE_VISITS_BUCKET_ID in properties_dict and
|
||||
histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID):
|
||||
total_pages = properties_dict.get(uma.PAGE_VISITS_BUCKET_ID)
|
||||
total_pages = properties_dict.get(PAGE_VISITS_BUCKET_ID)
|
||||
|
||||
# Don't include PageVisits results.
|
||||
del properties_dict[uma.PAGE_VISITS_BUCKET_ID]
|
||||
del properties_dict[PAGE_VISITS_BUCKET_ID]
|
||||
else:
|
||||
total_pages = sum(properties_dict.values())
|
||||
|
||||
property_map = models.CssPropertyHistogram.get_all()
|
||||
if histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID:
|
||||
property_map = models.FeatureObserverHistogram.get_all()
|
||||
|
||||
for bucket_id, num_hits in properties_dict.items():
|
||||
# If the id is not in the map, use 'ERROR' for the name.
|
||||
# TODO(ericbidelman): Non-matched bucket ids are likely new properties
|
||||
# that have been added and need to be updated in uma.py. Find way to
|
||||
# autofix these values with the appropriate property_name later.
|
||||
property_map = uma.CSS_PROPERTY_BUCKETS
|
||||
if histogram_id == FEATURE_OBSERVER_BS_HISTOGRAM_ID:
|
||||
property_map = uma.FEATUREOBSERVER_BUCKETS
|
||||
|
||||
# that have been added and will be updated in cron/histograms.
|
||||
property_name = property_map.get(bucket_id, 'ERROR')
|
||||
|
||||
query = model_class.all()
|
||||
|
@ -216,6 +220,67 @@ class YesterdayHandler(blobstore_handlers.BlobstoreDownloadHandler):
|
|||
return (result.content, result.status_code)
|
||||
|
||||
|
||||
class HistogramsHandler(webapp2.RequestHandler):
  """Cron handler that syncs bucket id -> name mappings from histograms.xml.

  Fetches HISTOGRAMS_URL, looks for the <enum> elements whose name is a key of
  MODEL_CLASS and stores one entity per <int> child in the matching model.
  """

  # Maps the <enum name="..."> values of interest to the datastore model that
  # holds their buckets.
  MODEL_CLASS = {
    'FeatureObserver': models.FeatureObserverHistogram,
    'MappedCSSProperties': models.CssPropertyHistogram,
  }

  def _SaveData(self, data, histogram_id):
    """Stores a single bucket, unless an entity with the same key exists.

    Args:
      data: dict with 'bucket_id' (int or numeric string) and 'property_name'.
      histogram_id: enum name; must be a key of MODEL_CLASS.

    Invalid input is logged and skipped rather than raised, so one bad entry
    does not abort the whole import.
    """
    # Only a failed lookup is expected here; anything else should surface.
    try:
      model_class = self.MODEL_CLASS[histogram_id]
    except (KeyError, TypeError):
      logging.error('Invalid Histogram id used: %s', histogram_id)
      return

    try:
      bucket_id = int(data['bucket_id'])
    except (TypeError, ValueError):
      logging.error('Invalid bucket id for histogram %s: %r',
                    histogram_id, data['bucket_id'])
      return

    property_name = data['property_name']
    if not property_name:
      # property_name is a required model field; an empty label cannot be
      # stored.
      logging.error('Missing property name for histogram %s, bucket %s',
                    histogram_id, bucket_id)
      return

    # Bucket ID 1 is reserved for number of CSS Pages Visited. So don't add it.
    if model_class == models.CssPropertyHistogram and bucket_id == 1:
      return

    key_name = '%s_%s' % (bucket_id, property_name)
    model_class.get_or_insert(key_name,
                              bucket_id=bucket_id,
                              property_name=property_name)

  def get(self):
    """Fetches histograms.xml and saves the buckets of every tracked enum.

    Logs an error (and stores nothing) when the fetch does not return 200, and
    logs an error when not every enum listed in MODEL_CLASS was found.
    """
    # Function-scope import so this block does not depend on the file's
    # import list.
    import base64

    # Attempt to fetch the histograms.xml file from chromium.googlesource.com.
    result = urlfetch.fetch(HISTOGRAMS_URL)
    if result.status_code != 200:
      logging.error('Unable to retrieve chromium histograms.')
      return

    # The body is expected to be base64-encoded; decode it before parsing.
    histograms_content = base64.b64decode(result.content)
    dom = xml.dom.minidom.parseString(histograms_content)

    # The histograms.xml file looks like this:
    #
    # ...
    # <enum name="FeatureObserver" type="int">
    #   <int value="0" label="PageDestruction"/>
    #   <int value="1" label="LegacyNotifications"/>
    found_histograms = set()
    for enum in dom.getElementsByTagName('enum'):
      # getAttribute() returns '' for a missing attribute instead of raising.
      histogram_id = enum.getAttribute('name')
      if histogram_id not in self.MODEL_CLASS:
        continue

      found_histograms.add(histogram_id)
      for child in enum.getElementsByTagName('int'):
        data = {
          'bucket_id': child.getAttribute('value'),
          'property_name': child.getAttribute('label'),
        }
        self._SaveData(data, histogram_id)

    # Log an error if some histograms were not found.
    if len(found_histograms) != len(self.MODEL_CLASS):
      logging.error('Less histograms than expected were retrieved.')
|
||||
|
||||
class FeatureHandler(common.ContentHandler):
|
||||
|
||||
DEFAULT_URL = '/features'
|
||||
|
@ -269,8 +334,6 @@ class FeatureHandler(common.ContentHandler):
|
|||
elif feature_id and 'new' in path:
|
||||
return self.redirect(self.ADD_NEW_URL)
|
||||
|
||||
feature = None
|
||||
|
||||
template_data = {
|
||||
'feature_form': models.FeatureForm()
|
||||
}
|
||||
|
@ -425,6 +488,7 @@ class FeatureHandler(common.ContentHandler):
|
|||
|
||||
app = webapp2.WSGIApplication([
|
||||
('/cron/metrics', YesterdayHandler),
|
||||
('/cron/histograms', HistogramsHandler),
|
||||
('/(.*)/([0-9]*)', FeatureHandler),
|
||||
('/(.*)', FeatureHandler),
|
||||
], debug=settings.DEBUG)
|
||||
|
|
2
app.yaml
2
app.yaml
|
@ -46,7 +46,7 @@ handlers:
|
|||
script: google.appengine.ext.admin.application
|
||||
login: admin
|
||||
|
||||
- url: /cron/metrics
|
||||
- url: /cron/.*
|
||||
script: admin.app
|
||||
login: admin # Prevents raw access to this handler. Cron runs as admin.
|
||||
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
cron:
|
||||
- description: retrieve from chromium.googlesource.com chromium histograms
|
||||
url: /cron/histograms
|
||||
schedule: every day 04:00
|
||||
- description: retrieve from UMA Cloud Storage data gathered yesterday
|
||||
url: /cron/metrics
|
||||
schedule: every day 05:00
|
||||
|
|
21
models.py
21
models.py
|
@ -553,3 +553,24 @@ class AppUser(DictModel):
|
|||
d = self.to_dict()
|
||||
d['id'] = self.key().id()
|
||||
return d
|
||||
|
||||
|
||||
class HistogramModel(db.Model):
  """One histogram bucket: a numeric bucket id and its property name."""

  bucket_id = db.IntegerProperty(required=True)
  property_name = db.StringProperty(required=True)

  @classmethod
  def get_all(cls):
    """Returns every stored bucket as a {bucket_id: property_name} dict.

    If several entities share a bucket_id, the one fetched last wins.
    """
    return {
      bucket.bucket_id: bucket.property_name
      for bucket in cls.all().fetch(None)
    }
|
||||
|
||||
class CssPropertyHistogram(HistogramModel):
  """Buckets stored for the 'MappedCSSProperties' histogram enum."""
|
||||
|
||||
class FeatureObserverHistogram(HistogramModel):
  """Buckets stored for the 'FeatureObserver' histogram enum."""
|
||||
|
|
|
@ -4,12 +4,12 @@
|
|||
# Copyright 2014 Google Inc. All Rights Reserved.
|
||||
|
||||
import models
|
||||
import uma
|
||||
|
||||
|
||||
def CorrectPropertyName(bucket_id):
|
||||
if bucket_id in uma.CSS_PROPERTY_BUCKETS:
|
||||
return uma.CSS_PROPERTY_BUCKETS[bucket_id]
|
||||
allCssPropertyHistograms = models.CssPropertyHistogram.get_all()
|
||||
if bucket_id in allCssPropertyHistograms:
|
||||
return allCssPropertyHistograms[bucket_id]
|
||||
return None
|
||||
|
||||
def FetchAllPropertiesWithError(bucket_id=None):
|
||||
|
|
|
@ -27,7 +27,6 @@ from google.appengine.api import users
|
|||
import common
|
||||
import models
|
||||
import settings
|
||||
import uma
|
||||
|
||||
|
||||
def normalized_name(val):
|
||||
|
@ -174,11 +173,11 @@ class MainHandler(common.ContentHandler, common.JSONHandler):
|
|||
|
||||
template_data['feature'] = feature
|
||||
elif path.startswith('metrics/css/timeline'):
|
||||
properties = sorted(uma.CSS_PROPERTY_BUCKETS.items(), key=lambda x:x[1])
|
||||
properties = sorted(models.CssPropertyHistogram.get_all().iteritems(), key=lambda x:x[1])
|
||||
template_data['CSS_PROPERTY_BUCKETS'] = json.dumps(
|
||||
properties, separators=(',',':'))
|
||||
elif path.startswith('metrics/feature/timeline'):
|
||||
properties = sorted(uma.FEATUREOBSERVER_BUCKETS.items(), key=lambda x:x[1])
|
||||
properties = sorted(models.FeatureObserverHistogram.get_all().iteritems(), key=lambda x:x[1])
|
||||
template_data['FEATUREOBSERVER_BUCKETS'] = json.dumps(
|
||||
properties, separators=(',',':'))
|
||||
|
||||
|
|
1119
uma.py
1119
uma.py
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче