Rebased on the latest master.

2016-04-26 15:21:33 +02:00 · 2016-04-26 15:21:33 +02:00 · e533d36179
--- a/.travis.yml
+++ b/.travis.yml
@ -0,0 +1,20 @@
+language: python
+python:
+  - "2.7"
+cache: pip
+install:
+  - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - hash -r
+  - conda config --set always_yes yes --set changeps1 no
+  - conda update -q conda
+  # Useful for debugging any issues with conda
+  - conda info -a
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION python-snappy
+  - source activate test-environment
+  - pip --version
+  - pip install .
+
+script:
+  - python setup.py test
--- a/README.md
+++ b/README.md
@ -15,7 +15,11 @@ password:example_pass
 - Fetch the latest code with `git pull`
 - Update PyPI with `python setup.py sdist upload`

-## Running the tests

- python setup.py install
- python setup.py test
+## Updating histogram_tools.py
+moztelemetry/histogram_tools.py is a mirror of its counterpart from
+[mozilla-central](https://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/components/telemetry/histogram_tools.py).
+To update it to the latest version you can run
+```bash
+bin/update_histogram_tools
+```
--- a/bin/update_histogram_tools
+++ b/bin/update_histogram_tools
@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, you can obtain one at http://mozilla.org/MPL/2.0/.
+import os
+
+import requests
+
+
+"""Replace histogram_tools.py with the latest version from mozilla-central"""
+
+print "Downloading latest histogram_tools.py..."
+
+url = ("https://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/"
+       "components/telemetry/histogram_tools.py")
+project_root = os.path.dirname(
+                  os.path.dirname(os.path.realpath(__file__)))
+target_dir = os.path.join(project_root, 'moztelemetry')
+
+response = requests.get(url, timeout=10)
+response.raise_for_status()
+
+print "histogram_tools.py downloaded."
+
+with open(os.path.join(target_dir, "histogram_tools.py"), "w") as fh:
+    fh.write(response.content)
+
+print "histogram_tools.py successfully updated."
--- a/moztelemetry/heka_message_parser.py
+++ b/moztelemetry/heka_message_parser.py
@ -8,20 +8,14 @@
 import ujson as json
 import ssl

-from telemetry.util.heka_message import unpack, BacktrackableFile
+from telemetry.util.heka_message import unpack


-def parse_heka_message(message, boundary_bytes=None):
+def parse_heka_message(message):
    try:
-        message = BacktrackableFile(message)
-
-        for record, total_bytes in unpack(message, backtrack=True):
+        for record, total_bytes in unpack(message):
            yield _parse_heka_record(record)

-            if boundary_bytes and (total_bytes >= boundary_bytes):
-                message.close()
-                break
-
    except ssl.SSLError:
        pass  # https://github.com/boto/boto/issues/2830

--- a/moztelemetry/histogram.py
+++ b/moztelemetry/histogram.py
@ -191,20 +191,3 @@ class Histogram:

    def __add__(self, other):
        return Histogram(self.name, self.buckets + other.buckets, histograms_url=self.histograms_url)
-
-
-if __name__ == "__main__":
-    # Histogram with computed value
-    Histogram("GC_REASON_2", [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 11, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2517, -1, -1, 116979, 0])
-
-    # Histogram without revision
-    Histogram("STARTUP_CRASH_DETECTED", [1, 0, 0, 0, -1, -1, 0, 0], "http://hg.mozilla.org/mozilla-central/rev/da2f28836843")
-
-    # Histogram with revision
-    Histogram("HTTPCONNMGR_USED_SPECULATIVE_CONN", [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779, -1, -1], "http://hg.mozilla.org/mozilla-central/rev/37ddc5e2eb72")
-
-    # Startup histogram
-    Histogram("STARTUP_HTTPCONNMGR_USED_SPECULATIVE_CONN", [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779, -1, -1])
-
-    # Exceptional histogram
-    Histogram("EVENTLOOP_UI_LAG_EXP_MS", {"values": {"0": 0}})
--- a/moztelemetry/histogram_tools.py
+++ b/moztelemetry/histogram_tools.py
@ -0,0 +1,425 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import collections
+import json
+import math
+import os
+import re
+import sys
+
+# histogram_tools.py is used by scripts from a mozilla-central build tree
+# and also by outside consumers, such as the telemetry server.  We need
+# to ensure that importing things works in both contexts.  Therefore,
+# unconditionally importing things that are local to the build tree, such
+# as buildconfig, is a no-no.
+try:
+    import buildconfig
+
+    # Need to update sys.path to be able to find usecounters.
+    sys.path.append(os.path.join(buildconfig.topsrcdir, 'dom/base/'))
+except ImportError:
+    # Must be in an out-of-tree usage scenario.  Trust that whoever is
+    # running this script knows we need the usecounters module and has
+    # ensured it's in our sys.path.
+    pass
+
+from collections import OrderedDict
+
+def table_dispatch(kind, table, body):
+    """Call body with table[kind] if it exists.  Raise an error otherwise."""
+    if kind in table:
+        return body(table[kind])
+    else:
+        raise BaseException, "don't know how to handle a histogram of kind %s" % kind
+
+class DefinitionException(BaseException):
+    pass
+
+def linear_buckets(dmin, dmax, n_buckets):
+    ret_array = [0] * n_buckets
+    dmin = float(dmin)
+    dmax = float(dmax)
+    for i in range(1, n_buckets):
+        linear_range = (dmin * (n_buckets - 1 - i) + dmax * (i - 1)) / (n_buckets - 2)
+        ret_array[i] = int(linear_range + 0.5)
+    return ret_array
+
+def exponential_buckets(dmin, dmax, n_buckets):
+    log_max = math.log(dmax);
+    bucket_index = 2;
+    ret_array = [0] * n_buckets
+    current = dmin
+    ret_array[1] = current
+    for bucket_index in range(2, n_buckets):
+        log_current = math.log(current)
+        log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
+        log_next = log_current + log_ratio
+        next_value = int(math.floor(math.exp(log_next) + 0.5))
+        if next_value > current:
+            current = next_value
+        else:
+            current = current + 1
+        ret_array[bucket_index] = current
+    return ret_array
+
+always_allowed_keys = ['kind', 'description', 'cpp_guard', 'expires_in_version',
+                       'alert_emails', 'keyed', 'releaseChannelCollection',
+                       'bug_numbers']
+
+whitelists = None;
+try:
+    whitelist_path = os.path.join(os.path.abspath(os.path.realpath(os.path.dirname(__file__))), 'histogram-whitelists.json')
+    with open(whitelist_path, 'r') as f:
+        try:
+            whitelists = json.load(f)
+            for name, whitelist in whitelists.iteritems():
+              whitelists[name] = set(whitelist)
+        except ValueError, e:
+            raise BaseException, 'error parsing whitelist (%s)' % whitelist_path
+except IOError:
+    whitelists = None
+    print 'Unable to parse whitelist (%s). Assuming all histograms are acceptable.' % whitelist_path
+
+class Histogram:
+    """A class for representing a histogram definition."""
+
+    def __init__(self, name, definition, strict_type_checks=False):
+        """Initialize a histogram named name with the given definition.
+definition is a dict-like object that must contain at least the keys:
+
+ - 'kind': The kind of histogram.  Must be one of 'boolean', 'flag',
+   'count', 'enumerated', 'linear', or 'exponential'.
+ - 'description': A textual description of the histogram.
+ - 'strict_type_checks': A boolean indicating whether to use the new, stricter type checks.
+                         The server-side still has to deal with old, oddly typed submissions,
+                         so we have to skip them there by default.
+
+The key 'cpp_guard' is optional; if present, it denotes a preprocessor
+symbol that should guard C/C++ definitions associated with the histogram."""
+        self._strict_type_checks = strict_type_checks
+        self.verify_attributes(name, definition)
+        self._name = name
+        self._description = definition['description']
+        self._kind = definition['kind']
+        self._cpp_guard = definition.get('cpp_guard')
+        self._keyed = definition.get('keyed', False)
+        self._expiration = definition.get('expires_in_version')
+        self.compute_bucket_parameters(definition)
+        table = { 'boolean': 'BOOLEAN',
+                  'flag': 'FLAG',
+                  'count': 'COUNT',
+                  'enumerated': 'LINEAR',
+                  'linear': 'LINEAR',
+                  'exponential': 'EXPONENTIAL' }
+        table_dispatch(self.kind(), table,
+                       lambda k: self._set_nsITelemetry_kind(k))
+        datasets = { 'opt-in': 'DATASET_RELEASE_CHANNEL_OPTIN',
+                     'opt-out': 'DATASET_RELEASE_CHANNEL_OPTOUT' }
+        value = definition.get('releaseChannelCollection', 'opt-in')
+        if not value in datasets:
+            raise DefinitionException, "unknown release channel collection policy for " + name
+        self._dataset = "nsITelemetry::" + datasets[value]
+
+    def name(self):
+        """Return the name of the histogram."""
+        return self._name
+
+    def description(self):
+        """Return the description of the histogram."""
+        return self._description
+
+    def kind(self):
+        """Return the kind of the histogram.
+Will be one of 'boolean', 'flag', 'count', 'enumerated', 'linear', or 'exponential'."""
+        return self._kind
+
+    def expiration(self):
+        """Return the expiration version of the histogram."""
+        return self._expiration
+
+    def nsITelemetry_kind(self):
+        """Return the nsITelemetry constant corresponding to the kind of
+the histogram."""
+        return self._nsITelemetry_kind
+
+    def _set_nsITelemetry_kind(self, kind):
+        self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % kind
+
+    def low(self):
+        """Return the lower bound of the histogram."""
+        return self._low
+
+    def high(self):
+        """Return the high bound of the histogram."""
+        return self._high
+
+    def n_buckets(self):
+        """Return the number of buckets in the histogram."""
+        return self._n_buckets
+
+    def cpp_guard(self):
+        """Return the preprocessor symbol that should guard C/C++ definitions
+associated with the histogram.  Returns None if no guarding is necessary."""
+        return self._cpp_guard
+
+    def keyed(self):
+        """Returns True if this a keyed histogram, false otherwise."""
+        return self._keyed
+
+    def dataset(self):
+        """Returns the dataset this histogram belongs into."""
+        return self._dataset
+
+    def ranges(self):
+        """Return an array of lower bounds for each bucket in the histogram."""
+        table = { 'boolean': linear_buckets,
+                  'flag': linear_buckets,
+                  'count': linear_buckets,
+                  'enumerated': linear_buckets,
+                  'linear': linear_buckets,
+                  'exponential': exponential_buckets }
+        return table_dispatch(self.kind(), table,
+                              lambda p: p(self.low(), self.high(), self.n_buckets()))
+
+    def compute_bucket_parameters(self, definition):
+        table = {
+            'boolean': Histogram.boolean_flag_bucket_parameters,
+            'flag': Histogram.boolean_flag_bucket_parameters,
+            'count': Histogram.boolean_flag_bucket_parameters,
+            'enumerated': Histogram.enumerated_bucket_parameters,
+            'linear': Histogram.linear_bucket_parameters,
+            'exponential': Histogram.exponential_bucket_parameters
+            }
+        table_dispatch(self.kind(), table,
+                       lambda p: self.set_bucket_parameters(*p(definition)))
+
+    def verify_attributes(self, name, definition):
+        global always_allowed_keys
+        general_keys = always_allowed_keys + ['low', 'high', 'n_buckets']
+
+        table = {
+            'boolean': always_allowed_keys,
+            'flag': always_allowed_keys,
+            'count': always_allowed_keys,
+            'enumerated': always_allowed_keys + ['n_values'],
+            'linear': general_keys,
+            'exponential': general_keys
+            }
+        # We removed extended_statistics_ok on the client, but the server-side,
+        # where _strict_type_checks==False, has to deal with historical data.
+        if not self._strict_type_checks:
+            table['exponential'].append('extended_statistics_ok')
+
+        table_dispatch(definition['kind'], table,
+                       lambda allowed_keys: Histogram.check_keys(name, definition, allowed_keys))
+
+        if 'alert_emails' not in definition:
+            if whitelists is not None and name not in whitelists['alert_emails']:
+                raise KeyError, 'New histogram "%s" must have an alert_emails field.' % name
+        elif not isinstance(definition['alert_emails'], list):
+            raise KeyError, 'alert_emails must be an array (in histogram "%s")' % name
+
+        Histogram.check_name(name)
+        self.check_field_types(name, definition)
+        Histogram.check_expiration(name, definition)
+        Histogram.check_bug_numbers(name, definition)
+
+    @staticmethod
+    def check_name(name):
+        if '#' in name:
+            raise ValueError, '"#" not permitted for %s' % (name)
+
+    @staticmethod
+    def check_expiration(name, definition):
+        expiration = definition.get('expires_in_version')
+
+        if not expiration:
+            return
+
+        if re.match(r'^[1-9][0-9]*$', expiration):
+            expiration = expiration + ".0a1"
+        elif re.match(r'^[1-9][0-9]*\.0$', expiration):
+            expiration = expiration + "a1"
+
+        definition['expires_in_version'] = expiration
+
+    @staticmethod
+    def check_bug_numbers(name, definition):
+        bug_numbers = definition.get('bug_numbers')
+        if not bug_numbers:
+            if whitelists is None or name in whitelists['bug_numbers']:
+                return
+            else:
+                raise KeyError, 'New histogram "%s" must have a bug_numbers field.' % name
+
+        if not isinstance(bug_numbers, list):
+            raise ValueError, 'bug_numbers field for "%s" should be an array' % (name)
+
+        if not all(type(num) is int for num in bug_numbers):
+            raise ValueError, 'bug_numbers array for "%s" should only contain integers' % (name)
+
+    def check_field_types(self, name, definition):
+        # Define expected types for the histogram properties.
+        type_checked_fields = {
+                "n_buckets": int,
+                "n_values": int,
+                "low": int,
+                "high": int,
+                "keyed": bool,
+                "expires_in_version": basestring,
+                "kind": basestring,
+                "description": basestring,
+                "cpp_guard": basestring,
+                "releaseChannelCollection": basestring
+            }
+
+        # For the server-side, where _strict_type_checks==False, we want to
+        # skip the stricter type checks for these fields for dealing with
+        # historical data.
+        coerce_fields = ["low", "high", "n_values", "n_buckets"]
+        if not self._strict_type_checks:
+            def try_to_coerce_to_number(v):
+                try:
+                    return eval(v, {})
+                except:
+                    return v
+            for key in [k for k in coerce_fields if k in definition]:
+                definition[key] = try_to_coerce_to_number(definition[key])
+
+        for key, key_type in type_checked_fields.iteritems():
+            if not key in definition:
+                continue
+            if not isinstance(definition[key], key_type):
+                if key_type is basestring:
+                    type_name = "string"
+                else:
+                    type_name = key_type.__name__
+                raise ValueError, ('value for key "{0}" in Histogram "{1}" '
+                        'should be {2}').format(key, name, type_name)
+
+    @staticmethod
+    def check_keys(name, definition, allowed_keys):
+        for key in definition.iterkeys():
+            if key not in allowed_keys:
+                raise KeyError, '%s not permitted for %s' % (key, name)
+
+    def set_bucket_parameters(self, low, high, n_buckets):
+        self._low = low
+        self._high = high
+        self._n_buckets = n_buckets
+        if whitelists is not None and self._n_buckets > 100 and type(self._n_buckets) is int:
+            if self._name not in whitelists['n_buckets']:
+                raise KeyError, ('New histogram "%s" is not permitted to have more than 100 buckets. '
+                                'Histograms with large numbers of buckets use disproportionately high amounts of resources. '
+                                'Contact the Telemetry team (e.g. in #telemetry) if you think an exception ought to be made.' % self._name)
+
+    @staticmethod
+    def boolean_flag_bucket_parameters(definition):
+        return (1, 2, 3)
+
+    @staticmethod
+    def linear_bucket_parameters(definition):
+        return (definition.get('low', 1),
+                definition['high'],
+                definition['n_buckets'])
+
+    @staticmethod
+    def enumerated_bucket_parameters(definition):
+        n_values = definition['n_values']
+        return (1, n_values, n_values + 1)
+
+    @staticmethod
+    def exponential_bucket_parameters(definition):
+        return (definition.get('low', 1),
+                definition['high'],
+                definition['n_buckets'])
+
+# We support generating histograms from multiple different input files, not
+# just Histograms.json.  For each file's basename, we have a specific
+# routine to parse that file, and return a dictionary mapping histogram
+# names to histogram parameters.
+def from_Histograms_json(filename):
+    with open(filename, 'r') as f:
+        try:
+            histograms = json.load(f, object_pairs_hook=OrderedDict)
+        except ValueError, e:
+            raise BaseException, "error parsing histograms in %s: %s" % (filename, e.message)
+    return histograms
+
+def from_UseCounters_conf(filename):
+    return usecounters.generate_histograms(filename)
+
+def from_nsDeprecatedOperationList(filename):
+    operation_regex = re.compile('^DEPRECATED_OPERATION\\(([^)]+)\\)')
+    histograms = collections.OrderedDict()
+
+    with open(filename, 'r') as f:
+        for line in f:
+            match = operation_regex.search(line)
+            if not match:
+                continue
+
+            op = match.group(1)
+
+            def add_counter(context):
+                name = 'USE_COUNTER2_DEPRECATED_%s_%s' % (op, context.upper())
+                histograms[name] = {
+                    'expires_in_version': 'never',
+                    'kind': 'boolean',
+                    'description': 'Whether a %s used %s' % (context, op)
+                }
+            add_counter('document')
+            add_counter('page')
+
+    return histograms
+
+FILENAME_PARSERS = {
+    'Histograms.json': from_Histograms_json,
+    'nsDeprecatedOperationList.h': from_nsDeprecatedOperationList,
+}
+
+# Similarly to the dance above with buildconfig, usecounters may not be
+# available, so handle that gracefully.
+try:
+    import usecounters
+
+    FILENAME_PARSERS['UseCounters.conf'] = from_UseCounters_conf
+except ImportError:
+    pass
+
+def from_files(filenames):
+    """Return an iterator that provides a sequence of Histograms for
+the histograms defined in filenames.
+    """
+    all_histograms = OrderedDict()
+    for filename in filenames:
+        parser = FILENAME_PARSERS[os.path.basename(filename)]
+        histograms = parser(filename)
+
+        # OrderedDicts are important, because then the iteration order over
+        # the parsed histograms is stable, which makes the insertion into
+        # all_histograms stable, which makes ordering in generated files
+        # stable, which makes builds more deterministic.
+        if not isinstance(histograms, OrderedDict):
+            raise BaseException, "histogram parser didn't provide an OrderedDict"
+
+        for (name, definition) in histograms.iteritems():
+            if all_histograms.has_key(name):
+                raise DefinitionException, "duplicate histogram name %s" % name
+            all_histograms[name] = definition
+
+    # We require that all USE_COUNTER2_* histograms be defined in a contiguous
+    # block.
+    use_counter_indices = filter(lambda x: x[1].startswith("USE_COUNTER2_"),
+                                 enumerate(all_histograms.iterkeys()));
+    if use_counter_indices:
+        lower_bound = use_counter_indices[0][0]
+        upper_bound = use_counter_indices[-1][0]
+        n_counters = upper_bound - lower_bound + 1
+        if n_counters != len(use_counter_indices):
+            raise DefinitionException, "use counter histograms must be defined in a contiguous block"
+
+    for (name, definition) in all_histograms.iteritems():
+        yield Histogram(name, definition, strict_type_checks=True)
--- a/moztelemetry/spark.py
+++ b/moztelemetry/spark.py
@ -241,15 +241,8 @@ def get_records(sc, source_name, **kwargs):
    else:
        sample = files

-    # TODO: Make sure that "bucket_name" matches the v4 bucket name, otherwise
-    #       introduce a "bucket" parameter to _read_v4_ranges
    parallelism = max(len(sample), sc.defaultParallelism)
-    ranges = sc.parallelize(sample, parallelism).flatMap(_read_v4_ranges).collect()
-
-    if len(ranges) == 0:
-        return sc.parallelize([])
-    else:
-        return sc.parallelize(ranges, len(ranges)).flatMap(_read_v4_range)
+    return sc.parallelize(sample, parallelism).flatMap(_read_v4)


 def _get_data_sources():
@ -362,12 +355,7 @@ def _get_pings_v4(sc, **kwargs):
    else:
        parallelism = len(sample)

-    ranges = sc.parallelize(sample, parallelism).flatMap(_read_v4_ranges).collect()
-
-    if len(ranges) == 0:
-        return sc.parallelize([])
-    else:
-        return sc.parallelize(ranges, parallelism).flatMap(_read_v4_range)
+    return sc.parallelize(sample, parallelism).flatMap(_read_v4)


 def _get_filenames_v2(**kwargs):
@ -431,28 +419,6 @@ def _read_v4(filename):
        return []


-def _read_v4_ranges(filename):
-    try:
-        key = _bucket_v4.get_key(filename)
-        if key is None:
-            return []
-        n_chunks = (key.size / _chunk_size) + 1
-        return zip([filename]*n_chunks, range(n_chunks))
-    except ssl.SSLError:
-        return []
-
-
-def _read_v4_range(filename_chunk):
-    try:
-        filename, chunk = filename_chunk
-        start = _chunk_size*chunk
-        key = _bucket_v4.get_key(filename)
-        key.open_read(headers={'Range': "bytes={}-".format(start)})
-        return parse_heka_message(key, boundary_bytes=_chunk_size)
-    except ssl.SSLError:
-        return []
-
-
 def _get_ping_properties(ping, paths, only_median, with_processes,
                         histograms_url, additional_histograms):
    result = {}
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,2 @@
+[aliases]
+test=pytest
--- a/setup.py
+++ b/setup.py
@ -4,25 +4,19 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-import urllib
 from setuptools import setup
-from setuptools.command.install import install

-class FetchExternal(install):
-    def run(self):
-        urllib.urlretrieve("https://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/components/telemetry/histogram_tools.py", "moztelemetry/histogram_tools.py")
-        install.run(self)

-setup(cmdclass={'install': FetchExternal},
-      name='python_moztelemetry',
-      version='0.3.9.7',
-      author='Roberto Agostino Vitillo',
-      author_email='rvitillo@mozilla.com',
-      description='Spark bindings for Mozilla Telemetry',
-      url='https://github.com/vitillo/python_moztelemetry',
-      packages=['moztelemetry'],
-      package_dir={'moztelemetry': 'moztelemetry'},
-      install_requires=['boto', 'backports.lzma', 'ujson', 'requests', 'protobuf', 'expiringdict', 'functools32', 'py4j', 'pandas>=0.14.1', 'numpy>=1.8.2', 'joblib', 'telemetry-tools'],
-      test_suite='nose.collector',
-      tests_require=['mock', 'nose'])
+setup(
+    name='python_moztelemetry',
+    version='0.3.9.8',
+    author='Roberto Agostino Vitillo',
+    author_email='rvitillo@mozilla.com',
+    description='Spark bindings for Mozilla Telemetry',
+    url='https://github.com/vitillo/python_moztelemetry',
+    packages=['moztelemetry'],
+    package_dir={'moztelemetry': 'moztelemetry'},
+    install_requires=['boto', 'backports.lzma', 'ujson', 'requests', 'protobuf', 'expiringdict', 'functools32', 'py4j', 'pandas>=0.14.1', 'numpy>=1.8.2', 'joblib', 'telemetry-tools'],
+    setup_requires = ['pytest-runner'],
+    tests_require = ['pytest'],
+)
--- a/tests/test_histogram.py
+++ b/tests/test_histogram.py
@ -0,0 +1,38 @@
+from moztelemetry.histogram import Histogram
+
+
+def test_histogram_with_computed_value():
+    # Histogram with computed value
+    Histogram("GC_REASON_2",
+              [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1,
+               0, 0, 0, 11, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2517, -1, -1,
+               116979, 0])
+
+
+def test_histogram_without_revision():
+    # Histogram without revision
+    Histogram("STARTUP_CRASH_DETECTED",
+              [1, 0, 0, 0, -1, -1, 0, 0],
+              "http://hg.mozilla.org/mozilla-central/rev/da2f28836843")
+
+
+def test_histogram_with_revision():
+    # Histogram with revision
+    Histogram("HTTPCONNMGR_USED_SPECULATIVE_CONN",
+              [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779, -1,
+               -1],
+              "http://hg.mozilla.org/mozilla-central/rev/37ddc5e2eb72")
+
+
+def test_startup_histogram():
+    # Startup histogram
+    Histogram("STARTUP_HTTPCONNMGR_USED_SPECULATIVE_CONN",
+              [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 1, 0.693147182464599, 0.480453014373779,
+               -1, -1])