Bug 1463885 - Disrecommend get_one_ping_per_client (#212)
* Update docs to more clearly mark deprecated methods as... deprecated * Bug 1463885 - Disrecommend get_one_ping_per_client even more
This commit is contained in:
Родитель
8d3813c321
Коммит
5175938186
12
docs/api.rst
12
docs/api.rst
|
@ -9,12 +9,18 @@ Dataset
|
||||||
.. automodule:: moztelemetry.dataset
|
.. automodule:: moztelemetry.dataset
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
.. _get_pings:
|
Deprecated ping methods
|
||||||
|
-----------------------
|
||||||
|
|
||||||
get_pings() (deprecated)
|
Before the Dataset API was available, a number of custom methods were
|
||||||
------------------------
|
written for selecting a set of telemetry pings and extracting data
|
||||||
|
from them. These methods are somewhat convoluted and difficult to
|
||||||
|
understand, and are not recommended for new code.
|
||||||
|
|
||||||
.. autofunction:: moztelemetry.spark.get_pings
|
.. autofunction:: moztelemetry.spark.get_pings
|
||||||
|
.. autofunction:: moztelemetry.spark.get_pings_properties
|
||||||
|
.. autofunction:: moztelemetry.spark.get_one_ping_per_client
|
||||||
|
|
||||||
|
|
||||||
Using Spark RDDs
|
Using Spark RDDs
|
||||||
----------------
|
----------------
|
||||||
|
|
|
@ -9,19 +9,16 @@ python_moztelemetry
|
||||||
A simple library to fetch and analyze data collected by the Mozilla Telemetry service.
|
A simple library to fetch and analyze data collected by the Mozilla Telemetry service.
|
||||||
Objects collected by Telemetry are called ``pings``.
|
Objects collected by Telemetry are called ``pings``.
|
||||||
A ping has a number of properties (aka ``dimensions``) and a payload.
|
A ping has a number of properties (aka ``dimensions``) and a payload.
|
||||||
A session of Telemetry data analysis/manipulation typically starts with a query that filters the objects by one or more dimensions.
|
|
||||||
This query can be expressed using either an orm-like api, :ref:`Dataset` or a simple
|
|
||||||
function, :ref:`get_pings`.
|
|
||||||
|
|
||||||
|
A session of Telemetry data analysis/manipulation typically starts
|
||||||
|
with a :ref:`Dataset` query that filters the objects by one or more
|
||||||
|
dimensions, and then extracts the items of interest from their payload.
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
api
|
api
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Indices and tables
|
Indices and tables
|
||||||
==================
|
==================
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,20 @@ from .histogram import Histogram
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def deprecated(func):
    """Decorator that marks a function as deprecated.

    Every call to the decorated function emits a ``DeprecationWarning``
    naming the function, then forwards all positional and keyword
    arguments to it unchanged and returns its result.

    :param func: the callable to mark as deprecated.
    :return: a wrapper that carries over the name, docstring and
             attribute dictionary of ``func``.
    """
    # Imported locally so this helper does not depend on what the module
    # happens to import at the top of the file.
    import functools
    import warnings

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Use the warnings machinery rather than print() so callers can
        # filter, record or escalate the notice. stacklevel=2 attributes
        # the warning to the code calling the deprecated function instead
        # of to this wrapper.
        warnings.warn(
            "Call to deprecated function %s." % func.__name__,
            category=DeprecationWarning,
            stacklevel=2,
        )
        return func(*args, **kwargs)

    return wrapper
|
||||||
|
|
||||||
|
|
||||||
if not boto.config.has_section('Boto'):
|
if not boto.config.has_section('Boto'):
|
||||||
boto.config.add_section('Boto')
|
boto.config.add_section('Boto')
|
||||||
boto.config.set('Boto', 'http_socket_timeout', '10') # https://github.com/boto/boto/issues/2830
|
boto.config.set('Boto', 'http_socket_timeout', '10') # https://github.com/boto/boto/issues/2830
|
||||||
|
@ -47,6 +61,7 @@ class PingCursor(dict):
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated
|
||||||
def get_pings(sc, app=None, build_id=None, channel=None, doc_type='saved_session',
|
def get_pings(sc, app=None, build_id=None, channel=None, doc_type='saved_session',
|
||||||
fraction=1.0, schema=None, source_name='telemetry', source_version='4',
|
fraction=1.0, schema=None, source_name='telemetry', source_version='4',
|
||||||
submission_date=None, version=None):
|
submission_date=None, version=None):
|
||||||
|
@ -109,6 +124,7 @@ def get_pings(sc, app=None, build_id=None, channel=None, doc_type='saved_session
|
||||||
return dataset.records(sc, sample=fraction)
|
return dataset.records(sc, sample=fraction)
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated
|
||||||
def get_pings_properties(pings, paths, only_median=False, with_processes=False,
|
def get_pings_properties(pings, paths, only_median=False, with_processes=False,
|
||||||
histograms_url=None, additional_histograms=None):
|
histograms_url=None, additional_histograms=None):
|
||||||
"""
|
"""
|
||||||
|
@ -151,11 +167,15 @@ def get_pings_properties(pings, paths, only_median=False, with_processes=False,
|
||||||
.filter(lambda p: p)
|
.filter(lambda p: p)
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated
|
||||||
def get_one_ping_per_client(pings):
|
def get_one_ping_per_client(pings):
|
||||||
"""
|
"""
|
||||||
Returns a single ping for each client in the RDD. This operation is expensive
|
Returns a single ping for each client in the RDD.
|
||||||
as it requires data to be shuffled around. It should be run only after extracting
|
|
||||||
a subset with get_pings_properties.
|
THIS METHOD IS NOT RECOMMENDED: The ping to be returned is essentially
|
||||||
|
selected at random. It is also expensive as it requires data to be
|
||||||
|
shuffled around. It should be run only after extracting a subset with
|
||||||
|
get_pings_properties.
|
||||||
"""
|
"""
|
||||||
if isinstance(pings.first(), str):
|
if isinstance(pings.first(), str):
|
||||||
pings = pings.map(lambda p: json.loads(p))
|
pings = pings.map(lambda p: json.loads(p))
|
||||||
|
|
Загрузка…
Ссылка в новой задаче