Renamed test and moved scripts to zip archive for ease of deployment
This commit is contained in:
Родитель
336f57764f
Коммит
d62a79e6d8
|
@ -1,5 +1,5 @@
|
|||
*.pyc
|
||||
|
||||
dashboard.zip
|
||||
histogram_tools.py
|
||||
histogram_specs.py
|
||||
specs.py
|
||||
|
|
7
Makefile
7
Makefile
|
@ -1,5 +1,5 @@
|
|||
FILES= histogram_tools.py Histograms.json specs.py
|
||||
download: $(FILES)
|
||||
FILES= histogram_tools.py Histograms.json specs.py dashboard.zip
|
||||
all: $(FILES)
|
||||
|
||||
Histograms.json:
|
||||
wget -c http://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/components/telemetry/Histograms.json -O $@
|
||||
|
@ -10,5 +10,8 @@ histogram_tools.py:
|
|||
specs.py: Histograms.json
|
||||
python specgen.py $< > $@
|
||||
|
||||
dashboard.zip: specs.py dashboard.py
|
||||
zip $@ $?
|
||||
|
||||
clean:
|
||||
rm -f $(FILES)
|
||||
|
|
337
dashboard.py
337
dashboard.py
|
@ -1,13 +1,23 @@
|
|||
try:
|
||||
import simplejson as json
|
||||
print "Using simplejson for faster json parsing"
|
||||
except ImportError:
|
||||
import json
|
||||
import sys
|
||||
import math
|
||||
import sys
|
||||
|
||||
# Import histogram specs generated by the Makefile using specgen.py
|
||||
import specs
|
||||
|
||||
verbose = True
|
||||
# Counts number of times we've printed a log message
|
||||
logMsgCount = {}
|
||||
|
||||
# Auxiliary method to write log messages
|
||||
def log(msg, *args):
    """Print ``msg % args`` to stderr, at most 10 times per distinct ``msg``.

    The unformatted ``msg`` is the throttling key; the number of prints still
    allowed for it is tracked in the module-level ``logMsgCount`` dict.
    """
    remaining = logMsgCount.get(msg, 10)
    if remaining <= 0:
        # Allowance for this message is used up, stay silent
        return
    logMsgCount[msg] = remaining - 1
    print >> sys.stderr, msg % args
|
||||
|
||||
# Auxiliary method for computing bucket offsets from parameters, it is stolen
|
||||
# from histogram_tools.py, though slightly modified...
|
||||
|
@ -44,35 +54,67 @@ simple_measures_buckets = (
|
|||
exponential_buckets(1, 30000, 50)
|
||||
)
|
||||
|
||||
# Cache of all output values, we do a linear scan so we won't write anything
|
||||
# until map_finished() gets called... This is a hack that makes linear scans
|
||||
# a lot faster :)
|
||||
cache = {}
|
||||
|
||||
SPECS = "scripts/histogram_specs.json"
|
||||
histogram_specs = json.loads(
|
||||
jydoop.getResource(SPECS))
|
||||
def map_finished(context):
    """Flush every cached aggregate to ``context`` and reset the cache.

    Each ``(key, value)`` pair held in the module-level ``cache`` is handed to
    ``context.write``; afterwards ``cache`` is rebound to a new empty dict.
    """
    # The declaration has to precede any use of ``cache`` in this function:
    # reading the name before its ``global`` statement is a SyntaxWarning on
    # Python 2 and a SyntaxError on Python 3.
    global cache
    log("map_finished outputing: %s keys", len(cache))
    for key, value in cache.iteritems():
        context.write(key, value)
    cache = {}
|
||||
|
||||
def map(uid, line, context):
|
||||
global histogram_specs
|
||||
# Auxiliary function for aggregating a result to the cache, we pass in context
|
||||
# so we can skip caching here should we ever want to do this
|
||||
def write_to_cache(key, value, context):
    """Merge ``value`` into the module-level ``cache`` entry for ``key``.

    ``value`` is a dict mapping a filter path to a list of numbers.  The first
    dict seen for a key, and the first list seen for a filter path, are stored
    by reference (not copied); later lists for the same path are added to the
    stored list element by element, in place.

    ``context`` is not used; it is accepted so that caching could be bypassed
    here without changing the callers.
    """
    global cache
    cachedValue = cache.get(key, None)
    if cachedValue is None:
        cache[key] = value
        return
    for filterPath, hgramValues in value.iteritems():
        existing = cachedValue.get(filterPath, None)
        if existing is None:
            cachedValue[filterPath] = hgramValues
            continue
        for y in xrange(0, len(hgramValues)):
            # Coalesce BOTH operands: the list stored first was never
            # sanitized, so a None slot in it would otherwise raise TypeError
            existing[y] = (existing[y] or 0) + (hgramValues[y] or 0)
|
||||
|
||||
payload = json.loads(line)
|
||||
# histogram incoming format:
|
||||
# [
|
||||
# bucket0, bucket1, ..., bucketN,
|
||||
# sum, log_sum, log_sum_squares, sum_squares_lo, sum_squares_hi
|
||||
# ]
|
||||
# Aggregated histogram format:
|
||||
# [
|
||||
# bucket0, bucket1, ..., bucketN,
|
||||
# sum, log_sum, log_sum_squares, sum_squares_lo, sum_squares_hi, count
|
||||
# ]
|
||||
# where count is the number of histograms aggregated in the histogram.
|
||||
|
||||
def map(key, dims, value, context):
|
||||
# Unpack dimensions
|
||||
reason, appName, channel, version, buildId, submissionDate = dims
|
||||
|
||||
# Get the major version
|
||||
majorVersion = version.split('.')[0]
|
||||
|
||||
# Get the build date, ignore the rest of the buildId
|
||||
buildDate = buildId[:8]
|
||||
|
||||
# Load JSON payload
|
||||
payload = json.loads(value)
|
||||
|
||||
# Get OS, osVersion and architecture information
|
||||
try:
|
||||
i = payload['info']
|
||||
channel = i.get('appUpdateChannel', "too_old")
|
||||
OS = i['OS']
|
||||
appName = i['appName']
|
||||
reason = i['reason']
|
||||
osVersion = str(i['version'])
|
||||
#only care about major versions
|
||||
appVersion = i['appVersion'].split('.')[0]
|
||||
arch = i['arch']
|
||||
buildDate = i['appBuildID'][:8]
|
||||
info = payload['info']
|
||||
OS = info['OS']
|
||||
osVersion = str(info['version'])
|
||||
arch = info['arch']
|
||||
except (KeyError, IndexError, UnicodeEncodeError):
|
||||
if verbose:
|
||||
msg = "error while unpacking the payload"
|
||||
print >> sys.stderr, msg
|
||||
return
|
||||
|
||||
# TODO: histogram_specs should specify the list of versions/channels we
|
||||
# care about
|
||||
if not channel in ['release', 'aurora', 'nightly', 'beta', 'nightly-ux']:
|
||||
log("error while unpacking the payload")
|
||||
return
|
||||
|
||||
# TODO: combine OS + osVersion + sanitize on crazy platforms like Linux to
|
||||
|
@ -80,226 +122,87 @@ def map(uid, line, context):
|
|||
if OS == "Linux":
|
||||
osVersion = osVersion[:3]
|
||||
|
||||
path = (buildDate, reason, appName, OS, osVersion, arch)
|
||||
# Sanitize path
|
||||
for val in path:
|
||||
if not isinstance(val, basestring) and type(val) in (int, float, long):
|
||||
if verbose:
|
||||
print >> sys.stderr, "Found type %s in path" % type(val)
|
||||
return
|
||||
# Create filter path
|
||||
filterPath = (buildDate, reason, appName, OS, osVersion, arch)
|
||||
|
||||
# Sanitize channel and appVersion
|
||||
for val in (channel, appVersion):
|
||||
if not isinstance(val, basestring) and type(val) in (int, float, long):
|
||||
if verbose:
|
||||
print >> sys.stderr, ("Found type %s in channel or appVersion" %
|
||||
type(val))
|
||||
return
|
||||
|
||||
histograms = payload.get('histograms', None)
|
||||
if histograms is None:
|
||||
histograms = {}
|
||||
if verbose:
|
||||
msg = "histograms is None in map"
|
||||
print >> sys.stderr, msg
|
||||
for h_name, h_values in histograms.iteritems():
|
||||
bucket2index = histogram_specs.get(h_name, None)
|
||||
if bucket2index is None:
|
||||
if verbose:
|
||||
msg = "bucket2index is None in map"
|
||||
print >> sys.stderr, msg
|
||||
# For each histogram
|
||||
for hgramName, hgramValues in payload.get('histograms', {}).iteritems():
|
||||
# Check that we have bucket information on this histogram
|
||||
bucket2index = specs.histograms.get(hgramName, None)
|
||||
if bucket2index == None:
|
||||
log("Missing bucket2index for %s", hgramName)
|
||||
continue
|
||||
else:
|
||||
bucket2index = bucket2index[0]
|
||||
|
||||
# most buckets contain 0s, so preallocation is a significant win
|
||||
outarray = [0] * (len(bucket2index) + 4)
|
||||
|
||||
index_error = False
|
||||
type_error = False
|
||||
if not isinstance(h_values, dict):
|
||||
if verbose:
|
||||
msg = "h_values is not a dictionary"
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
|
||||
try:
|
||||
values = h_values.get('values', None)
|
||||
except AttributeError:
|
||||
msg = "h_values was not a dict"
|
||||
print >> sys.stderr, msg
|
||||
return
|
||||
if values is None:
|
||||
continue
|
||||
for bucket, value in values.iteritems():
|
||||
index = bucket2index.get(bucket, None)
|
||||
if index is None:
|
||||
#print "%s's does not feature %s bucket in schema"
|
||||
# % (h_name, bucket)
|
||||
index_error = True
|
||||
break
|
||||
if type(value) not in (int, long, float):
|
||||
type_error = True
|
||||
if verbose:
|
||||
print >> sys.stderr, "Bad value type: %s " % repr(value)
|
||||
break
|
||||
outarray[index] = value
|
||||
if index_error:
|
||||
if verbose:
|
||||
msg = "index is None in map"
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
if type_error:
|
||||
if verbose:
|
||||
msg = "value is not int, long or float"
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
|
||||
histogram_sum = h_values.get('sum', None)
|
||||
if histogram_sum is None:
|
||||
if verbose:
|
||||
msg = "histogram_sum is None in map"
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
if type(histogram_sum) not in (int, long, float):
|
||||
if verbose:
|
||||
msg = ("histogram_sum is not int, long or float, but: %s" %
|
||||
type(histogram_sum))
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
# if statistics isn't available we just leave the two slots as zeroes
|
||||
if 'sum_squares_hi' in h_values and 'sum_squares_lo' in h_values:
|
||||
outarray[-4] = h_values.get('sum_squares_hi', 0)
|
||||
outarray[-3] = h_values.get('sum_squares_lo', 0)
|
||||
elif 'log_sum' in h_values and 'log_sum_squares' in h_values:
|
||||
outarray[-4] = h_values.get('log_sum', 0)
|
||||
outarray[-3] = h_values.get('log_sum_squares', 0)
|
||||
if type(outarray[-4]) not in (int, long, float):
|
||||
if verbose:
|
||||
print >> sys.stderr, ("sum_squares_hi or log_sum is type %s" %
|
||||
type(outarray[-4]))
|
||||
continue
|
||||
if type(outarray[-3]) not in (int, long, float):
|
||||
if verbose:
|
||||
msg = ("sum_squares_lo or log_sum_squares is type %s" %
|
||||
type(outarray[-3]))
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
outarray[-2] = histogram_sum
|
||||
outarray[-1] = 1 # count
|
||||
try:
|
||||
context.write((channel, appVersion, h_name), {path: outarray})
|
||||
except TypeError:
|
||||
dict_locations = [p for p, t in enumerate(path) if type(t) is dict]
|
||||
if dict_locations:
|
||||
field_names = ["buildDate", "reason", "appName", "OS",
|
||||
"osVersion", "arch"]
|
||||
dict_field_names = [field_names[i] for i in dict_locations]
|
||||
msg = ("unable to hash the following `path` fields: %s" %
|
||||
(' '.join(dict_field_names)))
|
||||
else:
|
||||
msg = "TypeError when writing map output."
|
||||
if verbose:
|
||||
print >> sys.stderr, msg
|
||||
continue
|
||||
|
||||
# Abort if bucket length doesn't match
|
||||
if len(hgramValues) == len(bucket2index[0]) + 5:
|
||||
write_to_cache((channel, majorVersion, hgramName),
|
||||
{filterPath: hgramValues + [1]}, context)
|
||||
|
||||
# Now read and output simple measures
|
||||
simple_measures = payload.get('simpleMeasurements', None)
|
||||
if simple_measures is None:
|
||||
if verbose:
|
||||
msg = "SimpleMeasures are missing..."
|
||||
print >> sys.stderr, msg
|
||||
return
|
||||
for sm_name, sm_value in simple_measures.iteritems():
|
||||
for name, value in payload.get('simpleMeasurements', {}).iteritems():
|
||||
# Handle cases where the value is a dictionary of simple measures
|
||||
if type(sm_value) == dict:
|
||||
for sub_name, sub_value in sm_value.iteritems():
|
||||
map_simplemeasure(channel, appVersion, path,
|
||||
sm_name + "_" + sub_name, sub_value, context)
|
||||
if type(value) == dict:
|
||||
for subName, subValue in value.iteritems():
|
||||
map_simplemeasure(channel, majorVersion, filterPath,
|
||||
name + "_" + str(subName), subValue, context)
|
||||
else:
|
||||
map_simplemeasure(channel, appVersion, path, sm_name, sm_value,
|
||||
context)
|
||||
|
||||
map_simplemeasure(channel, majorVersion, filterPath, str(name),
|
||||
value, context)
|
||||
|
||||
# Map a simple measure
|
||||
def map_simplemeasure(channel, appVersion, path, name, value, context):
|
||||
def map_simplemeasure(channel, majorVersion, filterPath, name, value, context):
|
||||
# Sanity check value
|
||||
if type(value) not in (int, long):
|
||||
if verbose:
|
||||
msg = ("%s is not a value type for simpleMeasurements \"%s\"" %
|
||||
(type(value), name))
|
||||
print >> sys.stderr, msg
|
||||
if type(value) not in (int, long, float):
|
||||
log("%s is not a value type for simpleMeasurements \"%s\"",
|
||||
type(value), name)
|
||||
return
|
||||
|
||||
bucket = simple_measures_buckets[1]
|
||||
outarray = [0] * (len(bucket) + 5)
|
||||
outarray = [0] * (len(bucket) + 6)
|
||||
for i in reversed(range(0, len(bucket))):
|
||||
if value >= bucket[i]:
|
||||
outarray[i] = 1
|
||||
break
|
||||
|
||||
log_val = math.log(math.fabs(value) + 1)
|
||||
outarray[-4] = log_val # log_sum
|
||||
outarray[-3] = log_val * log_val # log_sum_squares
|
||||
outarray[-2] = value # sum
|
||||
outarray[-6] = value # sum
|
||||
outarray[-5] = log_val # log_sum
|
||||
outarray[-4] = log_val * log_val # log_sum_squares
|
||||
outarray[-3] = 0 # sum_squares_lo
|
||||
outarray[-2] = 0 # sum_squares_hi
|
||||
outarray[-1] = 1 # count
|
||||
|
||||
# Output result array
|
||||
context.write((channel, appVersion, "SIMPLE_MEASURES_" + name.upper()),
|
||||
{path: outarray})
|
||||
|
||||
write_to_cache((channel, majorVersion, "SIMPLE_MEASURES_" + name.upper()),
|
||||
{filterPath: outarray}, context)
|
||||
|
||||
def commonCombine(values):
|
||||
out = {}
|
||||
output = {}
|
||||
for d in values:
|
||||
for filter_path, histogram in d.iteritems():
|
||||
existing = out.get(filter_path, None)
|
||||
for filterPath, hgramValues in d.iteritems():
|
||||
existing = output.get(filterPath, None)
|
||||
if existing is None:
|
||||
out[filter_path] = histogram
|
||||
output[filterPath] = hgramValues
|
||||
continue
|
||||
for y in range(0, len(histogram)):
|
||||
existing[y] += (histogram[y] or 0)
|
||||
return out
|
||||
|
||||
|
||||
def combine(key, values, context):
|
||||
out = commonCombine(values)
|
||||
context.write(key, out)
|
||||
|
||||
for y in xrange(0, len(hgramValues)):
|
||||
existing[y] += (hgramValues[y] or 0)
|
||||
return output
|
||||
|
||||
def reduce(key, values, context):
|
||||
out = commonCombine(values)
|
||||
out_values = {}
|
||||
h_name = key[2]
|
||||
for (filter_path, histogram) in out.iteritems():
|
||||
# first, discard any malformed (non int) entries, while allowing floats
|
||||
# in the statistics
|
||||
for i, val in enumerate(histogram):
|
||||
T = type(val)
|
||||
if T is not int:
|
||||
if T is float:
|
||||
if i is len(histogram) - 3 or i is len(histogram) - 4:
|
||||
continue # allow elements of stats to be floats
|
||||
msg = ("discarding %s - %s malformed type: %s on index %i" %
|
||||
('/'.join(filter_path), h_name, T, i))
|
||||
if verbose:
|
||||
print >> sys.stderr, msg
|
||||
return
|
||||
out_values["/".join(filter_path)] = histogram
|
||||
# Produce output ready for json serialization
|
||||
output = {}
|
||||
for filterPath, hgramValues in commonCombine(values).iteritems():
|
||||
output["/".join(filterPath)] = hgramValues
|
||||
|
||||
if h_name.startswith("SIMPLE_MEASURES_"):
|
||||
# Get histogram name
|
||||
hgramName = key[2]
|
||||
if hgramName.startswith("SIMPLE_MEASURES_"):
|
||||
buckets = simple_measures_buckets[1];
|
||||
else:
|
||||
# histogram_specs lookup below is guaranteed to succeed, because of mapper
|
||||
buckets = histogram_specs.get(h_name)[1]
|
||||
buckets = specs.histograms.get(hgramName)[1]
|
||||
|
||||
# Write final output
|
||||
final_out = {
|
||||
'buckets': buckets,
|
||||
'values': out_values
|
||||
'buckets': buckets,
|
||||
'values': output
|
||||
}
|
||||
context.write("/".join(key), json.dumps(final_out))
|
||||
|
||||
|
||||
def output(path, results):
    """Write ``results`` to the file at ``path``, one line per pair.

    ``results`` is an iterable of ``(k, v)`` string pairs; each is written as
    ``k``, a tab, ``v`` and a newline.  An existing file is overwritten.
    """
    f = open(path, 'w')
    try:
        for k, v in results:
            f.write(k + "\t" + v + "\n")
    finally:
        # Close explicitly so buffered data is flushed and the handle is
        # released even if a write fails part-way through
        f.close()
|
||||
|
|
208
test.py
208
test.py
|
@ -1,208 +0,0 @@
|
|||
try:
|
||||
import simplejson as json
|
||||
except ImportError:
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
|
||||
# Import histogram specs generated by the Makefile using specgen.py
|
||||
import specs
|
||||
|
||||
# Counts number of times we've printed a log message
|
||||
logMsgCount = {}
|
||||
|
||||
# Auxiliary method to write log messages
|
||||
def log(msg, *args):
    """Print ``msg % args`` to stderr, at most 10 times per distinct ``msg``.

    The unformatted ``msg`` is the throttling key; the number of prints still
    allowed for it is tracked in the module-level ``logMsgCount`` dict.
    """
    remaining = logMsgCount.get(msg, 10)
    if remaining <= 0:
        # Allowance for this message is used up, stay silent
        return
    logMsgCount[msg] = remaining - 1
    print >> sys.stderr, msg % args
|
||||
|
||||
# Auxiliary method for computing bucket offsets from parameters, it is stolen
|
||||
# from histogram_tools.py, though slightly modified...
|
||||
def exponential_buckets(dmin, dmax, n_buckets):
    """Return ``n_buckets`` increasing bucket lower bounds, spaced exponentially.

    Index 0 is always 0 and index 1 is ``dmin``.  Every later bound is the
    rounded next step of a geometric progression from the current bound
    towards ``dmax``, bumped to ``current + 1`` whenever rounding would not
    move it forward.

    With ``n_buckets`` below 2 there is no slot for ``dmin``; a list of
    ``n_buckets`` zeros (empty for 0 or less) is returned instead of raising
    IndexError.
    """
    ret_array = [0] * n_buckets
    if n_buckets < 2:
        return ret_array

    log_max = math.log(dmax)
    current = dmin
    ret_array[1] = current
    for bucket_index in range(2, n_buckets):
        log_current = math.log(current)
        # Spread the remaining log-distance evenly over the remaining buckets
        log_ratio = (log_max - log_current) / (n_buckets - bucket_index)
        log_next = log_current + log_ratio
        next_value = int(math.floor(math.exp(log_next) + 0.5))
        # Bounds must keep increasing even when rounding collapses a step
        if next_value > current:
            current = next_value
        else:
            current = current + 1
        ret_array[bucket_index] = current
    return ret_array
|
||||
|
||||
# Create a bucket2index mapping from ranges... snippet pretty much stolen
|
||||
# from specgen.py
|
||||
def buckets2index_from_ranges(ranges):
    """Return a dict mapping ``str(bound)`` to its position in ``ranges``.

    If two bounds have the same string form, the later position wins.
    """
    # Deliberately avoids the builtin ``map``: this module defines its own
    # ``map`` (the mapper entry point) further down, so the builtin is only
    # reachable while the module is still being loaded.
    return dict((str(bound), index) for index, bound in enumerate(ranges))
|
||||
|
||||
# Bucket offsets for simple measures
|
||||
simple_measures_buckets = (
|
||||
buckets2index_from_ranges(
|
||||
exponential_buckets(1, 30000, 50)),
|
||||
exponential_buckets(1, 30000, 50)
|
||||
)
|
||||
|
||||
# Cache of all output values, we do a linear scan so we won't write anything
|
||||
# until map_finished() gets called... This is a hack that makes linear scans
|
||||
# a lot faster :)
|
||||
cache = {}
|
||||
|
||||
def map_finished(context):
    """Flush every cached aggregate to ``context`` and reset the cache.

    Each ``(key, value)`` pair held in the module-level ``cache`` is handed to
    ``context.write``; afterwards ``cache`` is rebound to a new empty dict.
    """
    # The declaration has to precede any use of ``cache`` in this function:
    # reading the name before its ``global`` statement is a SyntaxWarning on
    # Python 2 and a SyntaxError on Python 3.
    global cache
    log("map_finished outputing: %s keys", len(cache))
    for key, value in cache.iteritems():
        context.write(key, value)
    cache = {}
|
||||
|
||||
# Auxiliary function for aggregating a result to the cache, we pass in context
|
||||
# so we can skip caching here should we ever want to do this
|
||||
def write_to_cache(key, value, context):
    """Merge ``value`` into the module-level ``cache`` entry for ``key``.

    ``value`` is a dict mapping a filter path to a list of numbers.  The first
    dict seen for a key, and the first list seen for a filter path, are stored
    by reference (not copied); later lists for the same path are added to the
    stored list element by element, in place.

    ``context`` is not used; it is accepted so that caching could be bypassed
    here without changing the callers.
    """
    global cache
    cachedValue = cache.get(key, None)
    if cachedValue is None:
        cache[key] = value
        return
    for filterPath, hgramValues in value.iteritems():
        existing = cachedValue.get(filterPath, None)
        if existing is None:
            cachedValue[filterPath] = hgramValues
            continue
        for y in xrange(0, len(hgramValues)):
            # Coalesce BOTH operands: the list stored first was never
            # sanitized, so a None slot in it would otherwise raise TypeError
            existing[y] = (existing[y] or 0) + (hgramValues[y] or 0)
|
||||
|
||||
# histogram incoming format:
|
||||
# [
|
||||
# bucket0, bucket1, ..., bucketN,
|
||||
# sum, log_sum, log_sum_squares, sum_squares_lo, sum_squares_hi
|
||||
# ]
|
||||
# Aggregated histogram format:
|
||||
# [
|
||||
# bucket0, bucket1, ..., bucketN,
|
||||
# sum, log_sum, log_sum_squares, sum_squares_lo, sum_squares_hi, count
|
||||
# ]
|
||||
# where count is the number of histograms aggregated in the histogram.
|
||||
|
||||
def map(key, dims, value, context):
    """Aggregate one submission's histograms and simple measures into the cache.

    ``dims`` must unpack into exactly six items: reason, appName, channel,
    version, buildId and submissionDate.  ``value`` is the JSON-encoded
    payload.  Results are accumulated through ``write_to_cache`` under
    ``(channel, majorVersion, <name>)`` keys; nothing is returned.  ``key`` is
    not used.

    The submission is dropped (after a log message) when the payload's
    ``info`` section lacks ``OS``, ``version`` or ``arch``.
    """
    # Unpack dimensions
    reason, appName, channel, version, buildId, submissionDate = dims

    # Get the major version
    majorVersion = version.split('.')[0]

    # Get the build date, ignore the rest of the buildId
    buildDate = buildId[:8]

    # Load JSON payload
    payload = json.loads(value)

    # Get OS, osVersion and architecture information
    try:
        info = payload['info']
        OS = info['OS']
        osVersion = str(info['version'])
        arch = info['arch']
    except (KeyError, IndexError, UnicodeEncodeError):
        log("error while unpacking the payload")
        return

    # TODO: combine OS + osVersion + sanitize on crazy platforms like Linux to
    # reduce pointless choices
    if OS == "Linux":
        osVersion = osVersion[:3]

    # Create filter path
    filterPath = (buildDate, reason, appName, OS, osVersion, arch)

    # For each histogram; ``or {}`` also covers a key that is present but
    # null, which ``.get(name, {})`` alone would pass through as None
    histograms = payload.get('histograms') or {}
    for hgramName, hgramValues in histograms.iteritems():
        # Check that we have bucket information on this histogram
        bucket2index = specs.histograms.get(hgramName, None)
        if bucket2index is None:
            log("Missing bucket2index for %s", hgramName)
            continue
        # Skip the histogram if its length doesn't match the spec: one slot
        # per bucket plus the 5 trailing statistics; a count of 1 is appended
        if len(hgramValues) == len(bucket2index[0]) + 5:
            write_to_cache((channel, majorVersion, hgramName),
                           {filterPath: hgramValues + [1]}, context)

    # Now read and output simple measures (the loop variable is deliberately
    # not called ``value`` so it does not shadow the payload parameter)
    simpleMeasurements = payload.get('simpleMeasurements') or {}
    for name, measure in simpleMeasurements.iteritems():
        # Handle cases where the value is a dictionary of simple measures
        if isinstance(measure, dict):
            for subName, subValue in measure.iteritems():
                map_simplemeasure(channel, majorVersion, filterPath,
                                  name + "_" + str(subName), subValue, context)
        else:
            map_simplemeasure(channel, majorVersion, filterPath, str(name),
                              measure, context)
|
||||
|
||||
# Map a simple measure
|
||||
def map_simplemeasure(channel, majorVersion, filterPath, name, value, context):
    """Turn one simple measurement into a single-sample histogram and cache it.

    Values whose type is not int, long or float are logged and ignored.
    Otherwise the sample is counted in the highest bucket of
    ``simple_measures_buckets[1]`` whose bound does not exceed ``value``
    (in none if every bound exceeds it), followed by the six trailing slots
    sum, log_sum, log_sum_squares, sum_squares_lo, sum_squares_hi and count.
    The result is cached under
    ``(channel, majorVersion, "SIMPLE_MEASURES_" + name.upper())``.
    """
    # Sanity check value
    if type(value) not in (int, long, float):
        log("%s is not a value type for simpleMeasurements \"%s\"",
            type(value), name)
        return

    lowerBounds = simple_measures_buckets[1]
    counts = [0] * len(lowerBounds)

    # Scan from the highest bucket downwards; the first bound that the value
    # reaches is the bucket it belongs to
    position = len(lowerBounds) - 1
    while position >= 0:
        if value >= lowerBounds[position]:
            counts[position] = 1
            break
        position -= 1

    logValue = math.log(math.fabs(value) + 1)
    outarray = counts + [
        value,                # sum
        logValue,             # log_sum
        logValue * logValue,  # log_sum_squares
        0,                    # sum_squares_lo
        0,                    # sum_squares_hi
        1,                    # count
    ]

    # Output result array
    write_to_cache((channel, majorVersion, "SIMPLE_MEASURES_" + name.upper()),
                   {filterPath: outarray}, context)
|
||||
|
||||
def commonCombine(values):
    """Sum histogram lists that share a filter path across several dicts.

    ``values`` is an iterable of dicts mapping a filter path to a list of
    numbers.  Returns a new dict with one list per filter path holding the
    element-wise sum of every list seen for that path.  When a path is merged
    more than once, None (or otherwise falsy) entries count as 0.

    The input lists are left untouched: the first list for a path is copied
    before anything is added to it.
    """
    output = {}
    for d in values:
        for filterPath in d:
            hgramValues = d[filterPath]
            existing = output.get(filterPath, None)
            if existing is None:
                # Copy, so the in-place additions below never modify a list
                # that belongs to the caller
                output[filterPath] = list(hgramValues)
                continue
            for y, incoming in enumerate(hgramValues):
                # Coalesce both operands; the first list was stored as-is and
                # may itself contain None
                existing[y] = (existing[y] or 0) + (incoming or 0)
    return output
|
||||
|
||||
def reduce(key, values, context):
    """Combine all values for ``key`` and write them out as one JSON document.

    The dicts in ``values`` are merged with ``commonCombine`` and each filter
    path is joined with "/" to form a string key.  ``key[2]`` is the histogram
    name: names starting with "SIMPLE_MEASURES_" use the simple-measure
    buckets, all others are looked up in ``specs.histograms``.  The output
    record is written under ``"/".join(key)``.
    """
    # Produce output ready for json serialization
    output = dict(
        ("/".join(filterPath), hgramValues)
        for filterPath, hgramValues in commonCombine(values).iteritems()
    )

    # Pick the bucket list matching the histogram name
    hgramName = key[2]
    if not hgramName.startswith("SIMPLE_MEASURES_"):
        buckets = specs.histograms.get(hgramName)[1]
    else:
        buckets = simple_measures_buckets[1]

    # Write final output
    context.write("/".join(key),
                  json.dumps({'buckets': buckets, 'values': output}))
|
Загрузка…
Ссылка в новой задаче