зеркало из https://github.com/mozilla/inferno.git
renaming result_postprocess to parts_postprocess
This commit is contained in:
Родитель
cac6a10ba7
Коммит
ee3d344ce9
|
@ -64,10 +64,10 @@ Output
|
|||
Run the last name counting map/reduce job::
|
||||
|
||||
diana@ubuntu:~$ inferno -i names.last_names_json
|
||||
2012-03-09 INFO [inferno.lib.job] Processing tags: ['example:chunk:users']
|
||||
2012-03-09 INFO [inferno.lib.job] Started job last_names_json@533:40914:c355f processing 1 blobs
|
||||
2012-03-09 INFO [inferno.lib.job] Done waiting for job last_names_json@533:40914:c355f
|
||||
2012-03-09 INFO [inferno.lib.job] Finished job job last_names_json@533:40914:c355f
|
||||
2012-03-09 Processing tags: ['example:chunk:users']
|
||||
2012-03-09 Started job last_names_json@533:40914:c355f processing 1 blobs
|
||||
2012-03-09 Done waiting for job last_names_json@533:40914:c355f
|
||||
2012-03-09 Finished job last_names_json@533:40914:c355f
|
||||
|
||||
The output::
|
||||
|
||||
|
|
|
@ -44,11 +44,11 @@ Contributions by Candidate
|
|||
|
||||
Run the contributions_by_candidate_name map/reduce job::
|
||||
|
||||
diana@ubuntu:~$ inferno -i election.presidential_2012.contributions_by_candidate_name
|
||||
2012-03-19 INFO [inferno.lib.job] Processing tags: ['gov:chunk:presidential_campaign_finance']
|
||||
2012-03-19 INFO [inferno.lib.job] Started job presidential_2012@533:87210:81a1b processing 1 blobs
|
||||
2012-03-19 INFO [inferno.lib.job] Done waiting for job presidential_2012@533:87210:81a1b
|
||||
2012-03-19 INFO [inferno.lib.job] Finished job presidential_2012@533:87210:81a1b
|
||||
diana@ubuntu:~$ inferno -i election.presidential_2012.by_candidate
|
||||
2012-03-19 Processing tags: ['gov:chunk:presidential_campaign_finance']
|
||||
2012-03-19 Started job presidential_2012@533:87210:81a1b processing 1 blobs
|
||||
2012-03-19 Done waiting for job presidential_2012@533:87210:81a1b
|
||||
2012-03-19 Finished job presidential_2012@533:87210:81a1b
|
||||
|
||||
The output in CSV::
|
||||
|
||||
|
@ -80,32 +80,34 @@ Contributions by Occupation
|
|||
|
||||
Run the contributions_by_occupation_and_candidate_name map/reduce job::
|
||||
|
||||
diana@ubuntu:~$ inferno -i election.presidential_2012.contributions_by_occupation_and_candidate_name > occupations.csv
|
||||
2012-03-19 INFO [inferno.lib.job] Processing tags: ['gov:chunk:presidential_campaign_finance']
|
||||
2012-03-19 INFO [inferno.lib.job] Started job presidential_2012@533:87782:c7c98 processing 1 blobs
|
||||
2012-03-19 INFO [inferno.lib.job] Done waiting for job presidential_2012@533:87782:c7c98
|
||||
2012-03-19 INFO [inferno.lib.job] Finished job presidential_2012@533:87782:c7c98
|
||||
diana@ubuntu:~$ inferno -i election.presidential_2012.by_occupation > occupations.csv
|
||||
2012-03-19 Processing tags: ['gov:chunk:presidential_campaign_finance']
|
||||
2012-03-19 Started job presidential_2012@533:87782:c7c98 processing 1 blobs
|
||||
2012-03-19 Done waiting for job presidential_2012@533:87782:c7c98
|
||||
2012-03-19 Finished job presidential_2012@533:87782:c7c98
|
||||
|
||||
The output::
|
||||
|
||||
diana@ubuntu:~$ tail -n 20 occupations.csv
|
||||
YOUTH CARE WORKER,"Paul, Ron",7,268.96
|
||||
YOUTH CAREER SPECIALIST,"Obama, Barack",3,96.0
|
||||
YOUTH DEVELOPMENT,"Obama, Barack",5,450.0
|
||||
YOUTH DIRECTOR,"Obama, Barack",5,550.0
|
||||
YOUTH MINISTER,"Obama, Barack",3,275.0
|
||||
YOUTH MINISTER,"Paul, Ron",6,230.24
|
||||
YOUTH MINISTER,"Santorum, Rick",1,250.0
|
||||
YOUTH MINISTRY DIRECTOR,"Paul, Ron",2,150.0
|
||||
YOUTH OUTREACH DIRECTOR,"Romney, Mitt",1,1000.0
|
||||
YOUTH PROGRAMS DIRECTOR,"Obama, Barack",6,130.0
|
||||
YOUTH SERVICE COORDINATOR,"Obama, Barack",5,350.0
|
||||
YOUTH SERVICES LIBRARIAN,"Obama, Barack",3,290.0
|
||||
YOUTH SPECIALIST,"Obama, Barack",4,525.0
|
||||
YOUTH WORKER,"Paul, Ron",8,595.12
|
||||
ZEN BUDDHIST PRIEST,"Obama, Barack",1,300.0
|
||||
ZEPPOS AND ASSOCIATES,"Obama, Barack",1,1000.0
|
||||
ZIG ZAG RESTAURANT GROUP,"Paul, Ron",5,950.0
|
||||
ZIMMERMANS DAIRY,"Paul, Ron",5,83.71
|
||||
ZOMBIE SLAYER,"Paul, Ron",8,1556.0
|
||||
ZOOLOGIST,"Obama, Barack",1,100.0
|
||||
diana@ubuntu:~$ grep retired occupations.csv
|
||||
retired,gingrich newt,8810,2279602.27
|
||||
retired,obama barack,74465,15086766.92
|
||||
retired,paul ron,9373,1800563.88
|
||||
retired,romney mitt,12798,6483596.24
|
||||
retired,santorum rick,1752,421952.98
|
||||
|
||||
The output as a table:
|
||||
|
||||
+------------+---------------+--------+-----------------+
|
||||
| Occupation | Candidate | Count | Amount |
|
||||
+============+===============+========+=================+
|
||||
| retired | Obama Barack | 74,465 | $ 15,086,766.92 |
|
||||
+------------+---------------+--------+-----------------+
|
||||
| retired | Romney Mitt | 12,798 | $ 6,483,596.24 |
|
||||
+------------+---------------+--------+-----------------+
|
||||
| retired | Gingrich Newt | 8,810 | $ 2,279,602.27 |
|
||||
+------------+---------------+--------+-----------------+
|
||||
| retired | Paul Ron | 9,373 | $ 1,800,563.88 |
|
||||
+------------+---------------+--------+-----------------+
|
||||
| retired | Santorum Rick | 1,752 | $ 421,952.98 |
|
||||
+------------+---------------+--------+-----------------+
|
||||
|
||||
|
|
|
@ -26,6 +26,11 @@ def candidate_filter(parts, params):
|
|||
yield parts
|
||||
|
||||
|
||||
def occupation_count_filter(parts, params):
|
||||
if parts['count_occupation_candidate'] > 1000:
|
||||
yield parts
|
||||
|
||||
|
||||
RULES = [
|
||||
InfernoRule(
|
||||
name='presidential_2012',
|
||||
|
@ -36,9 +41,8 @@ RULES = [
|
|||
'cand_nm':alphanumeric,
|
||||
'contbr_occupation':alphanumeric,
|
||||
},
|
||||
parts_preprocess=[
|
||||
candidate_filter,
|
||||
count],
|
||||
parts_preprocess=[candidate_filter, count],
|
||||
parts_postprocess=[occupation_count_filter],
|
||||
csv_fields=(
|
||||
'cmte_id', 'cand_id', 'cand_nm', 'contbr_nm', 'contbr_city',
|
||||
'contbr_st', 'contbr_zip', 'contbr_employer', 'contbr_occupation',
|
||||
|
@ -47,7 +51,7 @@ RULES = [
|
|||
),
|
||||
csv_dialect='excel',
|
||||
keysets={
|
||||
'contributions_by_candidate_name':Keyset(
|
||||
'by_candidate':Keyset(
|
||||
key_parts=['cand_nm'],
|
||||
value_parts=['count', 'contb_receipt_amt'],
|
||||
column_mappings={
|
||||
|
@ -55,10 +59,11 @@ RULES = [
|
|||
'contb_receipt_amt': 'amount',
|
||||
},
|
||||
),
|
||||
'contributions_by_occupation_and_candidate_name':Keyset(
|
||||
'by_occupation':Keyset(
|
||||
key_parts=['contbr_occupation', 'cand_nm'],
|
||||
value_parts=['count', 'contb_receipt_amt'],
|
||||
column_mappings={
|
||||
'count': 'count_occupation_candidate',
|
||||
'cand_nm': 'candidate',
|
||||
'contb_receipt_amt': 'amount',
|
||||
'contbr_occupation': 'occupation',
|
||||
|
|
|
@ -11,10 +11,10 @@ def keyset_result(iter, params, **kwargs):
|
|||
return name
|
||||
|
||||
def _post_process(parts_list, params):
|
||||
if hasattr(params, 'result_postprocess'):
|
||||
if hasattr(params, 'parts_postprocess'):
|
||||
# each post-processor may generate multiple 'parts',
|
||||
# these need to be fed into subsequent post-processors
|
||||
for name in params.result_postprocess:
|
||||
for name in params.parts_postprocess:
|
||||
func = getattr(params, name)
|
||||
new_list = []
|
||||
for parts in parts_list:
|
||||
|
|
|
@ -100,7 +100,7 @@ class InfernoRule(object):
|
|||
# other
|
||||
rule_init_function=None,
|
||||
parts_preprocess=None,
|
||||
result_postprocess=None,
|
||||
parts_postprocess=None,
|
||||
field_transforms=None,
|
||||
**kwargs):
|
||||
|
||||
|
@ -181,13 +181,13 @@ class InfernoRule(object):
|
|||
self.params.parts_preprocess = []
|
||||
|
||||
# postprocess
|
||||
if result_postprocess:
|
||||
self.params.result_postprocess = map(
|
||||
lambda func: func.__name__, result_postprocess)
|
||||
for func in result_postprocess:
|
||||
if parts_postprocess:
|
||||
self.params.parts_postprocess = map(
|
||||
lambda func: func.__name__, parts_postprocess)
|
||||
for func in parts_postprocess:
|
||||
self.params.__setattr__(func.__name__, func)
|
||||
else:
|
||||
self.params.result_postprocess = []
|
||||
self.params.parts_postprocess = []
|
||||
|
||||
# transforms
|
||||
if field_transforms:
|
||||
|
@ -224,4 +224,4 @@ class InfernoRule(object):
|
|||
reduce_function=fstr(self.reduce_function),
|
||||
keysets=self.params.keysets,
|
||||
parts_preprocess=self.params.parts_preprocess,
|
||||
result_postprocess=self.params.result_postprocess)
|
||||
parts_postprocess=self.params.parts_postprocess)
|
||||
|
|
|
@ -93,7 +93,7 @@ class TestRuleHandler(DiscoBallHandlerTestCase):
|
|||
'map_function': 'inferno.lib.map.keyset_map',
|
||||
'reduce_function': 'inferno.lib.reduce.keyset_reduce',
|
||||
'parts_preprocess': [],
|
||||
'result_postprocess': [],
|
||||
'parts_postprocess': [],
|
||||
'keysets': {
|
||||
'keyset_1': {
|
||||
'key_parts': ['_keyset', 'key_1'],
|
||||
|
|
|
@ -68,7 +68,7 @@ class TestKeysetResult(object):
|
|||
'Martin,30']
|
||||
mapping = {'count': 'last_name_count'}
|
||||
self.params.keysets['last_name_keyset']['column_mappings'] = mapping
|
||||
self.params.result_postprocess = ['some_filter', 'some_expander']
|
||||
self.params.parts_postprocess = ['some_filter', 'some_expander']
|
||||
self.params.__setattr__('some_filter', some_filter)
|
||||
self.params.__setattr__('some_expander', some_expander)
|
||||
self._assert_keyset_result(data, self.params, expected)
|
||||
|
|
Загрузка…
Ссылка в новой задаче