renaming result_postprocess to parts_postprocess

This commit is contained in:
diana 2012-03-22 11:29:00 -04:00
Родитель cac6a10ba7
Коммит ee3d344ce9
7 изменённых файлов: 58 добавлений и 51 удалений

Просмотреть файл

@ -64,10 +64,10 @@ Output
Run the last name counting map/reduce job::
diana@ubuntu:~$ inferno -i names.last_names_json
2012-03-09 INFO [inferno.lib.job] Processing tags: ['example:chunk:users']
2012-03-09 INFO [inferno.lib.job] Started job last_names_json@533:40914:c355f processing 1 blobs
2012-03-09 INFO [inferno.lib.job] Done waiting for job last_names_json@533:40914:c355f
2012-03-09 INFO [inferno.lib.job] Finished job job last_names_json@533:40914:c355f
2012-03-09 Processing tags: ['example:chunk:users']
2012-03-09 Started job last_names_json@533:40914:c355f processing 1 blobs
2012-03-09 Done waiting for job last_names_json@533:40914:c355f
2012-03-09 Finished job last_names_json@533:40914:c355f
The output::

Просмотреть файл

@ -44,11 +44,11 @@ Contributions by Candidate
Run the by_candidate map/reduce job::
diana@ubuntu:~$ inferno -i election.presidential_2012.contributions_by_candidate_name
2012-03-19 INFO [inferno.lib.job] Processing tags: ['gov:chunk:presidential_campaign_finance']
2012-03-19 INFO [inferno.lib.job] Started job presidential_2012@533:87210:81a1b processing 1 blobs
2012-03-19 INFO [inferno.lib.job] Done waiting for job presidential_2012@533:87210:81a1b
2012-03-19 INFO [inferno.lib.job] Finished job presidential_2012@533:87210:81a1b
diana@ubuntu:~$ inferno -i election.presidential_2012.by_candidate
2012-03-19 Processing tags: ['gov:chunk:presidential_campaign_finance']
2012-03-19 Started job presidential_2012@533:87210:81a1b processing 1 blobs
2012-03-19 Done waiting for job presidential_2012@533:87210:81a1b
2012-03-19 Finished job presidential_2012@533:87210:81a1b
The output in CSV::
@ -80,32 +80,34 @@ Contributions by Occupation
Run the by_occupation map/reduce job::
diana@ubuntu:~$ inferno -i election.presidential_2012.contributions_by_occupation_and_candidate_name > occupations.csv
2012-03-19 INFO [inferno.lib.job] Processing tags: ['gov:chunk:presidential_campaign_finance']
2012-03-19 INFO [inferno.lib.job] Started job presidential_2012@533:87782:c7c98 processing 1 blobs
2012-03-19 INFO [inferno.lib.job] Done waiting for job presidential_2012@533:87782:c7c98
2012-03-19 INFO [inferno.lib.job] Finished job presidential_2012@533:87782:c7c98
diana@ubuntu:~$ inferno -i election.presidential_2012.by_occupation > occupations.csv
2012-03-19 Processing tags: ['gov:chunk:presidential_campaign_finance']
2012-03-19 Started job presidential_2012@533:87782:c7c98 processing 1 blobs
2012-03-19 Done waiting for job presidential_2012@533:87782:c7c98
2012-03-19 Finished job presidential_2012@533:87782:c7c98
The output::
diana@ubuntu:~$ tail -n 20 occupations.csv
YOUTH CARE WORKER,"Paul, Ron",7,268.96
YOUTH CAREER SPECIALIST,"Obama, Barack",3,96.0
YOUTH DEVELOPMENT,"Obama, Barack",5,450.0
YOUTH DIRECTOR,"Obama, Barack",5,550.0
YOUTH MINISTER,"Obama, Barack",3,275.0
YOUTH MINISTER,"Paul, Ron",6,230.24
YOUTH MINISTER,"Santorum, Rick",1,250.0
YOUTH MINISTRY DIRECTOR,"Paul, Ron",2,150.0
YOUTH OUTREACH DIRECTOR,"Romney, Mitt",1,1000.0
YOUTH PROGRAMS DIRECTOR,"Obama, Barack",6,130.0
YOUTH SERVICE COORDINATOR,"Obama, Barack",5,350.0
YOUTH SERVICES LIBRARIAN,"Obama, Barack",3,290.0
YOUTH SPECIALIST,"Obama, Barack",4,525.0
YOUTH WORKER,"Paul, Ron",8,595.12
ZEN BUDDHIST PRIEST,"Obama, Barack",1,300.0
ZEPPOS AND ASSOCIATES,"Obama, Barack",1,1000.0
ZIG ZAG RESTAURANT GROUP,"Paul, Ron",5,950.0
ZIMMERMANS DAIRY,"Paul, Ron",5,83.71
ZOMBIE SLAYER,"Paul, Ron",8,1556.0
ZOOLOGIST,"Obama, Barack",1,100.0
diana@ubuntu:~$ grep retired occupations.csv
retired,gingrich newt,8810,2279602.27
retired,obama barack,74465,15086766.92
retired,paul ron,9373,1800563.88
retired,romney mitt,12798,6483596.24
retired,santorum rick,1752,421952.98
The output as a table:
+------------+---------------+--------+-----------------+
| Occupation | Candidate | Count | Amount |
+============+===============+========+=================+
| retired | Obama Barack | 74,465 | $ 15,086,766.92 |
+------------+---------------+--------+-----------------+
| retired | Romney Mitt | 12,798 | $ 6,483,596.24 |
+------------+---------------+--------+-----------------+
| retired | Gingrich Newt | 8,810 | $ 2,279,602.27 |
+------------+---------------+--------+-----------------+
| retired | Paul Ron | 9,373 | $ 1,800,563.88 |
+------------+---------------+--------+-----------------+
| retired | Santorum Rick | 1,752 | $ 421,952.98 |
+------------+---------------+--------+-----------------+

Просмотреть файл

@ -26,6 +26,11 @@ def candidate_filter(parts, params):
yield parts
def occupation_count_filter(parts, params):
    """Pass ``parts`` through only when its count is above 1000.

    Generator: yields ``parts`` once when
    ``parts['count_occupation_candidate'] > 1000`` and yields nothing
    otherwise. ``params`` is accepted but not used here.
    """
    passes_threshold = parts['count_occupation_candidate'] > 1000
    if not passes_threshold:
        # Below or at the threshold: produce an empty generator.
        return
    yield parts
RULES = [
InfernoRule(
name='presidential_2012',
@ -36,9 +41,8 @@ RULES = [
'cand_nm':alphanumeric,
'contbr_occupation':alphanumeric,
},
parts_preprocess=[
candidate_filter,
count],
parts_preprocess=[candidate_filter, count],
parts_postprocess=[occupation_count_filter],
csv_fields=(
'cmte_id', 'cand_id', 'cand_nm', 'contbr_nm', 'contbr_city',
'contbr_st', 'contbr_zip', 'contbr_employer', 'contbr_occupation',
@ -47,7 +51,7 @@ RULES = [
),
csv_dialect='excel',
keysets={
'contributions_by_candidate_name':Keyset(
'by_candidate':Keyset(
key_parts=['cand_nm'],
value_parts=['count', 'contb_receipt_amt'],
column_mappings={
@ -55,10 +59,11 @@ RULES = [
'contb_receipt_amt': 'amount',
},
),
'contributions_by_occupation_and_candidate_name':Keyset(
'by_occupation':Keyset(
key_parts=['contbr_occupation', 'cand_nm'],
value_parts=['count', 'contb_receipt_amt'],
column_mappings={
'count': 'count_occupation_candidate',
'cand_nm': 'candidate',
'contb_receipt_amt': 'amount',
'contbr_occupation': 'occupation',

Просмотреть файл

@ -11,10 +11,10 @@ def keyset_result(iter, params, **kwargs):
return name
def _post_process(parts_list, params):
if hasattr(params, 'result_postprocess'):
if hasattr(params, 'parts_postprocess'):
# each post-processor may generate multiple 'parts',
# these need to be fed into subsequent post-processors
for name in params.result_postprocess:
for name in params.parts_postprocess:
func = getattr(params, name)
new_list = []
for parts in parts_list:

Просмотреть файл

@ -100,7 +100,7 @@ class InfernoRule(object):
# other
rule_init_function=None,
parts_preprocess=None,
result_postprocess=None,
parts_postprocess=None,
field_transforms=None,
**kwargs):
@ -181,13 +181,13 @@ class InfernoRule(object):
self.params.parts_preprocess = []
# postprocess
if result_postprocess:
self.params.result_postprocess = map(
lambda func: func.__name__, result_postprocess)
for func in result_postprocess:
if parts_postprocess:
self.params.parts_postprocess = map(
lambda func: func.__name__, parts_postprocess)
for func in parts_postprocess:
self.params.__setattr__(func.__name__, func)
else:
self.params.result_postprocess = []
self.params.parts_postprocess = []
# transforms
if field_transforms:
@ -224,4 +224,4 @@ class InfernoRule(object):
reduce_function=fstr(self.reduce_function),
keysets=self.params.keysets,
parts_preprocess=self.params.parts_preprocess,
result_postprocess=self.params.result_postprocess)
parts_postprocess=self.params.parts_postprocess)

Просмотреть файл

@ -93,7 +93,7 @@ class TestRuleHandler(DiscoBallHandlerTestCase):
'map_function': 'inferno.lib.map.keyset_map',
'reduce_function': 'inferno.lib.reduce.keyset_reduce',
'parts_preprocess': [],
'result_postprocess': [],
'parts_postprocess': [],
'keysets': {
'keyset_1': {
'key_parts': ['_keyset', 'key_1'],

Просмотреть файл

@ -68,7 +68,7 @@ class TestKeysetResult(object):
'Martin,30']
mapping = {'count': 'last_name_count'}
self.params.keysets['last_name_keyset']['column_mappings'] = mapping
self.params.result_postprocess = ['some_filter', 'some_expander']
self.params.parts_postprocess = ['some_filter', 'some_expander']
self.params.__setattr__('some_filter', some_filter)
self.params.__setattr__('some_expander', some_expander)
self._assert_keyset_result(data, self.params, expected)