Merge pull request #11 from mmmavis/issue-10-parse-data-from-new-sheet-template

Parse data from the new Google Spreadsheet template

Commit 69279b0e99
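With this change, fetch_data() returns two buckets instead of one flat list, and update_schedule() transforms each bucket separately before building sessions.json. A minimal sketch of the resulting shape (the fields inside each record depend on the new spreadsheet template):

    data = {
        'timeblocks': [...],  # rows from the '* Timeblock Values' worksheet
        'sessions': [...]     # rows from the worksheets named in WORKSHEETS_TO_FETCH
    }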
.gitignore
@@ -0,0 +1,3 @@
+venv
+.env
+*.json
@@ -12,8 +12,8 @@ from oauth2client.client import SignedJwtAssertionCredentials
 
 GITHUB_CONFIG = {
     'TOKEN': os.environ['GITHUB_TOKEN'],
-    'REPO_OWNER': 'mozilla',
-    'REPO_NAME': 'mozfest-schedule-app',
+    'REPO_OWNER': os.environ['REPO_OWNER'],
+    'REPO_NAME': os.environ['REPO_NAME'],
     'TARGET_FILE': 'sessions.json',
     'TARGET_BRANCHES': ['gh-pages',],
 }
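REPO_OWNER and REPO_NAME now come from the environment instead of being hardcoded, so a fork can publish to its own repo. A sketch of a local setup, reusing the previously hardcoded values as example settings (the token value is a placeholder):

    import os
    # example values only; point these at your own fork
    os.environ.setdefault('GITHUB_TOKEN', '<personal access token>')
    os.environ.setdefault('REPO_OWNER', 'mozilla')
    os.environ.setdefault('REPO_NAME', 'mozfest-schedule-app')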
@@ -29,10 +29,11 @@ GOOGLE_API_CONFIG = {
 GOOGLE_SPREADSHEET_KEY = os.environ['GOOGLE_SPREADSHEET_KEY'] or ''
 
 FETCH_MULTIPLE_WORKSHEETS = True
+WORKSHEETS_TO_FETCH = ['For All Participants', 'Italian', 'Japanese']
 WORKSHEETS_TO_SKIP = ['Template', '(backup) original imported data']
 
-MAKE_LOCAL_JSON = False
-COMMIT_JSON_TO_GITHUB = True
+MAKE_LOCAL_JSON = True
+COMMIT_JSON_TO_GITHUB = False
 
 
 def authenticate_with_google():
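Flipping MAKE_LOCAL_JSON and COMMIT_JSON_TO_GITHUB turns the default run into a dry run: make_json() writes sessions.json to disk and commit_json() presumably skips the push when its `commit` flag is False. A sketch using the signatures that appear later in this diff:

    # dry run: write JSON locally, skip the GitHub commit
    session_json = make_json(data, store_locally=True)
    commit_json(data, target_config=GITHUB_CONFIG, commit=False)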
@@ -58,26 +59,82 @@ def open_google_spreadsheet():
 def fetch_data(multiple_sheets=False, worksheets_to_skip=[]):
     spreadsheet = open_google_spreadsheet()
 
+    data = {
+        'timeblocks': fetch_worksheets(spreadsheet, multiple_sheets, ['* Timeblock Values']),
+        'sessions': fetch_worksheets(spreadsheet, multiple_sheets, WORKSHEETS_TO_FETCH)
+    }
+
+    return data
+
+
+def fetch_worksheets(spreadsheet, multiple_sheets=False, worksheets_to_fetch=[]):
     if not multiple_sheets:
         # Return data from first worksheet in Google spreadsheet.
         worksheet = spreadsheet.get_worksheet(0)
         data = worksheet.get_all_records(empty2zero=False)
 
     else:
-        # Return data from all worksheets in Google spreadsheet, optionally
-        # skipping sheets identified by title in `WORKSHEETS_TO_SKIP`.
+        # Return data from all worksheets in worksheets_to_fetch
         data = []
 
         worksheet_list = [
-            sheet for sheet in spreadsheet.worksheets() if sheet.title not in WORKSHEETS_TO_SKIP
+            sheet for sheet in spreadsheet.worksheets() if sheet.title in worksheets_to_fetch
         ]
 
         for worksheet in worksheet_list:
             worksheet.title
             data.extend(worksheet.get_all_records(empty2zero=False))
 
-        return data
+    return data
 
 
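fetch_worksheets() replaces the old skip-list with an explicit allow-list: only sheets whose title appears in worksheets_to_fetch are read, so stray tabs in the spreadsheet can no longer leak into the output. A usage sketch (sheet titles taken from the config above):

    spreadsheet = open_google_spreadsheet()
    sessions = fetch_worksheets(spreadsheet, multiple_sheets=True,
                                worksheets_to_fetch=['For All Participants', 'Italian'])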
-def transform_data(data):
+def slugify_timeblock(timeblock):
+    time_data = timeblock.split('(')
+    # "slugified" version of timeblock
+    timeblock = time_data[0].strip()
+    timeblock = timeblock.lower().replace(' ','-').replace(',','').replace(':','-').replace('*','')
+    return timeblock
+
+
+def transform_timeblock_data(data):
+    def _transform_response_item(item, skip=False):
+        # make sure vars are strings
+        _transformed_item = {k: unicode(v) for k, v in item.iteritems() if k}
+
+        if 'order' in _transformed_item:
+            _transformed_item['order'] = _transformed_item.pop('order', '')
+
+        # remove rows with `order` that is blank or provides instructions
+        try:
+            int(_transformed_item['order'])
+        except:
+            skip = True
+
+        # keep `reserved for everyone` only when the cell is non-empty
+        if 'reserved for everyone' in _transformed_item:
+            reserved_for_everyone = _transformed_item.pop('reserved for everyone', '')
+            if reserved_for_everyone:
+                _transformed_item['reserved for everyone'] = reserved_for_everyone
+
+        # transform `Auto Generated. Do Not Modify.` column name into `key` key
+        if 'Auto Generated. Do Not Modify.' in _transformed_item:
+            _transformed_item['key'] = slugify_timeblock(_transformed_item.pop('Auto Generated. Do Not Modify.', ''))
+
+        # if we've triggered the skip flag anywhere, drop this record
+        if skip:
+            _transformed_item = None
+
+        return _transformed_item
+
+    # empty list to hold any items we need to duplicate
+    cloned_data = []
+    # pass initial data through the transformer
+    transformed_data = filter(None, [_transform_response_item(item) for item in data])
+    # and add in any items we had to duplicate
+    transformed_data.extend(
+        filter(None, [_transform_response_item(item) for item in cloned_data])
+    )
+
+    return transformed_data
+
+
+def transform_session_data(data):
     '''
     Transforms data and filters individual schedule items for fields we want
     to publish. Currently, this:
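slugify_timeblock() keeps only the text before the first '(', then lowercases it, turns spaces and colons into hyphens, and drops commas and asterisks. A worked example with a hypothetical cell value:

    >>> slugify_timeblock('Saturday, 10:00 (starts 10:00)')
    'saturday-10-00'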
@@ -89,7 +146,7 @@ def transform_data(data):
     * removes any rows that don't have a numeric `id`
     * creates a concatenated `facilitators` key
     * removes invalid pathway labels that were used for GitHub workflow
-    * creates a `scheduleblock` key based on data in `time` column
+    * creates a `timeblock` key based on data in `timeblock` column
     * creates Saturday and Sunday versions of sessions marked 'all-weekend'
     * infers a `day` and `start` key based on data in `time` column
     * prepends `location` with the word 'Floor'
@@ -98,10 +155,6 @@ def transform_data(data):
         # make sure vars are strings
         _transformed_item = {k: unicode(v) for k, v in item.iteritems() if k}
 
-        # don't need `proposalSpreadsheetRowNumber` for schedule app
-        if 'proposalSpreadsheetRowNumber' in _transformed_item:
-            del _transformed_item['proposalSpreadsheetRowNumber']
-
         # transform `name` column name into `title` key
         # and skip rows that represent pathways, or have no name
         if 'name' in _transformed_item:
@@ -113,8 +166,8 @@ def transform_data(data):
 
         # transform `githubIssueNumber` column name into `id` key
         # (and skip rows without a valid id)
-        if 'githubIssueNumber' in _transformed_item:
-            _transformed_item['id'] = _transformed_item.pop('githubIssueNumber', '')
+        if 'id' in _transformed_item:
+            _transformed_item['id'] = _transformed_item.pop('id', '')
 
         # remove rows with `id` that is blank or provides instructions
         try:
@@ -127,7 +180,7 @@ def transform_data(data):
             name_list = []
             name_detail_list = []
             for key in _transformed_item.keys():
-                if key.startswith('facilitator_'):
+                if key.startswith('facilitator'):
                     name_list.append(_transformed_item[key].split(",")[0])
                     name_detail_list.append(_transformed_item.pop(key))
             _transformed_item['facilitators'] = ', '.join(filter(None, name_list))
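Loosening startswith('facilitator_') to startswith('facilitator') lets the loop collect both 'facilitator_1'-style and 'facilitator 1'-style column headers from the new template. Each cell is assumed to hold 'Name, extra detail', so split(",")[0] keeps just the name. A hypothetical row:

    item = {'facilitator_1': 'Ada Lovelace, London', 'facilitator 2': 'Grace Hopper, NYC'}
    # after the loop: _transformed_item['facilitators'] == 'Ada Lovelace, Grace Hopper'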
@@ -135,45 +188,45 @@ def transform_data(data):
 
         # remove invalid pathway labels that were used for GitHub workflow
         pathway_skip_keywords = ['accepted','consideration','stipend','sample']
-        pathway_list = _transformed_item['pathways'].split(',')
+        pathway_list = _transformed_item['tags'].split(',')
         pathway_list = [
             name for name in pathway_list if not set(pathway_skip_keywords).intersection(set(name.lower().split()))
         ]
-        _transformed_item['pathways'] = ','.join(pathway_list)
+        _transformed_item['tags'] = ','.join(pathway_list)
 
-        # create `scheduleblock` key based on `time`
-        time_data = _transformed_item.pop('time', '').split('(')
-        # "slugified" version of scheduleblock
-        scheduleblock = time_data[0].strip()
-        scheduleblock = scheduleblock.lower().replace(' ','-')
-        _transformed_item['scheduleblock'] = scheduleblock
+        # create `timeblock` key based on `timeblock`
+        time_data = _transformed_item.pop('timeblock', '')
+        timeblock = slugify_timeblock(time_data)
+        _transformed_item['timeblock'] = timeblock
 
         # infer session day
-        if 'saturday' in _transformed_item['scheduleblock']:
+        if 'saturday' in _transformed_item['timeblock']:
             _transformed_item['day'] = 'Saturday'
-        if 'sunday' in _transformed_item['scheduleblock']:
+        if 'sunday' in _transformed_item['timeblock']:
             _transformed_item['day'] = 'Sunday'
-        if 'all-s' in _transformed_item['scheduleblock']:
-            _transformed_item['start'] = 'All Day'
 
-        # infer start time
+        # if 'all-s' in _transformed_item['timeblock']:
+        #     _transformed_item['start'] = 'All Day'
+
+        # start time
         if len(time_data) > 1:
-            start_time = time_data[1].strip('()').split(' ')[0]
+            start_time = time_data.split('(')
+            start_time = start_time[len(start_time)-1].strip(')')[-5:] # keep the last 5 characters
             try:
                 # attempt to coerce to 12-hour format
                 d = datetime.strptime(start_time, "%H:%M")
                 start_time = d.strftime("%I:%M %p")
             except:
-                start_time = ''
+                pass
             _transformed_item['start'] = start_time
 
         # create Saturday and Sunday versions of sessions marked 'all-weekend'
-        if 'weekend' in _transformed_item['scheduleblock']:
+        if 'weekend' in _transformed_item['timeblock']:
             _transformed_item['start'] = 'All Weekend'
             if 'clone_flag' in item:
-                _transformed_item['scheduleblock'] = 'all-sunday'
+                _transformed_item['timeblock'] = 'all-sunday'
                 _transformed_item['day'] = 'Sunday'
                 _transformed_item['start'] = 'All Day'
             else:
-                _transformed_item['scheduleblock'] = 'all-saturday'
+                _transformed_item['timeblock'] = 'all-saturday'
                 _transformed_item['day'] = 'Saturday'
                 _transformed_item['start'] = 'All Day'
                 # create a cloned version for Sunday
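The new start-time logic grabs whatever follows the last '(' in the raw timeblock string, strips the closing paren, keeps the last five characters, and then tries to coerce a 24-hour 'HH:MM' value into a 12-hour label. A worked trace with a hypothetical timeblock string:

    from datetime import datetime

    time_data = 'Saturday, 10:00 (starts 14:00)'
    start_time = time_data.split('(')
    start_time = start_time[len(start_time)-1].strip(')')[-5:]  # '14:00'
    d = datetime.strptime(start_time, "%H:%M")
    print d.strftime("%I:%M %p")  # '02:00 PM'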
@@ -182,8 +235,8 @@ def transform_data(data):
                 cloned_data.append(cloned_item)
 
         # prepend `location` with the word 'Floor'
-        if _transformed_item['location'] and not _transformed_item['location'].startswith('Floor'):
-            _transformed_item['location'] = 'Floor {0}'.format(_transformed_item['location'])
+        # if _transformed_item['location'] and not _transformed_item['location'].startswith('Floor'):
+        #     _transformed_item['location'] = 'Floor {0}'.format(_transformed_item['location'])
 
         # if we've triggered the skip flag anywhere, drop this record
         if skip:
@@ -266,9 +319,12 @@ def commit_json(data, target_config=GITHUB_CONFIG, commit=COMMIT_JSON_TO_GITHUB)
 
 def update_schedule():
     data = fetch_data(multiple_sheets=FETCH_MULTIPLE_WORKSHEETS, worksheets_to_skip=WORKSHEETS_TO_SKIP)
-    #print 'Fetched the data ...'
+    print 'Fetched the data ...'
 
-    data = transform_data(data)
+    data = {
+        'timeblocks': transform_timeblock_data(data['timeblocks']),
+        'sessions': transform_session_data(data['sessions'])
+    }
     #print 'Prepped the data ...'
 
     session_json = make_json(data, store_locally=MAKE_LOCAL_JSON)