Add geo_subdivision{1,2} to clients_daily_v5 (#215)

This commit is contained in:
Daniel Thorn 2018-04-02 16:54:39 -07:00 коммит произвёл GitHub
Родитель dbcb2b23d8
Коммит 2f07be25fb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файла: 54 добавления и 3 удаления

Просмотреть файл

@ -51,8 +51,27 @@ _FIELD_AGGREGATORS = [
# attribution
agg_first('blocklist_enabled'),
agg_first('channel'),
agg_first('city'),
agg_first('country'),
F.first(
F.expr(
"IF(country IS NOT NULL AND country != '??'," \
" IF(city IS NOT NULL, city, '??'), NULL)"
)
).alias('city'),
F.first(
F.expr(
"IF(country IS NOT NULL AND country != '??'," \
" IF(geo_subdivision1 IS NOT NULL, geo_subdivision1, '??'), NULL)"
)
).alias('geo_subdivision1'),
F.first(
F.expr(
"IF(country IS NOT NULL AND country != '??'," \
" IF(geo_subdivision2 IS NOT NULL, geo_subdivision2, '??'), NULL)"
)
).alias('geo_subdivision2'),
F.first(
F.expr("IF(country IS NOT NULL AND country != '??', country, NULL)")
).alias('country'),
agg_sum('crashes_detected_content'),
agg_sum('crashes_detected_gmplugin'),
agg_sum('crashes_detected_plugin'),
@ -192,3 +211,5 @@ EXPERIMENT_FIELD_AGGREGATORS = _FIELD_AGGREGATORS[:15] + [
# Column holding the first 10 characters of `subsession_start_date`,
# exposed under the name `activity_date`.
# NOTE(review): presumably this is the YYYY-MM-DD date prefix of an ISO
# timestamp -- confirm against the upstream schema.
ACTIVITY_DATE_COLUMN = F.expr("substr(subsession_start_date, 1, 10)").alias(
    "activity_date"
)

# SQL expression for a NULL converted to STRING, so a column built from it
# has a string type rather than an untyped null.
NULL_STRING_COLUMN = F.expr("STRING(NULL)")

Просмотреть файл

@ -111,6 +111,12 @@ def to_profile_day_aggregates(frame_with_extracts):
)
else:
with_activity_date = frame_with_extracts
if "geo_subdivision1" not in with_activity_date.columns:
from fields import NULL_STRING_COLUMN
with_activity_date = with_activity_date.withColumn("geo_subdivision1", NULL_STRING_COLUMN)
if "geo_subdivision2" not in with_activity_date.columns:
from fields import NULL_STRING_COLUMN
with_activity_date = with_activity_date.withColumn("geo_subdivision2", NULL_STRING_COLUMN)
grouped = with_activity_date.groupby('client_id', 'activity_date')
return grouped.agg(*MAIN_SUMMARY_FIELD_AGGREGATORS)

Просмотреть файл

@ -36,6 +36,18 @@
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "geo_subdivision1",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "geo_subdivision2",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "os",
@ -1513,4 +1525,4 @@
}
],
"type": "struct"
}
}

Просмотреть файл

@ -36,6 +36,18 @@
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "geo_subdivision1",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "geo_subdivision2",
"nullable": true,
"type": "string"
},
{
"metadata": {},
"name": "os",