Add geo_subdivision{1,2} to clients_daily_v5 (#215)
This commit is contained in:
Родитель
dbcb2b23d8
Коммит
2f07be25fb
|
@ -51,8 +51,27 @@ _FIELD_AGGREGATORS = [
|
|||
# attribution
|
||||
agg_first('blocklist_enabled'),
|
||||
agg_first('channel'),
|
||||
agg_first('city'),
|
||||
agg_first('country'),
|
||||
F.first(
|
||||
F.expr(
|
||||
"IF(country IS NOT NULL AND country != '??'," \
|
||||
" IF(city IS NOT NULL, city, '??'), NULL)"
|
||||
)
|
||||
).alias('city'),
|
||||
F.first(
|
||||
F.expr(
|
||||
"IF(country IS NOT NULL AND country != '??'," \
|
||||
" IF(geo_subdivision1 IS NOT NULL, geo_subdivision1, '??'), NULL)"
|
||||
)
|
||||
).alias('geo_subdivision1'),
|
||||
F.first(
|
||||
F.expr(
|
||||
"IF(country IS NOT NULL AND country != '??'," \
|
||||
" IF(geo_subdivision2 IS NOT NULL, geo_subdivision2, '??'), NULL)"
|
||||
)
|
||||
).alias('geo_subdivision2'),
|
||||
F.first(
|
||||
F.expr("IF(country IS NOT NULL AND country != '??', country, NULL)")
|
||||
).alias('country'),
|
||||
agg_sum('crashes_detected_content'),
|
||||
agg_sum('crashes_detected_gmplugin'),
|
||||
agg_sum('crashes_detected_plugin'),
|
||||
|
@ -192,3 +211,5 @@ EXPERIMENT_FIELD_AGGREGATORS = _FIELD_AGGREGATORS[:15] + [
|
|||
# Column expression that keeps the first 10 characters of
# `subsession_start_date` and exposes them under the name `activity_date`.
# NOTE(review): presumably the leading 10 characters are a YYYY-MM-DD date
# prefix of a longer timestamp string -- confirm against the input format.
ACTIVITY_DATE_COLUMN = F.expr(
|
||||
"substr(subsession_start_date, 1, 10)"
|
||||
).alias("activity_date")
|
||||
|
||||
# SQL expression `STRING(NULL)`, used as a placeholder column value: the
# aggregation code adds it as `geo_subdivision1` / `geo_subdivision2` when
# the input frame does not already contain those columns.
# NOTE(review): presumably the explicit STRING(...) wrapper gives the NULL a
# string type so it matches the schema -- confirm.
NULL_STRING_COLUMN = F.expr("STRING(NULL)")
|
||||
|
|
|
@ -111,6 +111,12 @@ def to_profile_day_aggregates(frame_with_extracts):
|
|||
)
|
||||
else:
|
||||
with_activity_date = frame_with_extracts
|
||||
if "geo_subdivision1" not in with_activity_date.columns:
|
||||
from fields import NULL_STRING_COLUMN
|
||||
with_activity_date = with_activity_date.withColumn("geo_subdivision1", NULL_STRING_COLUMN)
|
||||
if "geo_subdivision2" not in with_activity_date.columns:
|
||||
from fields import NULL_STRING_COLUMN
|
||||
with_activity_date = with_activity_date.withColumn("geo_subdivision2", NULL_STRING_COLUMN)
|
||||
grouped = with_activity_date.groupby('client_id', 'activity_date')
|
||||
return grouped.agg(*MAIN_SUMMARY_FIELD_AGGREGATORS)
|
||||
|
||||
|
|
|
@ -36,6 +36,18 @@
|
|||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "geo_subdivision1",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "geo_subdivision2",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "os",
|
||||
|
@ -1513,4 +1525,4 @@
|
|||
}
|
||||
],
|
||||
"type": "struct"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,18 @@
|
|||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "geo_subdivision1",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "geo_subdivision2",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "os",
|
||||
|
|
Загрузка…
Ссылка в новой задаче