Support range partitioning and remove GLAM init.sql (#5346)
* Add support for range partitioning * Remove init.sql for GLAM tables
This commit is contained in:
Родитель
75db39c884
Коммит
c2b77ba3bd
|
@ -2167,6 +2167,15 @@ def _attach_metadata(query_file_path: Path, table: bigquery.Table) -> None:
|
|||
),
|
||||
expiration_ms=metadata.bigquery.time_partitioning.expiration_ms,
|
||||
)
|
||||
elif metadata.bigquery and metadata.bigquery.range_partitioning:
|
||||
table.range_partitioning = bigquery.RangePartitioning(
|
||||
field=metadata.bigquery.range_partitioning.field,
|
||||
range_=bigquery.PartitionRange(
|
||||
start=metadata.bigquery.range_partitioning.range.start,
|
||||
end=metadata.bigquery.range_partitioning.range.end,
|
||||
interval=metadata.bigquery.range_partitioning.range.interval,
|
||||
),
|
||||
)
|
||||
|
||||
if metadata.bigquery and metadata.bigquery.clustering:
|
||||
table.clustering_fields = metadata.bigquery.clustering.fields
|
||||
|
|
|
@ -75,6 +75,23 @@ class PartitionMetadata:
|
|||
return int(self.expiration_days * 86400000)
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
class PartitionRange:
    """Integer bounds and step size of a range-partitioned table.

    The three values are forwarded unchanged as the ``start``, ``end`` and
    ``interval`` arguments of ``bigquery.PartitionRange`` when table metadata
    is attached.
    """

    # First value of the partitioning range.
    start: int
    # Last value of the partitioning range.
    # NOTE(review): BigQuery treats this bound as exclusive - confirm against
    # the BigQuery docs before relying on it.
    end: int
    # Width of each individual partition.
    interval: int
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
class RangePartitionMetadata:
    """Range-partitioning settings for a BigQuery table.

    Read from the ``bigquery.range_partitioning`` section of a table's
    metadata file.
    """

    # Start / end / interval describing how the column values are bucketed.
    range: PartitionRange
    # Name of the column the table is partitioned on; defaults to None.
    field: Optional[str] = attr.ib(default=None)
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True)
|
||||
class ClusteringMetadata:
|
||||
"""Metadata for defining BigQuery table clustering."""
|
||||
|
@ -91,6 +108,7 @@ class BigQueryMetadata:
|
|||
"""
|
||||
|
||||
time_partitioning: Optional[PartitionMetadata] = attr.ib(None)
|
||||
range_partitioning: Optional[RangePartitionMetadata] = attr.ib(None)
|
||||
clustering: Optional[ClusteringMetadata] = attr.ib(None)
|
||||
|
||||
|
||||
|
@ -406,12 +424,13 @@ class Metadata:
|
|||
|
||||
def set_bigquery_clustering(self, clustering_fields):
    """Update the BigQuery clustering metadata.

    Replaces ``self.bigquery`` with a new ``BigQueryMetadata`` whose
    clustering is built from ``clustering_fields``. Any time or range
    partitioning already configured on ``self.bigquery`` is carried over.

    :param clustering_fields: column names to cluster the table by
    """
    # Default both partitioning settings so that metadata without an existing
    # ``bigquery`` section does not hit unbound local variables below.
    time_partitioning = None
    range_partitioning = None
    if self.bigquery:
        time_partitioning = self.bigquery.time_partitioning
        range_partitioning = self.bigquery.range_partitioning

    self.bigquery = BigQueryMetadata(
        time_partitioning=time_partitioning,
        range_partitioning=range_partitioning,
        clustering=ClusteringMetadata(fields=clustering_fields),
    )
|
||||
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
-- init for firefox_desktop_glam_nightly__clients_histogram_aggregates_v1;
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1`(
|
||||
sample_id INT64,
|
||||
client_id STRING,
|
||||
ping_type STRING,
|
||||
os STRING,
|
||||
app_version INT64,
|
||||
app_build_id STRING,
|
||||
channel STRING,
|
||||
histogram_aggregates ARRAY<
|
||||
STRUCT<
|
||||
metric STRING,
|
||||
metric_type STRING,
|
||||
key STRING,
|
||||
agg_type STRING,
|
||||
value ARRAY<STRUCT<key STRING, value INT64>>
|
||||
>
|
||||
>
|
||||
)
|
||||
PARTITION BY
|
||||
RANGE_BUCKET(sample_id, GENERATE_ARRAY(0, 100, 1))
|
||||
CLUSTER BY
|
||||
app_version,
|
||||
channel,
|
||||
client_id
|
|
@ -4,3 +4,15 @@ description: |-
|
|||
[DESCRIPTION_MISSING]
|
||||
owners:
|
||||
- efilho@mozilla.com
|
||||
bigquery:
|
||||
range_partitioning:
|
||||
field: sample_id
|
||||
range:
|
||||
start: 0
|
||||
end: 100
|
||||
interval: 1
|
||||
clustering:
|
||||
fields:
|
||||
- app_version
|
||||
- channel
|
||||
- client_id
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
fields:
|
||||
- mode: NULLABLE
|
||||
name: sample_id
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: client_id
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: ping_type
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: os
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: app_version
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: app_build_id
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: channel
|
||||
type: STRING
|
||||
- fields:
|
||||
- mode: NULLABLE
|
||||
name: metric
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: metric_type
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: key
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: agg_type
|
||||
type: STRING
|
||||
- fields:
|
||||
- mode: NULLABLE
|
||||
name: key
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: value
|
||||
type: INTEGER
|
||||
mode: REPEATED
|
||||
name: value
|
||||
type: RECORD
|
||||
mode: REPEATED
|
||||
name: histogram_aggregates
|
||||
type: RECORD
|
|
@ -1,19 +0,0 @@
|
|||
-- init for firefox_desktop_glam_nightly__clients_scalar_aggregates_v1;
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1`(
|
||||
client_id STRING,
|
||||
ping_type STRING,
|
||||
os STRING,
|
||||
app_version INT64,
|
||||
app_build_id STRING,
|
||||
channel STRING,
|
||||
scalar_aggregates ARRAY<
|
||||
STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
|
||||
>
|
||||
)
|
||||
PARTITION BY
|
||||
RANGE_BUCKET(app_version, GENERATE_ARRAY(0, 100, 1))
|
||||
CLUSTER BY
|
||||
app_version,
|
||||
channel,
|
||||
client_id
|
|
@ -4,3 +4,15 @@ description: |-
|
|||
[DESCRIPTION_MISSING]
|
||||
owners:
|
||||
- efilho@mozilla.com
|
||||
bigquery:
|
||||
range_partitioning:
|
||||
field: app_version
|
||||
range:
|
||||
start: 0
|
||||
end: 100
|
||||
interval: 1
|
||||
clustering:
|
||||
fields:
|
||||
- app_version
|
||||
- channel
|
||||
- client_id
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
fields:
|
||||
- name: client_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: ping_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: os
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: app_version
|
||||
type: INTEGER
|
||||
mode: NULLABLE
|
||||
- name: app_build_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: channel
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: scalar_aggregates
|
||||
type: RECORD
|
||||
mode: REPEATED
|
||||
fields:
|
||||
- name: metric
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: metric_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: key
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: agg_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: value
|
||||
type: FLOAT
|
||||
mode: NULLABLE
|
|
@ -1,26 +0,0 @@
|
|||
-- init for org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1;
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1`(
|
||||
sample_id INT64,
|
||||
client_id STRING,
|
||||
ping_type STRING,
|
||||
os STRING,
|
||||
app_version INT64,
|
||||
app_build_id STRING,
|
||||
channel STRING,
|
||||
histogram_aggregates ARRAY<
|
||||
STRUCT<
|
||||
metric STRING,
|
||||
metric_type STRING,
|
||||
key STRING,
|
||||
agg_type STRING,
|
||||
value ARRAY<STRUCT<key STRING, value INT64>>
|
||||
>
|
||||
>
|
||||
)
|
||||
PARTITION BY
|
||||
RANGE_BUCKET(sample_id, GENERATE_ARRAY(0, 100, 1))
|
||||
CLUSTER BY
|
||||
app_version,
|
||||
channel,
|
||||
client_id
|
|
@ -4,3 +4,15 @@ description: |-
|
|||
[DESCRIPTION_MISSING]
|
||||
owners:
|
||||
- efilho@mozilla.com
|
||||
bigquery:
|
||||
range_partitioning:
|
||||
field: sample_id
|
||||
range:
|
||||
start: 0
|
||||
end: 100
|
||||
interval: 1
|
||||
clustering:
|
||||
fields:
|
||||
- app_version
|
||||
- channel
|
||||
- client_id
|
||||
|
|
Загрузка…
Ссылка в новой задаче