Migrate UDFs to new format

2020-08-31 13:34:56 -07:00 · 2020-08-31 13:34:56 -07:00 · 2b29d24f59
--- a/script/legacy/migrate_udfs
+++ b/script/legacy/migrate_udfs
@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+"""Migrate legacy UDFs to the new UDF structure."""
+
+from argparse import ArgumentParser
+import os
+from pathlib import Path
+import re
+import shutil
+import string
+import yaml
+
+
+# Legacy directories scanned when no --udf-dirs value is given.
+UDF_DIRS = ("udf/", "udf_js/")
+
+# Matches either a /* ... */ block comment (capture group 1) or a // line
+# comment (capture group 5).
+DESCRIPTION_RE = re.compile(r"(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)")
+
+# Command line interface: a single UDF file, or one or more directories.
+parser = ArgumentParser(description=__doc__)
+parser.add_argument("--udf", help="Migrate the specified UDF.")
+parser.add_argument(
+    "--udf-dirs",
+    "--udf_dirs",
+    default=UDF_DIRS,
+    nargs="+",
+    help="Directories containing UDFs to migrate",
+)
+
+
+def migrate_udf(udf_file):
+    """Migrate a specific UDF to the new format.
+
+    The legacy file ``<dir>/<name>.sql`` is moved to ``<dir>/<name>/udf.sql``
+    and a ``metadata.yaml`` holding ``friendly_name`` and ``description`` is
+    written next to it.
+
+    Args:
+        udf_file: Path (``str`` or ``Path``) of the legacy UDF SQL file.
+    """
+    udf_file = Path(udf_file)
+    print(f"Migrate {udf_file}")
+
+    # Drop only a trailing ".sql"; str.replace would also remove the substring
+    # from the middle of a name such as "to.sql_string.sql".
+    udf_name = udf_file.stem if udf_file.suffix == ".sql" else udf_file.name
+    friendly_name = string.capwords(udf_name.replace("_", " "))
+
+    # Read as UTF-8 explicitly so the result does not depend on the locale.
+    udf_content = udf_file.read_text(encoding="utf-8")
+
+    # The description is taken from the first comment in the file, and only
+    # if that comment is a /* ... */ block (group 1 of DESCRIPTION_RE). When
+    # the first match is a // comment, group 1 is unset and the description
+    # stays empty.
+    description = ""
+    comment = DESCRIPTION_RE.search(udf_content)
+    if comment is not None and comment.group(1):
+        description = comment.group(1).replace("/*", "")
+        description = description.replace("*/", "").strip()
+        description = description.replace("\n", " ").strip()
+
+    # move files to directory
+    migrated_udf_dir = udf_file.parent / udf_name
+    migrated_udf_dir.mkdir(parents=True, exist_ok=True)
+    shutil.move(str(udf_file), str(migrated_udf_dir / "udf.sql"))
+
+    # create metadata file
+    metadata_file = migrated_udf_dir / "metadata.yaml"
+    metadata = {"friendly_name": friendly_name, "description": description}
+    metadata_file.write_text(yaml.dump(metadata))
+
+
+def main():
+    """Run the UDF migration.
+
+    With ``--udf`` only that file is migrated. Otherwise every directory in
+    ``--udf-dirs`` that exists is walked recursively and each ``*.sql`` file
+    is passed to ``migrate_udf``; files named exactly ``udf.sql`` are skipped
+    because that is the name migrated UDFs are stored under.
+    """
+    args = parser.parse_args()
+
+    if args.udf:
+        # migrate a single UDF
+        migrate_udf(args.udf)
+        return
+
+    # iterate through udfs and migrate one by one
+    for udf_dir in args.udf_dirs:
+        if not os.path.isdir(udf_dir):
+            # directories that do not exist are silently skipped
+            continue
+        for root, _dirs, files in os.walk(udf_dir):
+            for udf_file in files:
+                # Compare the whole file name: a suffix check would also skip
+                # legacy files such as "normalize_udf.sql".
+                if udf_file.endswith(".sql") and udf_file != "udf.sql":
+                    migrate_udf(os.path.join(root, udf_file))
+
+
+# Run the migration only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/udf/active_n_weeks_ago/metadata.yaml
+++ b/udf/active_n_weeks_ago/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Active N Weeks Ago
--- a/udf/active_n_weeks_ago/udf.sql
+++ b/udf/active_n_weeks_ago/udf.sql
--- a/udf/active_values_from_days_seen_map/metadata.yaml
+++ b/udf/active_values_from_days_seen_map/metadata.yaml
@ -0,0 +1,4 @@
+description: Given a map representing activity for STRING `key`s, this function
+  returns an array of which `key`s were active for the time period in question.  start_offset
+  should be at most 0. n_bits should be at most the remaining bits.
+friendly_name: Active Values From Days Seen Map
--- a/udf/active_values_from_days_seen_map/udf.sql
+++ b/udf/active_values_from_days_seen_map/udf.sql
--- a/udf/add_monthly_engine_searches/metadata.yaml
+++ b/udf/add_monthly_engine_searches/metadata.yaml
@ -0,0 +1,10 @@
+description: 'This function specifically windows searches into calendar-month windows.
+  This means groups are not necessarily directly comparable, since different months
+  have different numbers of days.  On the first of each month, a new month is appended,
+  and the first month is dropped.  If the date is not the first of the month, the
+  new entry is added to the last element in the array.  For example, if we were adding
+  12 to [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]:  On the first of the month, the result
+  would be [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 12] On any other day of the month,
+  the result would be [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24]  This happens for every
+  aggregate (searches, ad clicks, etc.)'
+friendly_name: Add Monthly Engine Searches
--- a/udf/add_monthly_engine_searches/udf.sql
+++ b/udf/add_monthly_engine_searches/udf.sql
--- a/udf/add_monthly_searches/metadata.yaml
+++ b/udf/add_monthly_searches/metadata.yaml
@ -0,0 +1,5 @@
+description: Adds together two engine searches structs. Each engine searches struct
+  has a MAP[engine -> search_counts_struct]. We want to add together the prev
+  and curr's values for a certain engine.  This allows us to be flexible with the
+  number of engines we're using.
+friendly_name: Add Monthly Searches
--- a/udf/add_monthly_searches/udf.sql
+++ b/udf/add_monthly_searches/udf.sql
--- a/udf/add_searches_by_index/metadata.yaml
+++ b/udf/add_searches_by_index/metadata.yaml
@ -0,0 +1,3 @@
+description: Return sums of each search type grouped by the index.  Results are ordered
+  by index.
+friendly_name: Add Searches By Index
--- a/udf/add_searches_by_index/udf.sql
+++ b/udf/add_searches_by_index/udf.sql
--- a/udf/aggregate_active_addons/metadata.yaml
+++ b/udf/aggregate_active_addons/metadata.yaml
@ -0,0 +1,5 @@
+description: This function selects the most frequently occurring value for each addon_id,
+  using the latest value in the input among ties. The type for active_addons is ARRAY<STRUCT<addon_id
+  STRING, ...>>, i.e. the output of `SELECT ARRAY_CONCAT_AGG(active_addons) FROM telemetry.main_summary_v4`,
+  and is left unspecified to allow changes to the fields of the STRUCT.
+friendly_name: Aggregate Active Addons
--- a/udf/aggregate_active_addons/udf.sql
+++ b/udf/aggregate_active_addons/udf.sql
--- a/udf/aggregate_map_first/metadata.yaml
+++ b/udf/aggregate_map_first/metadata.yaml
@ -0,0 +1,3 @@
+description: Returns an aggregated map with all the keys and the first corresponding
+  value from the given maps
+friendly_name: Aggregate Map First
--- a/udf/aggregate_map_first/udf.sql
+++ b/udf/aggregate_map_first/udf.sql
--- a/udf/aggregate_search_counts/metadata.yaml
+++ b/udf/aggregate_search_counts/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Aggregate Search Counts
--- a/udf/aggregate_search_counts/udf.sql
+++ b/udf/aggregate_search_counts/udf.sql
--- a/udf/aggregate_search_map/metadata.yaml
+++ b/udf/aggregate_search_map/metadata.yaml
@ -0,0 +1,2 @@
+description: Aggregates the total counts of the given search counters
+friendly_name: Aggregate Search Map
--- a/udf/aggregate_search_map/udf.sql
+++ b/udf/aggregate_search_map/udf.sql
--- a/udf/array_11_zeroes_then/metadata.yaml
+++ b/udf/array_11_zeroes_then/metadata.yaml
@ -0,0 +1,2 @@
+description: An array of 11 zeroes, followed by a supplied value
+friendly_name: Array 11 Zeroes Then
--- a/udf/array_11_zeroes_then/udf.sql
+++ b/udf/array_11_zeroes_then/udf.sql
--- a/udf/array_drop_first_and_append/metadata.yaml
+++ b/udf/array_drop_first_and_append/metadata.yaml
@ -0,0 +1,3 @@
+description: Drop the first element of an array, and append the given element.  Result
+  is an array with the same length as the input.
+friendly_name: Array Drop First And Append
--- a/udf/array_drop_first_and_append/udf.sql
+++ b/udf/array_drop_first_and_append/udf.sql
--- a/udf/array_of_12_zeroes/metadata.yaml
+++ b/udf/array_of_12_zeroes/metadata.yaml
@ -0,0 +1,2 @@
+description: An array of 12 zeroes
+friendly_name: Array Of 12 Zeroes
--- a/udf/array_of_12_zeroes/udf.sql
+++ b/udf/array_of_12_zeroes/udf.sql
--- a/udf/array_slice/metadata.yaml
+++ b/udf/array_slice/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Array Slice
--- a/udf/array_slice/udf.sql
+++ b/udf/array_slice/udf.sql
--- a/udf/bitcount_lowest_7/metadata.yaml
+++ b/udf/bitcount_lowest_7/metadata.yaml
@ -0,0 +1,2 @@
+description: This function counts the 1s in lowest 7 bits of an INT64
+friendly_name: Bitcount Lowest 7
--- a/udf/bitcount_lowest_7/udf.sql
+++ b/udf/bitcount_lowest_7/udf.sql
--- a/udf/bitmask_365/metadata.yaml
+++ b/udf/bitmask_365/metadata.yaml
@ -0,0 +1,2 @@
+description: A bitmask for 365 bits
+friendly_name: Bitmask 365
--- a/udf/bitmask_365/udf.sql
+++ b/udf/bitmask_365/udf.sql
--- a/udf/bitmask_lowest_28/metadata.yaml
+++ b/udf/bitmask_lowest_28/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bitmask Lowest 28
--- a/udf/bitmask_lowest_28/udf.sql
+++ b/udf/bitmask_lowest_28/udf.sql
--- a/udf/bitmask_lowest_7/metadata.yaml
+++ b/udf/bitmask_lowest_7/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bitmask Lowest 7
--- a/udf/bitmask_lowest_7/udf.sql
+++ b/udf/bitmask_lowest_7/udf.sql
--- a/udf/bitmask_range/metadata.yaml
+++ b/udf/bitmask_range/metadata.yaml
@ -0,0 +1,6 @@
+description: Returns a bitmask that can be used to return a subset of an integer representing
+  a bit array. The start_ordinal argument is an integer specifying the starting position
+  of the slice, with start_ordinal = 1 indicating the first bit. The length argument
+  is the number of bits to include in the mask.  The arguments were chosen to match
+  the semantics of the SUBSTR function; see https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#substr
+friendly_name: Bitmask Range
--- a/udf/bitmask_range/udf.sql
+++ b/udf/bitmask_range/udf.sql
--- a/udf/bits28_active_in_range/metadata.yaml
+++ b/udf/bits28_active_in_range/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 Active In Range
--- a/udf/bits28_active_in_range/udf.sql
+++ b/udf/bits28_active_in_range/udf.sql
--- a/udf/bits28_days_since_seen/metadata.yaml
+++ b/udf/bits28_days_since_seen/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 Days Since Seen
--- a/udf/bits28_days_since_seen/udf.sql
+++ b/udf/bits28_days_since_seen/udf.sql
--- a/udf/bits28_from_string/metadata.yaml
+++ b/udf/bits28_from_string/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 From String
--- a/udf/bits28_from_string/udf.sql
+++ b/udf/bits28_from_string/udf.sql
--- a/udf/bits28_range/metadata.yaml
+++ b/udf/bits28_range/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 Range
--- a/udf/bits28_range/udf.sql
+++ b/udf/bits28_range/udf.sql
--- a/udf/bits28_retention/metadata.yaml
+++ b/udf/bits28_retention/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 Retention
--- a/udf/bits28_retention/udf.sql
+++ b/udf/bits28_retention/udf.sql
--- a/udf/bits28_to_dates/metadata.yaml
+++ b/udf/bits28_to_dates/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 To Dates
--- a/udf/bits28_to_dates/udf.sql
+++ b/udf/bits28_to_dates/udf.sql
--- a/udf/bits28_to_string/metadata.yaml
+++ b/udf/bits28_to_string/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Bits28 To String
--- a/udf/bits28_to_string/udf.sql
+++ b/udf/bits28_to_string/udf.sql
--- a/udf/bits_to_active_n_weeks_ago/metadata.yaml
+++ b/udf/bits_to_active_n_weeks_ago/metadata.yaml
@ -0,0 +1,3 @@
+description: Given a BYTE and an INT64, return whether the user was active that many
+  weeks ago.  NULL input returns NULL output.
+friendly_name: Bits To Active N Weeks Ago
--- a/udf/bits_to_active_n_weeks_ago/udf.sql
+++ b/udf/bits_to_active_n_weeks_ago/udf.sql
--- a/udf/bits_to_days_seen/metadata.yaml
+++ b/udf/bits_to_days_seen/metadata.yaml
@ -0,0 +1,3 @@
+description: Given a BYTE, get the number of days the user was seen.  NULL input returns
+  NULL output.
+friendly_name: Bits To Days Seen
--- a/udf/bits_to_days_seen/udf.sql
+++ b/udf/bits_to_days_seen/udf.sql
--- a/udf/bits_to_days_since_first_seen/metadata.yaml
+++ b/udf/bits_to_days_since_first_seen/metadata.yaml
@ -0,0 +1,7 @@
+description: 'Given a BYTES, return the number of days since the client was first
+  seen.  If no bits are set, returns NULL, indicating we don''t know. Otherwise the
+  result is 0-indexed, meaning that for \x01, it will return 0.  Results showed this
+  being between 5-10x faster than the simpler alternative: CREATE OR REPLACE FUNCTION   udf.bits_to_days_since_first_seen(b
+  BYTES) AS ((     SELECT MAX(n)     FROM UNNEST(GENERATE_ARRAY(0, 8 * BYTE_LENGTH(b)))
+  AS n     WHERE BIT_COUNT(SUBSTR(b >> n, -1) & b''\x01'') > 0));  See also: bits_to_days_since_seen.sql'
+friendly_name: Bits To Days Since First Seen
--- a/udf/bits_to_days_since_first_seen/udf.sql
+++ b/udf/bits_to_days_since_first_seen/udf.sql
--- a/udf/bits_to_days_since_seen/metadata.yaml
+++ b/udf/bits_to_days_since_seen/metadata.yaml
@ -0,0 +1,7 @@
+description: 'Given a BYTES, return the number of days since the client was last seen.  If
+  no bits are set, returns NULL, indicating we don''t know. Otherwise the results
+  are 0-indexed, meaning \x01 will return 0.  Tests showed this being 5-10x faster
+  than the simpler alternative: CREATE OR REPLACE FUNCTION   udf.bits_to_days_since_seen(b
+  BYTES) AS ((     SELECT MIN(n)     FROM UNNEST(GENERATE_ARRAY(0, 364)) AS n     WHERE
+  BIT_COUNT(SUBSTR(b >> n, -1) & b''\x01'') > 0));  See also: bits_to_days_since_first_seen.sql'
+friendly_name: Bits To Days Since Seen
--- a/udf/bits_to_days_since_seen/udf.sql
+++ b/udf/bits_to_days_since_seen/udf.sql
--- a/udf/bool_to_365_bits/metadata.yaml
+++ b/udf/bool_to_365_bits/metadata.yaml
@ -0,0 +1,2 @@
+description: Convert a boolean to 365 bit byte array
+friendly_name: Bool To 365 Bits
--- a/udf/bool_to_365_bits/udf.sql
+++ b/udf/bool_to_365_bits/udf.sql
--- a/udf/boolean_histogram_to_boolean/metadata.yaml
+++ b/udf/boolean_histogram_to_boolean/metadata.yaml
@ -0,0 +1,3 @@
+description: Given histogram h, return TRUE if it has a value in the "true" bucket,
+  or FALSE if it has a value in the "false" bucket, or NULL otherwise.  https://github.com/mozilla/telemetry-batch-view/blob/ea0733c/src/main/scala/com/mozilla/telemetry/utils/MainPing.scala#L309-L317
+friendly_name: Boolean Histogram To Boolean
--- a/udf/boolean_histogram_to_boolean/udf.sql
+++ b/udf/boolean_histogram_to_boolean/udf.sql
--- a/udf/coalesce_adjacent_days_28_bits/metadata.yaml
+++ b/udf/coalesce_adjacent_days_28_bits/metadata.yaml
@ -0,0 +1,6 @@
+description: We generally want to believe only the first reasonable profile creation
+  date that we receive from a client. Given bits representing usage from the previous
+  day and the current day, this function shifts the first argument by one day and
+  returns either that value if non-zero and non-null, the current day value if non-zero
+  and non-null, or else 0.
+friendly_name: Coalesce Adjacent Days 28 Bits
--- a/udf/coalesce_adjacent_days_28_bits/udf.sql
+++ b/udf/coalesce_adjacent_days_28_bits/udf.sql
--- a/udf/coalesce_adjacent_days_365_bits/metadata.yaml
+++ b/udf/coalesce_adjacent_days_365_bits/metadata.yaml
@ -0,0 +1,7 @@
+description: Coalesce previous data's PCD with the new data's PCD.  We generally want
+  to believe only the first reasonable profile creation date that we receive from
+  a client. Given bytes representing usage from the previous day and the current day,
+  this function shifts the first argument by one day and returns either that value
+  if non-zero and non-null, the current day value if non-zero and non-null, or else
+  0.
+friendly_name: Coalesce Adjacent Days 365 Bits
--- a/udf/coalesce_adjacent_days_365_bits/udf.sql
+++ b/udf/coalesce_adjacent_days_365_bits/udf.sql
--- a/udf/combine_adjacent_days_28_bits/metadata.yaml
+++ b/udf/combine_adjacent_days_28_bits/metadata.yaml
@ -0,0 +1,5 @@
+description: Combines two bit patterns. The first pattern represents activity over
+  a 28-day period ending "yesterday". The second pattern represents activity as observed
+  today (usually just 0 or 1). We shift the bits in the first pattern by one to set
+  the new baseline as "today", then perform a bitwise OR of the two patterns.
+friendly_name: Combine Adjacent Days 28 Bits
--- a/udf/combine_adjacent_days_28_bits/udf.sql
+++ b/udf/combine_adjacent_days_28_bits/udf.sql
--- a/udf/combine_adjacent_days_365_bits/metadata.yaml
+++ b/udf/combine_adjacent_days_365_bits/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Combine Adjacent Days 365 Bits
--- a/udf/combine_adjacent_days_365_bits/udf.sql
+++ b/udf/combine_adjacent_days_365_bits/udf.sql
--- a/udf/combine_days_seen_maps/metadata.yaml
+++ b/udf/combine_days_seen_maps/metadata.yaml
@ -0,0 +1,7 @@
+description: The "clients_last_seen" class of tables represent various types of client
+  activity within a 28-day window as bit patterns.  This function takes in two arrays
+  of structs (aka maps) where each entry gives the bit pattern for days in which we
+  saw a ping for a given user in a given key. We combine the bit patterns for the
+  previous day and the current day, returning a single map.  See `udf.combine_experiment_days`
+  for a more specific example of this approach.
+friendly_name: Combine Days Seen Maps
--- a/udf/combine_days_seen_maps/udf.sql
+++ b/udf/combine_days_seen_maps/udf.sql
--- a/udf/combine_experiment_days/metadata.yaml
+++ b/udf/combine_experiment_days/metadata.yaml
@ -0,0 +1,6 @@
+description: The "clients_last_seen" class of tables represent various types of client
+  activity within a 28-day window as bit patterns.  This function takes in two arrays
+  of structs where each entry gives the bit pattern for days in which we saw a ping
+  for a given user in a given experiment. We combine the bit patterns for the previous
+  day and the current day, returning a single array of experiment structs.
+friendly_name: Combine Experiment Days
--- a/udf/combine_experiment_days/udf.sql
+++ b/udf/combine_experiment_days/udf.sql
--- a/udf/country_code_to_flag/metadata.yaml
+++ b/udf/country_code_to_flag/metadata.yaml
@ -0,0 +1,5 @@
+description: 'For a given two-letter ISO 3166-1 alpha-2 country code, returns a string
+  consisting of two Unicode regional indicator symbols, which is rendered in supporting
+  fonts (such as in the BigQuery console or STMO) as flag emoji.  This is just for
+  fun.  See:  - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 - https://en.wikipedia.org/wiki/Regional_Indicator_Symbol'
+friendly_name: Country Code To Flag
--- a/udf/country_code_to_flag/udf.sql
+++ b/udf/country_code_to_flag/udf.sql
--- a/udf/days_seen_bytes_to_rfm/metadata.yaml
+++ b/udf/days_seen_bytes_to_rfm/metadata.yaml
@ -0,0 +1,4 @@
+description: Return the frequency, recency, and T from a BYTE array, as defined in
+  https://lifetimes.readthedocs.io/en/latest/Quickstart.html#the-shape-of-your-data  RFM
+  refers to Recency, Frequency, and Monetary value.
+friendly_name: Days Seen Bytes To Rfm
--- a/udf/days_seen_bytes_to_rfm/udf.sql
+++ b/udf/days_seen_bytes_to_rfm/udf.sql
--- a/udf/days_since_created_profile_as_28_bits/metadata.yaml
+++ b/udf/days_since_created_profile_as_28_bits/metadata.yaml
@ -0,0 +1,6 @@
+description: Takes in a difference between submission date and profile creation date
+  and returns a bit pattern representing the profile creation date IFF the profile
+  date is the same as the submission date or no more than 6 days earlier.  Analysis
+  has shown that client-reported profile creation dates are much less reliable outside
+  of this range and cannot be used as reliable indicators of new profile creation.
+friendly_name: Days Since Created Profile As 28 Bits
--- a/udf/days_since_created_profile_as_28_bits/udf.sql
+++ b/udf/days_since_created_profile_as_28_bits/udf.sql
--- a/udf/deanonymize_event/metadata.yaml
+++ b/udf/deanonymize_event/metadata.yaml
@ -0,0 +1,2 @@
+description: Rename struct fields in anonymous event tuples to meaningful names.
+friendly_name: Deanonymize Event
--- a/udf/deanonymize_event/udf.sql
+++ b/udf/deanonymize_event/udf.sql
--- a/udf/decode_int64/metadata.yaml
+++ b/udf/decode_int64/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Decode Int64
--- a/udf/decode_int64/udf.sql
+++ b/udf/decode_int64/udf.sql
--- a/udf/dedupe_array/metadata.yaml
+++ b/udf/dedupe_array/metadata.yaml
@ -0,0 +1,2 @@
+description: Return an array containing only distinct values of the given array
+friendly_name: Dedupe Array
--- a/udf/dedupe_array/udf.sql
+++ b/udf/dedupe_array/udf.sql
--- a/udf/extract_count_histogram_value/metadata.yaml
+++ b/udf/extract_count_histogram_value/metadata.yaml
@ -0,0 +1,2 @@
+description: ''
+friendly_name: Extract Count Histogram Value
--- a/udf/extract_count_histogram_value/udf.sql
+++ b/udf/extract_count_histogram_value/udf.sql
--- a/udf/extract_document_type/metadata.yaml
+++ b/udf/extract_document_type/metadata.yaml
@ -0,0 +1,2 @@
+description: Extract the document type from a table name e.g. _TABLE_SUFFIX.
+friendly_name: Extract Document Type
--- a/udf/extract_document_type/udf.sql
+++ b/udf/extract_document_type/udf.sql
--- a/udf/extract_document_version/metadata.yaml
+++ b/udf/extract_document_version/metadata.yaml
@ -0,0 +1,2 @@
+description: Extract the document version from a table name e.g. _TABLE_SUFFIX.
+friendly_name: Extract Document Version
--- a/udf/extract_document_version/udf.sql
+++ b/udf/extract_document_version/udf.sql
--- a/udf/extract_histogram_sum/metadata.yaml
+++ b/udf/extract_histogram_sum/metadata.yaml
@ -0,0 +1,5 @@
+description: This is a performance optimization compared to the more general mozfun.hist.extract
+  for cases where only the histogram sum is needed.  It must support all the same
+  format variants as mozfun.hist.extract but this simplification is necessary to keep
+  the main_summary query complexity in check.
+friendly_name: Extract Histogram Sum
--- a/udf/extract_histogram_sum/udf.sql
+++ b/udf/extract_histogram_sum/udf.sql
--- a/udf/extract_schema_validation_path/metadata.yaml
+++ b/udf/extract_schema_validation_path/metadata.yaml
@ -0,0 +1,2 @@
+description: Return a path derived from an error message in `payload_bytes_error`
+friendly_name: Extract Schema Validation Path
--- a/udf/extract_schema_validation_path/udf.sql
+++ b/udf/extract_schema_validation_path/udf.sql
--- a/udf/fenix_build_to_datetime/metadata.yaml
+++ b/udf/fenix_build_to_datetime/metadata.yaml
@ -0,0 +1,6 @@
+description: 'Convert the Fenix client_info.app_build-format string to a DATETIME.
+  May return NULL on failure.  The Fenix app_build format is documented here: https://github.com/mozilla-mobile/fenix/blob/c72834479eb3e13ee91f82b529e59aa08392a92d/automation/gradle/versionCode.gradle#L13  In
+  short it is yDDDHHmm  * y is years since 2018  * DDD is day of year, 0-padded, 001-366  *
+  HH is hour of day, 00-23  * mm is minute of hour, 00-59  After using this you may
+  wish to DATETIME_TRUNC(result, DAY) for grouping by build date.'
+friendly_name: Fenix Build To Datetime
--- a/udf/fenix_build_to_datetime/udf.sql
+++ b/udf/fenix_build_to_datetime/udf.sql
--- a/udf/geo_struct/metadata.yaml
+++ b/udf/geo_struct/metadata.yaml
@ -0,0 +1,5 @@
+description: Convert geoip lookup fields to a struct, replacing '??' with NULL.  Returns
+  NULL if required field country would be NULL.  Replaces '??' with NULL because
+  '??' is a placeholder that may be used if there was an issue during geoip lookup
+  in hindsight.
+friendly_name: Geo Struct
--- a/Показать больше
+++ b/Показать больше