# -*- coding: utf-8 -*-
# Copyright 2023 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import logging
from typing import Any

from google.cloud import ndb  # type: ignore

from framework import basehandlers
from framework import cloud_tasks_helpers
from internals.core_models import FeatureEntry
from internals.link_helpers import Link

LINK_STALE_MINUTES = 30


class FeatureLinks(ndb.Model):
  """Links that occur in the fields of the feature.

  This helps show a preview of information from linked pages, saving users
  the trouble of clicking.
  """
  created = ndb.DateTimeProperty(auto_now_add=True)
  updated = ndb.DateTimeProperty(auto_now=True)
  feature_ids = ndb.IntegerProperty(repeated=True)
  url = ndb.StringProperty(required=True)
  type = ndb.StringProperty(required=True)
  information = ndb.JsonProperty()
  is_error = ndb.BooleanProperty(default=False)
  http_error_code = ndb.IntegerProperty()
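
# A minimal sketch of what one indexed entity might look like (all field
# values below are hypothetical, not taken from real data):
#   FeatureLinks(
#       feature_ids=[123],
#       url='https://github.com/example/repo/issues/1',
#       type='github_issue',
#       information={'title': 'Example issue', 'state': 'open'})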


def update_feature_links(
    fe: FeatureEntry, changed_fields: list[tuple[str, Any, Any]]) -> None:
  """Update the links in the given feature entry."""
  for field, old_val, new_val in changed_fields:
    if new_val != old_val:
      if old_val is None and not bool(new_val):
        continue
      old_val_urls = Link.extract_urls_from_value(old_val)
      new_val_urls = Link.extract_urls_from_value(new_val)
      urls_to_remove = set(old_val_urls) - set(new_val_urls)
      urls_to_add = set(new_val_urls) - set(old_val_urls)
      fe_values = fe.to_dict(exclude=[field]).values()
      all_urls = [url for value in fe_values
                  for url in Link.extract_urls_from_value(value)]
      for url in urls_to_remove:
        if url not in all_urls:
          # If the url is not in any other field in this feature,
          # remove it from the index.
          link = Link(url)
          _remove_link(link, fe)
      for url in urls_to_add:
        link = Link(url)
        if link.type:
          feature_link = _get_index_link(link, fe, should_parse_new_link=True)
          feature_link.put()
          logging.info(
              f'Indexed feature_link {feature_link.url} to '
              f'{feature_link.key.integer_id()} for feature {fe.key.integer_id()}')
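
# A usage sketch for update_feature_links (the field name and URLs below are
# made up for illustration): an edit that swaps the spec link would arrive as
#   update_feature_links(
#       fe, [('spec_link', 'https://old.example.com', 'https://new.example.com')])
# which un-indexes the old URL (unless another field still uses it) and
# indexes the new one.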


def _get_index_link(
    link: Link, fe: FeatureEntry, should_parse_new_link: bool = False) -> FeatureLinks:
  """Index the given link for a specific feature by creating or updating a
  FeatureLinks entity.

  Returns the FeatureLinks entity.
  """
  feature_id = fe.key.integer_id()
  feature_links = FeatureLinks.query(FeatureLinks.url == link.url).fetch(None)
  feature_link: FeatureLinks = feature_links[0] if feature_links else None
  if hasattr(feature_link, 'feature_ids'):
    if feature_id not in feature_link.feature_ids:
      feature_link.feature_ids.append(feature_id)
      feature_link.type = link.type
  else:
    if should_parse_new_link:
      link.parse()
    feature_link = FeatureLinks(
        feature_ids=[feature_id],
        type=link.type,
        url=link.url,
        information=link.information,
        is_error=link.is_error,
        http_error_code=link.http_error_code,
    )
  return feature_link
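
# Note on the upsert semantics above (illustrative): indexing the same URL for
# a second feature appends that feature's id to the existing entity's
# feature_ids list rather than creating a duplicate FeatureLinks entity, so
# each URL is stored and parsed only once.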


def _remove_link(link: Link, fe: FeatureEntry) -> None:
  """Un-index the given link."""
  feature_id = fe.key.integer_id()
  feature_links = FeatureLinks.query(FeatureLinks.url == link.url).fetch(None)
  feature_link: FeatureLinks = feature_links[0] if feature_links else None
  if hasattr(feature_link, 'feature_ids'):
    if feature_id in feature_link.feature_ids:
      feature_link.feature_ids.remove(feature_id)
      if feature_link.feature_ids:
        feature_link.put()
        logging.info(f'Updated indexed link {link.url}')
      else:
        # Delete the link if it is no longer used by any feature.
        feature_link.key.delete()
        logging.info(f'Deleted indexed link {link.url}')


def _get_feature_links(feature_id: int) -> list[FeatureLinks]:
  """Return a list of FeatureLinks for a given feature id."""
  feature_links = FeatureLinks.query(
      FeatureLinks.feature_ids == feature_id).fetch(None)
  return feature_links if feature_links else []


def get_by_feature_id(
    feature_id: int, update_stale_links: bool) -> tuple[list[dict], bool]:
  """Return a list of dicts of FeatureLinks for a given feature id, plus a
  flag indicating whether any of those links were stale.

  The returned dicts only include the url, type, information, and
  http_error_code fields. This is used by the API to return JSON to the
  client.

  update_stale_links: if True, trigger a background task to update the
  information of the stale links.
  """
  feature_links = _get_feature_links(feature_id)
  stale_time = datetime.datetime.now(
      tz=datetime.timezone.utc) - datetime.timedelta(minutes=LINK_STALE_MINUTES)
  stale_time = stale_time.replace(tzinfo=None)
  stale_feature_links = [
      link for link in feature_links if link.updated < stale_time]
  has_stale_links = len(stale_feature_links) > 0
  if has_stale_links and update_stale_links:
    logging.info(
        f'Found {len(stale_feature_links)} stale links for feature_id '
        f'{feature_id}, sending links to cloud task')
    feature_link_ids = [link.key.id() for link in stale_feature_links]
    cloud_tasks_helpers.enqueue_task(
        '/tasks/update-feature-links', {
            'feature_link_ids': feature_link_ids
        })
  return ([link.to_dict(include=['url', 'type', 'information', 'http_error_code'])
           for link in feature_links], has_stale_links)
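
# Shape of the payload this returns to the API layer (values are illustrative
# only; 'web' stands in for whatever link type was detected):
#   ([{'url': 'https://example.com/spec', 'type': 'web',
#      'information': {...}, 'http_error_code': None}],
#    False)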


class FeatureLinksUpdateHandler(basehandlers.FlaskHandler):
  """This task handler updates the information of the feature links with the
  given ids."""
  IS_INTERNAL_HANDLER = True

  def process_post_data(self, **kwargs):
    self.require_task_header()
    logging.info('Starting indexing feature links')
    feature_link_ids = self.get_param('feature_link_ids')
    _index_feature_links_by_ids(feature_link_ids)
    logging.info('Finished indexing feature links')
    return {'message': 'Done'}
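
# The cloud task enqueued by get_by_feature_id() POSTs to
# /tasks/update-feature-links, which routes to this handler, with a JSON body
# like (the ids are hypothetical):
#   {'feature_link_ids': [123, 456]}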


def _index_feature_links_by_ids(feature_link_ids: list[Any]) -> None:
  """Re-parse and re-index the links in the given FeatureLinks ids."""
  for feature_link_id in feature_link_ids:
    feature_link: FeatureLinks = FeatureLinks.get_by_id(feature_link_id)
    if feature_link:
      link = Link(feature_link.url)
      link.parse()
      if link.is_error:
        feature_link.is_error = link.is_error
      else:
        # Only update the information if parsing did not produce an error.
        feature_link.information = link.information
      if link.http_error_code:
        feature_link.http_error_code = link.http_error_code
      feature_link.type = link.type
      feature_link.put()
      logging.info(f'Updated information for indexed link {link.url}')


def _extract_feature_urls(fe: FeatureEntry) -> list[str]:
  """Return a deduplicated list of URLs found in all fields of the feature."""
  fe_values = fe.to_dict().values()
  all_urls = [url for value in fe_values
              for url in Link.extract_urls_from_value(value)]
  return list(set(all_urls))


def batch_index_feature_entries(fes: list[FeatureEntry], skip_existing: bool) -> int:
  """Generate feature links for each given feature entry and store them in
  the database in batches, optionally skipping entries that already have
  indexed links.

  :param fes: a list of FeatureEntry objects to index
  :param skip_existing: whether to skip feature entries that already have
      existing feature links
  :return: the number of links indexed
  """
  link_count = 0
  for fe in fes:
    if skip_existing:
      feature_links = _get_feature_links(fe.key.integer_id())
      if len(feature_links) > 0:
        continue
    urls = _extract_feature_urls(fe)
    feature_links = []
    for url in urls:
      link = Link(url)
      if link.type:
        fl = _get_index_link(link, fe, should_parse_new_link=False)
        feature_links.append(fl)
    ndb.put_multi(feature_links)
    link_count += len(feature_links)
    logging.info(
        f'Feature {fe.key.integer_id()} indexed {len(feature_links)} urls')
  return link_count
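
# Example invocation (hypothetical, e.g. from a one-off maintenance script):
# index the first 100 features, skipping any that already have links:
#   fes = FeatureEntry.query().fetch(100)
#   count = batch_index_feature_entries(fes, skip_existing=True)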