From 763771013b8bcd94b274b578a2e37366198325a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Bregu=C5=82a?= Date: Mon, 14 Dec 2020 22:59:34 +0100 Subject: [PATCH] Fetch inventories for third-party services only once (#13068) * Fetch inventories for third-party services only once * fixup! Fetch inventories for third-party services only once (cherry picked from commit fa9c6b47d3d9c313b5f09c1de1b6769f6b6d18b4) --- docs/conf.py | 90 ++++++++++--------- docs/exts/docs_build/fetch_inventories.py | 25 ++++-- .../docs_build/third_party_inventories.py | 52 +++++++++++ 3 files changed, 119 insertions(+), 48 deletions(-) create mode 100644 docs/exts/docs_build/third_party_inventories.py diff --git a/docs/conf.py b/docs/conf.py index bf93803fcb..845e0e75c7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,10 +40,14 @@ import yaml import airflow from airflow.configuration import default_config_yaml +from docs.exts.docs_build.third_party_inventories import ( # pylint: disable=no-name-in-module,wrong-import-order + THIRD_PARTY_INDEXES, +) sys.path.append(os.path.join(os.path.dirname(__file__), 'exts')) CONF_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) +INVENTORY_CACHE_DIR = os.path.join(CONF_DIR, '_inventory_cache') ROOT_DIR = os.path.abspath(os.path.join(CONF_DIR, os.pardir)) FOR_PRODUCTION = os.environ.get('AIRFLOW_FOR_PRODUCTION', 'false') == 'true' @@ -70,7 +74,7 @@ elif PACKAGE_NAME.startswith('apache-airflow-providers-'): else: PACKAGE_DIR = None PACKAGE_VERSION = 'master' -# Adds to environment variables for easy access from other plugins like airflow_internsphinx. +# Adds to environment variables for easy access from other plugins like airflow_intersphinx. os.environ['AIRFLOW_PACKAGE_NAME'] = PACKAGE_NAME if PACKAGE_DIR: os.environ['AIRFLOW_PACKAGE_DIR'] = PACKAGE_DIR @@ -383,52 +387,56 @@ autodoc_default_options = {'show-inheritance': True, 'members': True} # -- Options for sphinx.ext.intersphinx ---------------------------------------- # See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html -# This config value contains the locations and names of other projects that should +# This config value contains names of other projects that should # be linked to in this documentation. +# Inventories are only downloaded once by docs/exts/docs_build/fetch_inventories.py. intersphinx_mapping = { - 'boto3': ('https://boto3.amazonaws.com/v1/documentation/api/latest/', None), - 'celery': ('https://docs.celeryproject.org/en/stable/', None), - 'hdfs': ('https://hdfscli.readthedocs.io/en/latest/', None), - 'jinja2': ('https://jinja.palletsprojects.com/en/master/', None), - 'mongodb': ('https://api.mongodb.com/python/current/', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'python': ('https://docs.python.org/3/', None), - 'requests': ('https://requests.readthedocs.io/en/master/', None), - 'sqlalchemy': ('https://docs.sqlalchemy.org/en/latest/', None), + pkg_name: (f"{THIRD_PARTY_INDEXES[pkg_name]}/", (f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',)) + for pkg_name in [ + 'boto3', + 'celery', + 'hdfs', + 'jinja2', + 'mongodb', + 'pandas', + 'python', + 'requests', + 'sqlalchemy', + ] } if PACKAGE_NAME in ('apache-airflow-providers-google', 'apache-airflow'): intersphinx_mapping.update( { - 'google-api-core': ('https://googleapis.dev/python/google-api-core/latest', None), - 'google-cloud-automl': ('https://googleapis.dev/python/automl/latest', None), - 'google-cloud-bigquery': ('https://googleapis.dev/python/bigquery/latest', None), - 'google-cloud-bigquery-datatransfer': ( - 'https://googleapis.dev/python/bigquerydatatransfer/latest', - None, - ), - 'google-cloud-bigquery-storage': ('https://googleapis.dev/python/bigquerystorage/latest', None), - 'google-cloud-bigtable': ('https://googleapis.dev/python/bigtable/latest', None), - 'google-cloud-container': ('https://googleapis.dev/python/container/latest', None), - 'google-cloud-core': ('https://googleapis.dev/python/google-cloud-core/latest', None), - 'google-cloud-datacatalog': ('https://googleapis.dev/python/datacatalog/latest', None), - 'google-cloud-datastore': ('https://googleapis.dev/python/datastore/latest', None), - 'google-cloud-dlp': ('https://googleapis.dev/python/dlp/latest', None), - 'google-cloud-kms': ('https://googleapis.dev/python/cloudkms/latest', None), - 'google-cloud-language': ('https://googleapis.dev/python/language/latest', None), - 'google-cloud-monitoring': ('https://googleapis.dev/python/monitoring/latest', None), - 'google-cloud-pubsub': ('https://googleapis.dev/python/pubsub/latest', None), - 'google-cloud-redis': ('https://googleapis.dev/python/redis/latest', None), - 'google-cloud-spanner': ('https://googleapis.dev/python/spanner/latest', None), - 'google-cloud-speech': ('https://googleapis.dev/python/speech/latest', None), - 'google-cloud-storage': ('https://googleapis.dev/python/storage/latest', None), - 'google-cloud-tasks': ('https://googleapis.dev/python/cloudtasks/latest', None), - 'google-cloud-texttospeech': ('https://googleapis.dev/python/texttospeech/latest', None), - 'google-cloud-translate': ('https://googleapis.dev/python/translation/latest', None), - 'google-cloud-videointelligence': ( - 'https://googleapis.dev/python/videointelligence/latest', - None, - ), - 'google-cloud-vision': ('https://googleapis.dev/python/vision/latest', None), + pkg_name: ( + f"{THIRD_PARTY_INDEXES[pkg_name]}/", + (f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',), + ) + for pkg_name in [ + 'google-api-core', + 'google-cloud-automl', + 'google-cloud-bigquery', + 'google-cloud-bigquery-datatransfer', + 'google-cloud-bigquery-storage', + 'google-cloud-bigtable', + 'google-cloud-container', + 'google-cloud-core', + 'google-cloud-datacatalog', + 'google-cloud-datastore', + 'google-cloud-dlp', + 'google-cloud-kms', + 'google-cloud-language', + 'google-cloud-monitoring', + 'google-cloud-pubsub', + 'google-cloud-redis', + 'google-cloud-spanner', + 'google-cloud-speech', + 'google-cloud-storage', + 'google-cloud-tasks', + 'google-cloud-texttospeech', + 'google-cloud-translate', + 'google-cloud-videointelligence', + 'google-cloud-vision', + ] } ) diff --git a/docs/exts/docs_build/fetch_inventories.py b/docs/exts/docs_build/fetch_inventories.py index a7eb3f1b8a..147d9c2445 100644 --- a/docs/exts/docs_build/fetch_inventories.py +++ b/docs/exts/docs_build/fetch_inventories.py @@ -27,6 +27,7 @@ from requests.adapters import DEFAULT_POOLSIZE from docs.exts.docs_build.docs_builder import ( # pylint: disable=no-name-in-module get_available_providers_packages, ) +from docs.exts.docs_build.third_party_inventories import THIRD_PARTY_INDEXES CURRENT_DIR = os.path.dirname(__file__) ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir)) @@ -40,7 +41,6 @@ S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv" def _fetch_file(session: requests.Session, url: str, path: str): - response = session.get(url, allow_redirects=True, stream=True) if not response.ok: print(f"Failed to fetch inventory: {url}") @@ -54,12 +54,14 @@ def _fetch_file(session: requests.Session, url: str, path: str): def _is_outdated(path: str): + if not os.path.exists(path): + return True delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path)) - return delta < datetime.timedelta(hours=12) + return delta > datetime.timedelta(hours=12) def fetch_inventories(): - """Fetch all inventories for Airflow documentatio packages and store in cache.""" + """Fetch all inventories for Airflow documentation packages and store in cache.""" os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True) to_download = [] @@ -82,12 +84,21 @@ def fetch_inventories(): f'{CACHE_DIR}/apache-airflow-providers/objects.inv', ) ) - to_download = [ - (url, path) for url, path in to_download if not (os.path.isfile(path) and _is_outdated(path)) - ] - print(f"To download {len(to_download)} inventorie(s)") + to_download.extend( + ( + f"{doc_url}/objects.inv", + f'{CACHE_DIR}/{pkg_name}/objects.inv', + ) + for pkg_name, doc_url in THIRD_PARTY_INDEXES.items() + ) + + to_download = [(url, path) for url, path in to_download if _is_outdated(path)] if not to_download: + print("Nothing to do") return + + print(f"To download {len(to_download)} inventorie(s)") + with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool: for url, path in to_download: pool.submit(_fetch_file, session=session, url=url, path=path) diff --git a/docs/exts/docs_build/third_party_inventories.py b/docs/exts/docs_build/third_party_inventories.py new file mode 100644 index 0000000000..02defdf14c --- /dev/null +++ b/docs/exts/docs_build/third_party_inventories.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +THIRD_PARTY_INDEXES = { + 'boto3': 'https://boto3.amazonaws.com/v1/documentation/api/latest', + 'celery': 'https://docs.celeryproject.org/en/stable', + 'hdfs': 'https://hdfscli.readthedocs.io/en/latest', + 'jinja2': 'https://jinja.palletsprojects.com/en/master', + 'mongodb': 'https://api.mongodb.com/python/current', + 'pandas': 'https://pandas.pydata.org/pandas-docs/stable', + 'python': 'https://docs.python.org/3', + 'requests': 'https://requests.readthedocs.io/en/master', + 'sqlalchemy': 'https://docs.sqlalchemy.org/en/latest', + 'google-api-core': 'https://googleapis.dev/python/google-api-core/latest', + 'google-cloud-automl': 'https://googleapis.dev/python/automl/latest', + 'google-cloud-bigquery': 'https://googleapis.dev/python/bigquery/latest', + 'google-cloud-bigquery-datatransfer': 'https://googleapis.dev/python/bigquerydatatransfer/latest', + 'google-cloud-bigquery-storage': 'https://googleapis.dev/python/bigquerystorage/latest', + 'google-cloud-bigtable': 'https://googleapis.dev/python/bigtable/latest', + 'google-cloud-container': 'https://googleapis.dev/python/container/latest', + 'google-cloud-core': 'https://googleapis.dev/python/google-cloud-core/latest', + 'google-cloud-datacatalog': 'https://googleapis.dev/python/datacatalog/latest', + 'google-cloud-datastore': 'https://googleapis.dev/python/datastore/latest', + 'google-cloud-dlp': 'https://googleapis.dev/python/dlp/latest', + 'google-cloud-kms': 'https://googleapis.dev/python/cloudkms/latest', + 'google-cloud-language': 'https://googleapis.dev/python/language/latest', + 'google-cloud-monitoring': 'https://googleapis.dev/python/monitoring/latest', + 'google-cloud-pubsub': 'https://googleapis.dev/python/pubsub/latest', + 'google-cloud-redis': 'https://googleapis.dev/python/redis/latest', + 'google-cloud-spanner': 'https://googleapis.dev/python/spanner/latest', + 'google-cloud-speech': 'https://googleapis.dev/python/speech/latest', + 'google-cloud-storage': 'https://googleapis.dev/python/storage/latest', + 'google-cloud-tasks': 'https://googleapis.dev/python/cloudtasks/latest', + 'google-cloud-texttospeech': 'https://googleapis.dev/python/texttospeech/latest', + 'google-cloud-translate': 'https://googleapis.dev/python/translation/latest', + 'google-cloud-videointelligence': 'https://googleapis.dev/python/videointelligence/latest', + 'google-cloud-vision': 'https://googleapis.dev/python/vision/latest', +}