Fetch inventories for third-party services only once (#13068)

* Fetch inventories for third-party services only once

* fixup! Fetch inventories for third-party services only once

(cherry picked from commit fa9c6b47d3)
This commit is contained in:
Kamil Breguła 2020-12-14 22:59:34 +01:00 коммит произвёл Kaxil Naik
Родитель 3e0c71c5a9
Коммит 763771013b
3 изменённых файлов: 119 добавлений и 48 удалений

Просмотреть файл

@ -40,10 +40,14 @@ import yaml
import airflow import airflow
from airflow.configuration import default_config_yaml from airflow.configuration import default_config_yaml
from docs.exts.docs_build.third_party_inventories import ( # pylint: disable=no-name-in-module,wrong-import-order
THIRD_PARTY_INDEXES,
)
sys.path.append(os.path.join(os.path.dirname(__file__), 'exts')) sys.path.append(os.path.join(os.path.dirname(__file__), 'exts'))
CONF_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) CONF_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
INVENTORY_CACHE_DIR = os.path.join(CONF_DIR, '_inventory_cache')
ROOT_DIR = os.path.abspath(os.path.join(CONF_DIR, os.pardir)) ROOT_DIR = os.path.abspath(os.path.join(CONF_DIR, os.pardir))
FOR_PRODUCTION = os.environ.get('AIRFLOW_FOR_PRODUCTION', 'false') == 'true' FOR_PRODUCTION = os.environ.get('AIRFLOW_FOR_PRODUCTION', 'false') == 'true'
@ -70,7 +74,7 @@ elif PACKAGE_NAME.startswith('apache-airflow-providers-'):
else: else:
PACKAGE_DIR = None PACKAGE_DIR = None
PACKAGE_VERSION = 'master' PACKAGE_VERSION = 'master'
# Adds to environment variables for easy access from other plugins like airflow_internsphinx. # Adds to environment variables for easy access from other plugins like airflow_intersphinx.
os.environ['AIRFLOW_PACKAGE_NAME'] = PACKAGE_NAME os.environ['AIRFLOW_PACKAGE_NAME'] = PACKAGE_NAME
if PACKAGE_DIR: if PACKAGE_DIR:
os.environ['AIRFLOW_PACKAGE_DIR'] = PACKAGE_DIR os.environ['AIRFLOW_PACKAGE_DIR'] = PACKAGE_DIR
@ -383,52 +387,56 @@ autodoc_default_options = {'show-inheritance': True, 'members': True}
# -- Options for sphinx.ext.intersphinx ---------------------------------------- # -- Options for sphinx.ext.intersphinx ----------------------------------------
# See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html # See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
# This config value contains the locations and names of other projects that should # This config value contains names of other projects that should
# be linked to in this documentation. # be linked to in this documentation.
# Inventories are only downloaded once by docs/exts/docs_build/fetch_inventories.py.
intersphinx_mapping = { intersphinx_mapping = {
'boto3': ('https://boto3.amazonaws.com/v1/documentation/api/latest/', None), pkg_name: (f"{THIRD_PARTY_INDEXES[pkg_name]}/", (f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',))
'celery': ('https://docs.celeryproject.org/en/stable/', None), for pkg_name in [
'hdfs': ('https://hdfscli.readthedocs.io/en/latest/', None), 'boto3',
'jinja2': ('https://jinja.palletsprojects.com/en/master/', None), 'celery',
'mongodb': ('https://api.mongodb.com/python/current/', None), 'hdfs',
'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'jinja2',
'python': ('https://docs.python.org/3/', None), 'mongodb',
'requests': ('https://requests.readthedocs.io/en/master/', None), 'pandas',
'sqlalchemy': ('https://docs.sqlalchemy.org/en/latest/', None), 'python',
'requests',
'sqlalchemy',
]
} }
if PACKAGE_NAME in ('apache-airflow-providers-google', 'apache-airflow'): if PACKAGE_NAME in ('apache-airflow-providers-google', 'apache-airflow'):
intersphinx_mapping.update( intersphinx_mapping.update(
{ {
'google-api-core': ('https://googleapis.dev/python/google-api-core/latest', None), pkg_name: (
'google-cloud-automl': ('https://googleapis.dev/python/automl/latest', None), f"{THIRD_PARTY_INDEXES[pkg_name]}/",
'google-cloud-bigquery': ('https://googleapis.dev/python/bigquery/latest', None), (f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',),
'google-cloud-bigquery-datatransfer': ( )
'https://googleapis.dev/python/bigquerydatatransfer/latest', for pkg_name in [
None, 'google-api-core',
), 'google-cloud-automl',
'google-cloud-bigquery-storage': ('https://googleapis.dev/python/bigquerystorage/latest', None), 'google-cloud-bigquery',
'google-cloud-bigtable': ('https://googleapis.dev/python/bigtable/latest', None), 'google-cloud-bigquery-datatransfer',
'google-cloud-container': ('https://googleapis.dev/python/container/latest', None), 'google-cloud-bigquery-storage',
'google-cloud-core': ('https://googleapis.dev/python/google-cloud-core/latest', None), 'google-cloud-bigtable',
'google-cloud-datacatalog': ('https://googleapis.dev/python/datacatalog/latest', None), 'google-cloud-container',
'google-cloud-datastore': ('https://googleapis.dev/python/datastore/latest', None), 'google-cloud-core',
'google-cloud-dlp': ('https://googleapis.dev/python/dlp/latest', None), 'google-cloud-datacatalog',
'google-cloud-kms': ('https://googleapis.dev/python/cloudkms/latest', None), 'google-cloud-datastore',
'google-cloud-language': ('https://googleapis.dev/python/language/latest', None), 'google-cloud-dlp',
'google-cloud-monitoring': ('https://googleapis.dev/python/monitoring/latest', None), 'google-cloud-kms',
'google-cloud-pubsub': ('https://googleapis.dev/python/pubsub/latest', None), 'google-cloud-language',
'google-cloud-redis': ('https://googleapis.dev/python/redis/latest', None), 'google-cloud-monitoring',
'google-cloud-spanner': ('https://googleapis.dev/python/spanner/latest', None), 'google-cloud-pubsub',
'google-cloud-speech': ('https://googleapis.dev/python/speech/latest', None), 'google-cloud-redis',
'google-cloud-storage': ('https://googleapis.dev/python/storage/latest', None), 'google-cloud-spanner',
'google-cloud-tasks': ('https://googleapis.dev/python/cloudtasks/latest', None), 'google-cloud-speech',
'google-cloud-texttospeech': ('https://googleapis.dev/python/texttospeech/latest', None), 'google-cloud-storage',
'google-cloud-translate': ('https://googleapis.dev/python/translation/latest', None), 'google-cloud-tasks',
'google-cloud-videointelligence': ( 'google-cloud-texttospeech',
'https://googleapis.dev/python/videointelligence/latest', 'google-cloud-translate',
None, 'google-cloud-videointelligence',
), 'google-cloud-vision',
'google-cloud-vision': ('https://googleapis.dev/python/vision/latest', None), ]
} }
) )

Просмотреть файл

@ -27,6 +27,7 @@ from requests.adapters import DEFAULT_POOLSIZE
from docs.exts.docs_build.docs_builder import ( # pylint: disable=no-name-in-module from docs.exts.docs_build.docs_builder import ( # pylint: disable=no-name-in-module
get_available_providers_packages, get_available_providers_packages,
) )
from docs.exts.docs_build.third_party_inventories import THIRD_PARTY_INDEXES
CURRENT_DIR = os.path.dirname(__file__) CURRENT_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir)) ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir))
@ -40,7 +41,6 @@ S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv"
def _fetch_file(session: requests.Session, url: str, path: str): def _fetch_file(session: requests.Session, url: str, path: str):
response = session.get(url, allow_redirects=True, stream=True) response = session.get(url, allow_redirects=True, stream=True)
if not response.ok: if not response.ok:
print(f"Failed to fetch inventory: {url}") print(f"Failed to fetch inventory: {url}")
@ -54,12 +54,14 @@ def _fetch_file(session: requests.Session, url: str, path: str):
def _is_outdated(path: str): def _is_outdated(path: str):
if not os.path.exists(path):
return True
delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path)) delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
return delta < datetime.timedelta(hours=12) return delta > datetime.timedelta(hours=12)
def fetch_inventories(): def fetch_inventories():
"""Fetch all inventories for Airflow documentatio packages and store in cache.""" """Fetch all inventories for Airflow documentation packages and store in cache."""
os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True) os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
to_download = [] to_download = []
@ -82,12 +84,21 @@ def fetch_inventories():
f'{CACHE_DIR}/apache-airflow-providers/objects.inv', f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
) )
) )
to_download = [ to_download.extend(
(url, path) for url, path in to_download if not (os.path.isfile(path) and _is_outdated(path)) (
] f"{doc_url}/objects.inv",
print(f"To download {len(to_download)} inventorie(s)") f'{CACHE_DIR}/{pkg_name}/objects.inv',
)
for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
)
to_download = [(url, path) for url, path in to_download if _is_outdated(path)]
if not to_download: if not to_download:
print("Nothing to do")
return return
print(f"To download {len(to_download)} inventorie(s)")
with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool: with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
for url, path in to_download: for url, path in to_download:
pool.submit(_fetch_file, session=session, url=url, path=path) pool.submit(_fetch_file, session=session, url=url, path=path)

Просмотреть файл

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Common prefix of every Google client-library documentation site listed below.
_GOOGLEAPIS_DOCS_ROOT = 'https://googleapis.dev/python'

# Google package name -> path segment of its documentation under the root above.
# The segment frequently differs from the package name, so it is spelled out.
_GOOGLE_DOC_SLUGS = {
    'google-api-core': 'google-api-core',
    'google-cloud-automl': 'automl',
    'google-cloud-bigquery': 'bigquery',
    'google-cloud-bigquery-datatransfer': 'bigquerydatatransfer',
    'google-cloud-bigquery-storage': 'bigquerystorage',
    'google-cloud-bigtable': 'bigtable',
    'google-cloud-container': 'container',
    'google-cloud-core': 'google-cloud-core',
    'google-cloud-datacatalog': 'datacatalog',
    'google-cloud-datastore': 'datastore',
    'google-cloud-dlp': 'dlp',
    'google-cloud-kms': 'cloudkms',
    'google-cloud-language': 'language',
    'google-cloud-monitoring': 'monitoring',
    'google-cloud-pubsub': 'pubsub',
    'google-cloud-redis': 'redis',
    'google-cloud-spanner': 'spanner',
    'google-cloud-speech': 'speech',
    'google-cloud-storage': 'storage',
    'google-cloud-tasks': 'cloudtasks',
    'google-cloud-texttospeech': 'texttospeech',
    'google-cloud-translate': 'translation',
    'google-cloud-videointelligence': 'videointelligence',
    'google-cloud-vision': 'vision',
}

# Maps a third-party project name to the base URL of its published documentation.
# URLs are stored WITHOUT a trailing slash; users of this mapping append the
# separator (and e.g. ``objects.inv``) themselves.
THIRD_PARTY_INDEXES = {
    'boto3': 'https://boto3.amazonaws.com/v1/documentation/api/latest',
    'celery': 'https://docs.celeryproject.org/en/stable',
    'hdfs': 'https://hdfscli.readthedocs.io/en/latest',
    'jinja2': 'https://jinja.palletsprojects.com/en/master',
    'mongodb': 'https://api.mongodb.com/python/current',
    'pandas': 'https://pandas.pydata.org/pandas-docs/stable',
    'python': 'https://docs.python.org/3',
    'requests': 'https://requests.readthedocs.io/en/master',
    'sqlalchemy': 'https://docs.sqlalchemy.org/en/latest',
    # Google entries follow the general ones, in the order declared above.
    **{
        package: f'{_GOOGLEAPIS_DOCS_ROOT}/{slug}/latest'
        for package, slug in _GOOGLE_DOC_SLUGS.items()
    },
}