Fetch inventories for third-party services only once (#13068)
* Fetch inventories for third-party services only once
* fixup! Fetch inventories for third-party services only once
(cherry picked from commit fa9c6b47d3)
This commit is contained in:
Родитель
3e0c71c5a9
Коммит
763771013b
90
docs/conf.py
90
docs/conf.py
|
@@ -40,10 +40,14 @@ import yaml
|
||||||
|
|
||||||
import airflow
|
import airflow
|
||||||
from airflow.configuration import default_config_yaml
|
from airflow.configuration import default_config_yaml
|
||||||
|
from docs.exts.docs_build.third_party_inventories import ( # pylint: disable=no-name-in-module,wrong-import-order
|
||||||
|
THIRD_PARTY_INDEXES,
|
||||||
|
)
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'exts'))
|
sys.path.append(os.path.join(os.path.dirname(__file__), 'exts'))
|
||||||
|
|
||||||
CONF_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
|
CONF_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
|
||||||
|
INVENTORY_CACHE_DIR = os.path.join(CONF_DIR, '_inventory_cache')
|
||||||
ROOT_DIR = os.path.abspath(os.path.join(CONF_DIR, os.pardir))
|
ROOT_DIR = os.path.abspath(os.path.join(CONF_DIR, os.pardir))
|
||||||
FOR_PRODUCTION = os.environ.get('AIRFLOW_FOR_PRODUCTION', 'false') == 'true'
|
FOR_PRODUCTION = os.environ.get('AIRFLOW_FOR_PRODUCTION', 'false') == 'true'
|
||||||
|
|
||||||
|
@@ -70,7 +74,7 @@ elif PACKAGE_NAME.startswith('apache-airflow-providers-'):
|
||||||
else:
|
else:
|
||||||
PACKAGE_DIR = None
|
PACKAGE_DIR = None
|
||||||
PACKAGE_VERSION = 'master'
|
PACKAGE_VERSION = 'master'
|
||||||
# Adds to environment variables for easy access from other plugins like airflow_internsphinx.
|
# Adds to environment variables for easy access from other plugins like airflow_intersphinx.
|
||||||
os.environ['AIRFLOW_PACKAGE_NAME'] = PACKAGE_NAME
|
os.environ['AIRFLOW_PACKAGE_NAME'] = PACKAGE_NAME
|
||||||
if PACKAGE_DIR:
|
if PACKAGE_DIR:
|
||||||
os.environ['AIRFLOW_PACKAGE_DIR'] = PACKAGE_DIR
|
os.environ['AIRFLOW_PACKAGE_DIR'] = PACKAGE_DIR
|
||||||
|
@@ -383,52 +387,56 @@ autodoc_default_options = {'show-inheritance': True, 'members': True}
|
||||||
# -- Options for sphinx.ext.intersphinx ----------------------------------------
|
# -- Options for sphinx.ext.intersphinx ----------------------------------------
|
||||||
# See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
|
# See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
|
||||||
|
|
||||||
# This config value contains the locations and names of other projects that should
|
# This config value contains names of other projects that should
|
||||||
# be linked to in this documentation.
|
# be linked to in this documentation.
|
||||||
|
# Inventories are only downloaded once by docs/exts/docs_build/fetch_inventories.py.
|
||||||
intersphinx_mapping = {
|
intersphinx_mapping = {
|
||||||
'boto3': ('https://boto3.amazonaws.com/v1/documentation/api/latest/', None),
|
pkg_name: (f"{THIRD_PARTY_INDEXES[pkg_name]}/", (f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',))
|
||||||
'celery': ('https://docs.celeryproject.org/en/stable/', None),
|
for pkg_name in [
|
||||||
'hdfs': ('https://hdfscli.readthedocs.io/en/latest/', None),
|
'boto3',
|
||||||
'jinja2': ('https://jinja.palletsprojects.com/en/master/', None),
|
'celery',
|
||||||
'mongodb': ('https://api.mongodb.com/python/current/', None),
|
'hdfs',
|
||||||
'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
|
'jinja2',
|
||||||
'python': ('https://docs.python.org/3/', None),
|
'mongodb',
|
||||||
'requests': ('https://requests.readthedocs.io/en/master/', None),
|
'pandas',
|
||||||
'sqlalchemy': ('https://docs.sqlalchemy.org/en/latest/', None),
|
'python',
|
||||||
|
'requests',
|
||||||
|
'sqlalchemy',
|
||||||
|
]
|
||||||
}
|
}
|
||||||
if PACKAGE_NAME in ('apache-airflow-providers-google', 'apache-airflow'):
|
if PACKAGE_NAME in ('apache-airflow-providers-google', 'apache-airflow'):
|
||||||
intersphinx_mapping.update(
|
intersphinx_mapping.update(
|
||||||
{
|
{
|
||||||
'google-api-core': ('https://googleapis.dev/python/google-api-core/latest', None),
|
pkg_name: (
|
||||||
'google-cloud-automl': ('https://googleapis.dev/python/automl/latest', None),
|
f"{THIRD_PARTY_INDEXES[pkg_name]}/",
|
||||||
'google-cloud-bigquery': ('https://googleapis.dev/python/bigquery/latest', None),
|
(f'{INVENTORY_CACHE_DIR}/{pkg_name}/objects.inv',),
|
||||||
'google-cloud-bigquery-datatransfer': (
|
)
|
||||||
'https://googleapis.dev/python/bigquerydatatransfer/latest',
|
for pkg_name in [
|
||||||
None,
|
'google-api-core',
|
||||||
),
|
'google-cloud-automl',
|
||||||
'google-cloud-bigquery-storage': ('https://googleapis.dev/python/bigquerystorage/latest', None),
|
'google-cloud-bigquery',
|
||||||
'google-cloud-bigtable': ('https://googleapis.dev/python/bigtable/latest', None),
|
'google-cloud-bigquery-datatransfer',
|
||||||
'google-cloud-container': ('https://googleapis.dev/python/container/latest', None),
|
'google-cloud-bigquery-storage',
|
||||||
'google-cloud-core': ('https://googleapis.dev/python/google-cloud-core/latest', None),
|
'google-cloud-bigtable',
|
||||||
'google-cloud-datacatalog': ('https://googleapis.dev/python/datacatalog/latest', None),
|
'google-cloud-container',
|
||||||
'google-cloud-datastore': ('https://googleapis.dev/python/datastore/latest', None),
|
'google-cloud-core',
|
||||||
'google-cloud-dlp': ('https://googleapis.dev/python/dlp/latest', None),
|
'google-cloud-datacatalog',
|
||||||
'google-cloud-kms': ('https://googleapis.dev/python/cloudkms/latest', None),
|
'google-cloud-datastore',
|
||||||
'google-cloud-language': ('https://googleapis.dev/python/language/latest', None),
|
'google-cloud-dlp',
|
||||||
'google-cloud-monitoring': ('https://googleapis.dev/python/monitoring/latest', None),
|
'google-cloud-kms',
|
||||||
'google-cloud-pubsub': ('https://googleapis.dev/python/pubsub/latest', None),
|
'google-cloud-language',
|
||||||
'google-cloud-redis': ('https://googleapis.dev/python/redis/latest', None),
|
'google-cloud-monitoring',
|
||||||
'google-cloud-spanner': ('https://googleapis.dev/python/spanner/latest', None),
|
'google-cloud-pubsub',
|
||||||
'google-cloud-speech': ('https://googleapis.dev/python/speech/latest', None),
|
'google-cloud-redis',
|
||||||
'google-cloud-storage': ('https://googleapis.dev/python/storage/latest', None),
|
'google-cloud-spanner',
|
||||||
'google-cloud-tasks': ('https://googleapis.dev/python/cloudtasks/latest', None),
|
'google-cloud-speech',
|
||||||
'google-cloud-texttospeech': ('https://googleapis.dev/python/texttospeech/latest', None),
|
'google-cloud-storage',
|
||||||
'google-cloud-translate': ('https://googleapis.dev/python/translation/latest', None),
|
'google-cloud-tasks',
|
||||||
'google-cloud-videointelligence': (
|
'google-cloud-texttospeech',
|
||||||
'https://googleapis.dev/python/videointelligence/latest',
|
'google-cloud-translate',
|
||||||
None,
|
'google-cloud-videointelligence',
|
||||||
),
|
'google-cloud-vision',
|
||||||
'google-cloud-vision': ('https://googleapis.dev/python/vision/latest', None),
|
]
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@@ -27,6 +27,7 @@ from requests.adapters import DEFAULT_POOLSIZE
|
||||||
from docs.exts.docs_build.docs_builder import ( # pylint: disable=no-name-in-module
|
from docs.exts.docs_build.docs_builder import ( # pylint: disable=no-name-in-module
|
||||||
get_available_providers_packages,
|
get_available_providers_packages,
|
||||||
)
|
)
|
||||||
|
from docs.exts.docs_build.third_party_inventories import THIRD_PARTY_INDEXES
|
||||||
|
|
||||||
CURRENT_DIR = os.path.dirname(__file__)
|
CURRENT_DIR = os.path.dirname(__file__)
|
||||||
ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir))
|
ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir))
|
||||||
|
@@ -40,7 +41,6 @@ S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv"
|
||||||
|
|
||||||
|
|
||||||
def _fetch_file(session: requests.Session, url: str, path: str):
|
def _fetch_file(session: requests.Session, url: str, path: str):
|
||||||
|
|
||||||
response = session.get(url, allow_redirects=True, stream=True)
|
response = session.get(url, allow_redirects=True, stream=True)
|
||||||
if not response.ok:
|
if not response.ok:
|
||||||
print(f"Failed to fetch inventory: {url}")
|
print(f"Failed to fetch inventory: {url}")
|
||||||
|
@@ -54,12 +54,14 @@ def _fetch_file(session: requests.Session, url: str, path: str):
|
||||||
|
|
||||||
|
|
||||||
def _is_outdated(path: str):
|
def _is_outdated(path: str):
|
||||||
|
if not os.path.exists(path):
|
||||||
|
return True
|
||||||
delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
|
delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
|
||||||
return delta < datetime.timedelta(hours=12)
|
return delta > datetime.timedelta(hours=12)
|
||||||
|
|
||||||
|
|
||||||
def fetch_inventories():
|
def fetch_inventories():
|
||||||
"""Fetch all inventories for Airflow documentatio packages and store in cache."""
|
"""Fetch all inventories for Airflow documentation packages and store in cache."""
|
||||||
os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
|
os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
|
||||||
to_download = []
|
to_download = []
|
||||||
|
|
||||||
|
@@ -82,12 +84,21 @@ def fetch_inventories():
|
||||||
f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
|
f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
to_download = [
|
to_download.extend(
|
||||||
(url, path) for url, path in to_download if not (os.path.isfile(path) and _is_outdated(path))
|
(
|
||||||
]
|
f"{doc_url}/objects.inv",
|
||||||
print(f"To download {len(to_download)} inventorie(s)")
|
f'{CACHE_DIR}/{pkg_name}/objects.inv',
|
||||||
|
)
|
||||||
|
for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
|
||||||
|
)
|
||||||
|
|
||||||
|
to_download = [(url, path) for url, path in to_download if _is_outdated(path)]
|
||||||
if not to_download:
|
if not to_download:
|
||||||
|
print("Nothing to do")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
print(f"To download {len(to_download)} inventorie(s)")
|
||||||
|
|
||||||
with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
|
with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
|
||||||
for url, path in to_download:
|
for url, path in to_download:
|
||||||
pool.submit(_fetch_file, session=session, url=url, path=path)
|
pool.submit(_fetch_file, session=session, url=url, path=path)
|
||||||
|
|
|
@@ -0,0 +1,52 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
THIRD_PARTY_INDEXES = {
|
||||||
|
'boto3': 'https://boto3.amazonaws.com/v1/documentation/api/latest',
|
||||||
|
'celery': 'https://docs.celeryproject.org/en/stable',
|
||||||
|
'hdfs': 'https://hdfscli.readthedocs.io/en/latest',
|
||||||
|
'jinja2': 'https://jinja.palletsprojects.com/en/master',
|
||||||
|
'mongodb': 'https://api.mongodb.com/python/current',
|
||||||
|
'pandas': 'https://pandas.pydata.org/pandas-docs/stable',
|
||||||
|
'python': 'https://docs.python.org/3',
|
||||||
|
'requests': 'https://requests.readthedocs.io/en/master',
|
||||||
|
'sqlalchemy': 'https://docs.sqlalchemy.org/en/latest',
|
||||||
|
'google-api-core': 'https://googleapis.dev/python/google-api-core/latest',
|
||||||
|
'google-cloud-automl': 'https://googleapis.dev/python/automl/latest',
|
||||||
|
'google-cloud-bigquery': 'https://googleapis.dev/python/bigquery/latest',
|
||||||
|
'google-cloud-bigquery-datatransfer': 'https://googleapis.dev/python/bigquerydatatransfer/latest',
|
||||||
|
'google-cloud-bigquery-storage': 'https://googleapis.dev/python/bigquerystorage/latest',
|
||||||
|
'google-cloud-bigtable': 'https://googleapis.dev/python/bigtable/latest',
|
||||||
|
'google-cloud-container': 'https://googleapis.dev/python/container/latest',
|
||||||
|
'google-cloud-core': 'https://googleapis.dev/python/google-cloud-core/latest',
|
||||||
|
'google-cloud-datacatalog': 'https://googleapis.dev/python/datacatalog/latest',
|
||||||
|
'google-cloud-datastore': 'https://googleapis.dev/python/datastore/latest',
|
||||||
|
'google-cloud-dlp': 'https://googleapis.dev/python/dlp/latest',
|
||||||
|
'google-cloud-kms': 'https://googleapis.dev/python/cloudkms/latest',
|
||||||
|
'google-cloud-language': 'https://googleapis.dev/python/language/latest',
|
||||||
|
'google-cloud-monitoring': 'https://googleapis.dev/python/monitoring/latest',
|
||||||
|
'google-cloud-pubsub': 'https://googleapis.dev/python/pubsub/latest',
|
||||||
|
'google-cloud-redis': 'https://googleapis.dev/python/redis/latest',
|
||||||
|
'google-cloud-spanner': 'https://googleapis.dev/python/spanner/latest',
|
||||||
|
'google-cloud-speech': 'https://googleapis.dev/python/speech/latest',
|
||||||
|
'google-cloud-storage': 'https://googleapis.dev/python/storage/latest',
|
||||||
|
'google-cloud-tasks': 'https://googleapis.dev/python/cloudtasks/latest',
|
||||||
|
'google-cloud-texttospeech': 'https://googleapis.dev/python/texttospeech/latest',
|
||||||
|
'google-cloud-translate': 'https://googleapis.dev/python/translation/latest',
|
||||||
|
'google-cloud-videointelligence': 'https://googleapis.dev/python/videointelligence/latest',
|
||||||
|
'google-cloud-vision': 'https://googleapis.dev/python/vision/latest',
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче