Download inventories only once (#12989)
Parent: 2ec03cd926
Commit: aa58ef1501
.gitignore
@@ -87,9 +87,9 @@ instance/
 # Sphinx documentation
 docs/_build/
 docs/_api/
 docs/_doctrees/
+docs/_inventory_cache/
 docs/*/_api/
-docs/_doctrees

 # PyBuilder
 target/
docs/build_docs.py
@@ -33,6 +33,7 @@ from docs.exts.docs_build.errors import (  # pylint: disable=no-name-in-module
     DocBuildError,
     display_errors_summary,
 )
+from docs.exts.docs_build.fetch_inventories import fetch_inventories
 from docs.exts.docs_build.github_action_utils import with_group  # pylint: disable=no-name-in-module
 from docs.exts.docs_build.spelling_checks import (  # pylint: disable=no-name-in-module
     SpellingError,
@@ -83,7 +84,7 @@ def _get_parser():
         '--for-production',
         dest='for_production',
         action='store_true',
-        help=('Builds documentation for official release i.e. all links point to stable version'),
+        help='Builds documentation for official release i.e. all links point to stable version',
     )

     return parser
@@ -173,6 +174,8 @@ def main():
     with with_group(f"Documentation will be built for {len(current_packages)} package(s)"):
         for pkg in current_packages:
             print(f" - {pkg}")
+    with with_group("Fetching inventories"):
+        fetch_inventories()

     all_build_errors: Dict[Optional[str], List[DocBuildError]] = {}
     all_spelling_errors: Dict[Optional[str], List[SpellingError]] = {}
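The two added lines run the inventory download once, up front, before any per-package Sphinx build starts, which is the point of this commit. `with_group` comes from `github_action_utils` (imported above); a minimal sketch of such a helper, assuming it emits GitHub Actions `::group::`/`::endgroup::` log-folding markers (the real implementation may differ):

from contextlib import contextmanager

@contextmanager
def with_group(title: str):
    """Fold everything printed inside the block into one collapsible
    log group on GitHub Actions (sketch; assumed behavior)."""
    print(f"::group::{title}")
    try:
        yield
    finally:
        print("::endgroup::")

On a plain terminal the markers simply print as text, which is harmless.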
docs/exts/airflow_intersphinx.py
@@ -26,7 +26,6 @@ CURRENT_DIR = os.path.dirname(__file__)
 ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir))
 DOCS_DIR = os.path.join(ROOT_DIR, 'docs')
-DOCS_PROVIDER_DIR = os.path.join(ROOT_DIR, 'docs')
 S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"


 def _create_init_py(app, config):
@@ -51,54 +50,32 @@ def _generate_provider_intersphinx_mapping():
             continue

         provider_base_url = f'/docs/{package_name}/{current_version}/'
+        doc_inventory = f'{DOCS_DIR}/_build/docs/{package_name}/{current_version}/objects.inv'
+        cache_inventory = f'{DOCS_DIR}/_inventory_cache/{package_name}/objects.inv'
+
         airflow_mapping[package_name] = (
             # base URI
             provider_base_url,
-            # Index locations list
-            # If passed None, this will try to fetch the index from `[base_url]/objects.inv`
-            # If we pass a path containing `://` then we will try to index from the given address.
-            # Otherwise, it will try to read the local file
-            #
-            # In this case, the local index will be read. If unsuccessful, the remote index
-            # will be fetched.
-            (
-                f'{DOCS_DIR}/_build/docs/{package_name}/{current_version}/objects.inv',
-                f'{S3_DOC_URL}/docs/{package_name}/latest/objects.inv',
-            ),
+            (doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
         )
     if os.environ.get('AIRFLOW_PACKAGE_NAME') != 'apache-airflow':
+        doc_inventory = f'{DOCS_DIR}/_build/docs/apache-airflow/{current_version}/objects.inv'
+        cache_inventory = f'{DOCS_DIR}/_inventory_cache/apache-airflow/objects.inv'
+
         airflow_mapping['apache-airflow'] = (
             # base URI
             f'/docs/apache-airflow/{current_version}/',
-            # Index locations list
-            # If passed None, this will try to fetch the index from `[base_url]/objects.inv`
-            # If we pass a path containing `://` then we will try to index from the given address.
-            # Otherwise, it will try to read the local file
-            #
-            # In this case, the local index will be read. If unsuccessful, the remote index
-            # will be fetched.
-            (
-                f'{DOCS_DIR}/_build/docs/apache-airflow/{current_version}/objects.inv',
-                f'{S3_DOC_URL}/docs/apache-airflow/latest/objects.inv',
-            ),
+            (doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
         )

     if os.environ.get('AIRFLOW_PACKAGE_NAME') != 'apache-airflow-providers':
+        doc_inventory = f'{DOCS_DIR}/_build/docs/apache-airflow-providers/objects.inv'
+        cache_inventory = f'{DOCS_DIR}/_inventory_cache/apache-airflow-providers/objects.inv'
+
         airflow_mapping['apache-airflow-providers'] = (
             # base URI
             '/docs/apache-airflow-providers/',
-            # Index locations list
-            # If passed None, this will try to fetch the index from `[base_url]/objects.inv`
-            # If we pass a path containing `://` then we will try to index from the given address.
-            # Otherwise, it will try to read the local file
-            #
-            # In this case, the local index will be read. If unsuccessful, the remote index
-            # will be fetched.
-            (
-                f'{DOCS_DIR}/_build/docs/apache-airflow-providers/objects.inv',
-                f'{S3_DOC_URL}/docs/apache-airflow-providers/objects.inv',
-            ),
+            (doc_inventory if os.path.exists(doc_inventory) else cache_inventory,),
         )

     return airflow_mapping
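After this change each entry uses the two-element value form that `sphinx.ext.intersphinx` accepts, `(base_url, (inventory_location,))`, and the inventory location is now resolved up front: the freshly built `_build` copy when it exists, otherwise the `_inventory_cache` copy populated by `fetch_inventories`. A sketch of how such a mapping plugs into a Sphinx `conf.py`; the import path and wiring are illustrative assumptions, not taken from this commit:

# conf.py sketch: merge the generated mapping into intersphinx_mapping.
# The `airflow_intersphinx` import path here is an assumption for illustration.
from airflow_intersphinx import _generate_provider_intersphinx_mapping

extensions = ['sphinx.ext.intersphinx']

intersphinx_mapping = {
    # Standard form: (target base URL, inventory); None means fetch
    # objects.inv from the base URL itself.
    'python': ('https://docs.python.org/3/', None),
    # Entries generated above: (base_url, (local_or_cached_objects_inv,)).
    **_generate_provider_intersphinx_mapping(),
}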
docs/exts/docs_build/docs_builder.py
@@ -200,7 +200,12 @@ class AirflowDocsBuilder:
         print()


+def get_available_providers_packages():
+    """Get list of all available providers packages to build."""
+    return [provider['package-name'] for provider in ALL_PROVIDER_YAMLS]
+
+
 def get_available_packages():
     """Get list of all available packages to build."""
-    provider_package_names = [provider['package-name'] for provider in ALL_PROVIDER_YAMLS]
+    provider_package_names = get_available_providers_packages()
     return ["apache-airflow", *provider_package_names, "apache-airflow-providers"]
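The list comprehension moves into its own helper so the new `fetch_inventories` module below can reuse it. Assuming `ALL_PROVIDER_YAMLS` holds the parsed per-provider `provider.yaml` dictionaries (the `package-name` key is taken from the diff; the sample values are illustrative), the two helpers compose like this:

# Illustrative only: two fake provider.yaml entries carrying the
# 'package-name' key that get_available_providers_packages() reads.
ALL_PROVIDER_YAMLS = [
    {'package-name': 'apache-airflow-providers-http'},
    {'package-name': 'apache-airflow-providers-sqlite'},
]

# get_available_packages() would then return:
# ['apache-airflow', 'apache-airflow-providers-http',
#  'apache-airflow-providers-sqlite', 'apache-airflow-providers']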
docs/exts/docs_build/fetch_inventories.py (new file)
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import concurrent.futures
+import datetime
+import os
+import shutil
+
+import requests
+from requests.adapters import DEFAULT_POOLSIZE
+
+from docs.exts.docs_build.docs_builder import get_available_providers_packages
+
+CURRENT_DIR = os.path.dirname(__file__)
+ROOT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir, os.pardir))
+DOCS_DIR = os.path.join(ROOT_DIR, 'docs')
+CACHE_DIR = os.path.join(DOCS_DIR, '_inventory_cache')
+EXPIRATION_DATE_PATH = os.path.join(DOCS_DIR, '_inventory_cache', "expiration-date")
+
+S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"
+S3_DOC_URL_VERSIONED = S3_DOC_URL + "/docs/{package_name}/latest/objects.inv"
+S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv"
+
+
+def _fetch_file(session: requests.Session, url: str, path: str):
+    """Download a single inventory file, streaming the body to disk."""
+    response = session.get(url, allow_redirects=True, stream=True)
+    if not response.ok:
+        print(f"Failed to fetch inventory: {url}")
+        return
+
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, 'wb') as f:
+        response.raw.decode_content = True
+        shutil.copyfileobj(response.raw, f)
+    print(f"Fetched inventory: {url}")
+
+
+def _is_outdated(path: str):
+    """Return True if the cached file is more than 12 hours old."""
+    delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
+    return delta > datetime.timedelta(hours=12)
+
+
+def fetch_inventories():
+    """Fetch all inventories for Airflow documentation packages and store them in the cache."""
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    to_download = []
+
+    for pkg_name in get_available_providers_packages():
+        to_download.append(
+            (
+                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
+                f'{CACHE_DIR}/{pkg_name}/objects.inv',
+            )
+        )
+    to_download.append(
+        (
+            S3_DOC_URL_VERSIONED.format(package_name='apache-airflow'),
+            f'{CACHE_DIR}/apache-airflow/objects.inv',
+        )
+    )
+    to_download.append(
+        (
+            S3_DOC_URL_NON_VERSIONED.format(package_name='apache-airflow-providers'),
+            f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
+        )
+    )
+    # Keep only targets that are missing from the cache or stale.
+    to_download = [
+        (url, path) for url, path in to_download if not os.path.isfile(path) or _is_outdated(path)
+    ]
+    print(f"To download {len(to_download)} inventories")
+    if not to_download:
+        return
+    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
+        for url, path in to_download:
+            pool.submit(_fetch_file, session=session, url=url, path=path)