From 523e2f48ca8b10399cebb88d6867482b80dd840c Mon Sep 17 00:00:00 2001 From: Jarek Potiuk <jarek.potiuk@polidea.com> Date: Mon, 4 Jan 2021 11:15:04 +0100 Subject: [PATCH] Additional properties should be allowed in provider schema (#13440) The additional properties should be allowed in provider schema, otherwise future version of providers will not be compatible with older versions of Airflow. Specifying 'additionalProperties' as allowed we are opening up to adding more properties to provider.yaml. This change fixes this is for now by removing extra fields added since the Airlow 2.0.0 schema and verifying that the 2.0.0 schema correctly validates such modified dictionary. In the future we might deprecate 2.0.0 and add >=2.0.1 limitation to the provider packages in which case we will be able to remove this modification of the provider_info dict. Also added additional test for provider packages whether they install on Airflow 2.0.0. This tests might remain even after the deprecation of 2.0.0 - we can just move it to 2.0.1. However this will give us much bigger confidence that the providers will continue work even for older versions of Airflow 2.0. We might have to modify that test and only include the providers that are backwards-compatible, in case we have some providers that depend on future Airflow versions. For now we assume all providers should be installable from master on 2.0.0. --- .github/workflows/ci.yml | 40 +++- ...stomized_form_field_behaviours.schema.json | 2 +- .../provider-2.0.0.yaml.schema.json | 199 ++++++++++++++++++ airflow/provider.yaml.schema.json | 12 +- .../prepare_provider_packages.py | 56 ++++- setup.py | 1 + 6 files changed, 299 insertions(+), 11 deletions(-) create mode 100644 airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1abecddf92..637f39c122 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -387,7 +387,7 @@ jobs: AIRFLOW_EXTRAS: "all" PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} BACKPORT_PACKAGES: "true" - VERSION_SUFFIX_FOR_PYPI: "rc1" + VERSION_SUFFIX_FOR_PYPI: "dev" PACKAGE_FORMAT: ${{ matrix.package-format }} if: needs.build-info.outputs.image-build == 'true' steps: @@ -433,7 +433,7 @@ jobs: INSTALL_AIRFLOW_VERSION: "${{ matrix.package-format }}" AIRFLOW_EXTRAS: "all" PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} - VERSION_SUFFIX_FOR_PYPI: "rc1" + VERSION_SUFFIX_FOR_PYPI: "dev" PACKAGE_FORMAT: ${{ matrix.package-format }} strategy: matrix: @@ -475,6 +475,41 @@ jobs: path: "./files/airflow-readme-*" retention-days: 7 + test-provider-packages-released-airflow: + timeout-minutes: 30 + name: "Test Provider packages with 2.0.0 version ${{ matrix.package-format }}" + runs-on: ubuntu-20.04 + needs: [build-info, ci-images] + env: + INSTALL_AIRFLOW_VERSION: "2.0.0" + AIRFLOW_EXTRAS: "all" + PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}} + VERSION_SUFFIX_FOR_PYPI: "dev" + PACKAGE_FORMAT: ${{ matrix.package-format }} + strategy: + matrix: + package-format: ['wheel', 'sdist'] + if: needs.build-info.outputs.image-build == 'true' + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + with: + persist-credentials: false + - name: "Setup python" + uses: actions/setup-python@v2 + with: + python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} + - name: "Free space" + run: ./scripts/ci/tools/ci_free_space_on_ci.sh + - name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" + run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh + - name: "Prepare provider readmes" + run: ./scripts/ci/provider_packages/ci_prepare_provider_readmes.sh + - name: "Prepare provider packages: ${{ matrix.package-format }}" + run: ./scripts/ci/provider_packages/ci_prepare_provider_packages.sh + - name: "Install and test provider packages and airflow via ${{ matrix.package-format }} files" + run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh + tests-helm: timeout-minutes: 20 name: "Python unit tests for helm chart" @@ -941,6 +976,7 @@ jobs: - tests-kubernetes - prepare-backport-provider-packages - prepare-provider-packages + - test-provider-packages-released-airflow - prod-images - verify-prod-images - docs diff --git a/airflow/customized_form_field_behaviours.schema.json b/airflow/customized_form_field_behaviours.schema.json index 18f10aa7bf..11ac6736c0 100644 --- a/airflow/customized_form_field_behaviours.schema.json +++ b/airflow/customized_form_field_behaviours.schema.json @@ -24,7 +24,7 @@ } } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "hidden_fields", "relabeling" diff --git a/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json b/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json new file mode 100644 index 0000000000..d6fea5019c --- /dev/null +++ b/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json @@ -0,0 +1,199 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "package-name": { + "description": "Package name available under which the package is available in the PyPI repository.", + "type": "string" + }, + "name": { + "description": "Provider name", + "type": "string" + }, + "description": { + "description": "Information about the package in RST format", + "type": "string" + }, + "versions": { + "description": "List of available versions in Pypi. Sorted descending according to release date.", + "type": "array", + "items": { + "type": "string" + } + }, + "integrations": { + "description": "List of integrations supported by the provider.", + "type": "array", + "items": { + "type": "object", + "properties": { + "integration-name": { + "type": "string", + "description": "Name of the integration." + }, + "external-doc-url": { + "type": "string", + "description": "URL to external documentation for the integration." + }, + "how-to-guide": { + "description": "List of paths to how-to-guide for the integration. The path must start with '/docs/'", + "type": "array", + "items": { + "type": "string" + } + }, + "tags": { + "description": "List of tags describing the integration. While we're using RST, only one tag is supported per integration.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "apache", + "aws", + "azure", + "gcp", + "gmp", + "google", + "protocol", + "service", + "software", + "yandex" + ] + }, + "minItems": 1, + "maxItems": 1 + } + }, + "additionalProperties": false, + "required": [ + "integration-name", + "external-doc-url", + "tags" + ] + } + }, + "operators": { + "type": "array", + "items": { + "type": "object", + "properties": { + "integration-name": { + "type": "string", + "description": "Integration name. It must have a matching item in the 'integration' section of any provider." + }, + "python-modules": { + "description": "List of python modules containing the operators.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "integration-name", + "python-modules" + ] + } + }, + "sensors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "integration-name": { + "type": "string", + "description": "Integration name. It must have a matching item in the 'integration' section of any provider." + }, + "python-modules": { + "description": "List of python modules containing the sensors.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "integration-name", + "python-modules" + ], + "additionalProperties": false + } + }, + "hooks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "integration-name": { + "type": "string", + "description": "Integration name. It must have a matching item in the 'integration' section of any provider." + }, + "python-modules": { + "description": "List of python modules containing the hooks.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "integration-name", + "python-modules" + ] + } + }, + "transfers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "how-to-guide": { + "description": "Path to how-to-guide for the transfer. The path must start with '/docs/'", + "type": "string" + }, + "source-integration-name": { + "type": "string", + "description": "Integration name. It must have a matching item in the 'integration' section of any provider." + }, + "target-integration-name": { + "type": "string", + "description": "Target integration name. It must have a matching item in the 'integration' section of any provider." + }, + "python-module": { + "type": "string", + "description": "List of python modules containing the transfers." + } + }, + "additionalProperties": false, + "required": [ + "source-integration-name", + "target-integration-name", + "python-module" + ] + } + }, + "hook-class-names": { + "type": "array", + "description": "Hook class names that provide connection types to core", + "items": { + "type": "string" + } + }, + "extra-links": { + "type": "array", + "description": "Class name that provide extra link functionality", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "name", + "package-name", + "description", + "versions" + ] +} diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json index 287b80e7db..0ee3b56dcb 100644 --- a/airflow/provider.yaml.schema.json +++ b/airflow/provider.yaml.schema.json @@ -68,7 +68,7 @@ "maxItems": 1 } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "integration-name", "external-doc-url", @@ -93,7 +93,7 @@ } } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "integration-name", "python-modules" @@ -121,7 +121,7 @@ "integration-name", "python-modules" ], - "additionalProperties": false + "additionalProperties": true } }, "hooks": { @@ -141,7 +141,7 @@ } } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "integration-name", "python-modules" @@ -170,7 +170,7 @@ "description": "List of python modules containing the transfers." } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "source-integration-name", "target-integration-name", @@ -193,7 +193,7 @@ } } }, - "additionalProperties": false, + "additionalProperties": true, "required": [ "name", "package-name", diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py index 2459a586ad..5b59886a0c 100644 --- a/dev/provider_packages/prepare_provider_packages.py +++ b/dev/provider_packages/prepare_provider_packages.py @@ -28,6 +28,7 @@ import sys import tempfile import textwrap from contextlib import contextmanager +from copy import deepcopy from datetime import datetime, timedelta from enum import Enum from os import listdir @@ -35,6 +36,8 @@ from os.path import dirname from shutil import copyfile from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Type +import jsonpath_ng +import jsonschema import yaml from packaging.version import Version @@ -50,6 +53,11 @@ PROVIDERS_PATH = os.path.join(AIRFLOW_PATH, "providers") TARGET_PROVIDER_PACKAGES_PATH = os.path.join(SOURCE_DIR_PATH, "provider_packages") GENERATED_AIRFLOW_PATH = os.path.join(TARGET_PROVIDER_PACKAGES_PATH, "airflow") GENERATED_PROVIDERS_PATH = os.path.join(GENERATED_AIRFLOW_PATH, "providers") + +PROVIDER_2_0_0_DATA_SCHEMA_PATH = os.path.join( + SOURCE_DIR_PATH, "airflow", "deprecated_schemas", "provider-2.0.0.yaml.schema.json" +) + sys.path.insert(0, SOURCE_DIR_PATH) # those imports need to come after the above sys.path.insert to make sure that Airflow @@ -1155,12 +1163,56 @@ def get_package_pip_name(provider_package_id: str, backport_packages: bool): return f"apache-airflow-providers-{provider_package_id.replace('.', '-')}" -def get_provider_info(provider_package_id: str): +def validate_provider_info_with_2_0_0_schema(provider_info: Dict[str, Any]) -> None: + """ + Validates provider info against 2.0.0 schema. + :param provider_info: provider info to validate + """ + + def _load_schema() -> Dict[str, Any]: + with open(PROVIDER_2_0_0_DATA_SCHEMA_PATH) as schema_file: + content = json.load(schema_file) + return content + + schema = _load_schema() + try: + jsonschema.validate(provider_info, schema=schema) + except jsonschema.ValidationError as e: + raise Exception( + "Error when validating schema. The schema must be Airflow 2.0.0 compatible. " + "If you added any fields please remove them via 'remove_extra_fields' method.", + e, + ) + + +def remove_logo_field(original_provider_info: Dict[str, Any]): + updated_provider_info = deepcopy(original_provider_info) + expression = jsonpath_ng.parse("integrations..logo") + updated_provider_info = expression.filter(lambda x: True, updated_provider_info) + return updated_provider_info + + +def remove_extra_fields(provider_info: Dict[str, Any]) -> Dict[str, Any]: + """ + In Airflow 2.0.0 we set 'additionalProperties" to 'false' in provider's schema, which makes the schema + non future-compatible. While we changed tho additionalProperties to 'true' in 2.0.1, we have to + make sure that the returned provider_info when preparing package is compatible with the older version + of the schema and remove all the newly added fields until we deprecate (possibly even yank) 2.0.0 + and make provider packages depend on Airflow >=2.0.1. + """ + provider_info = remove_logo_field(provider_info) + return provider_info + + +def get_provider_info(provider_package_id: str) -> Dict[str, Any]: provider_yaml_file_name = os.path.join(get_source_package_path(provider_package_id), "provider.yaml") if not os.path.exists(provider_yaml_file_name): raise Exception(f"The provider.yaml file is missing: {provider_yaml_file_name}") with open(provider_yaml_file_name) as provider_file: - return yaml.safe_load(provider_file.read()) + provider_info = yaml.safe_load(provider_file.read()) + stripped_provider_info = remove_extra_fields(provider_info) + validate_provider_info_with_2_0_0_schema(stripped_provider_info) + return stripped_provider_info def update_generated_files_for_package( diff --git a/setup.py b/setup.py index 422f03e88d..a918718baa 100644 --- a/setup.py +++ b/setup.py @@ -465,6 +465,7 @@ devel = [ 'importlib-resources~=1.4', 'ipdb', 'jira', + 'jsonpath-ng', # HACK: Moto is not compatible with newer versions # See: https://github.com/spulec/moto/issues/3535 'mock<4.0.3',