Additional properties should be allowed in provider schema (#13440)

The additional properties should be allowed in provider schema,
otherwise future version of providers will not be compatible with
older versions of Airflow.

Specifying 'additionalProperties' as allowed we are opening up to
adding more properties to provider.yaml.

This change fixes this is for now by removing extra fields
added since the Airlow 2.0.0 schema and verifying that the 2.0.0
schema correctly validates such modified dictionary.

In the future we might deprecate 2.0.0 and add >=2.0.1 limitation
to the provider packages in which case we will be able to remove
this modification of the provider_info dict.

Also added additional test for provider packages whether they
install on Airflow 2.0.0. This tests might remain even after the
deprecation of 2.0.0 - we can just move it to 2.0.1. However this
will give us much bigger confidence that the providers will
continue work even for older versions of Airflow 2.0.

We might have to modify that test and only include the providers
that are backwards-compatible, in case we have some providers
that depend on future Airflow versions. For now we assume
all providers should be installable from master on 2.0.0.
This commit is contained in:
Jarek Potiuk 2021-01-04 11:15:04 +01:00 коммит произвёл GitHub
Родитель 4437137eff
Коммит 523e2f48ca
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 299 добавлений и 11 удалений

40
.github/workflows/ci.yml поставляемый
Просмотреть файл

@ -387,7 +387,7 @@ jobs:
AIRFLOW_EXTRAS: "all"
PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
BACKPORT_PACKAGES: "true"
VERSION_SUFFIX_FOR_PYPI: "rc1"
VERSION_SUFFIX_FOR_PYPI: "dev"
PACKAGE_FORMAT: ${{ matrix.package-format }}
if: needs.build-info.outputs.image-build == 'true'
steps:
@ -433,7 +433,7 @@ jobs:
INSTALL_AIRFLOW_VERSION: "${{ matrix.package-format }}"
AIRFLOW_EXTRAS: "all"
PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
VERSION_SUFFIX_FOR_PYPI: "rc1"
VERSION_SUFFIX_FOR_PYPI: "dev"
PACKAGE_FORMAT: ${{ matrix.package-format }}
strategy:
matrix:
@ -475,6 +475,41 @@ jobs:
path: "./files/airflow-readme-*"
retention-days: 7
test-provider-packages-released-airflow:
timeout-minutes: 30
name: "Test Provider packages with 2.0.0 version ${{ matrix.package-format }}"
runs-on: ubuntu-20.04
needs: [build-info, ci-images]
env:
INSTALL_AIRFLOW_VERSION: "2.0.0"
AIRFLOW_EXTRAS: "all"
PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
VERSION_SUFFIX_FOR_PYPI: "dev"
PACKAGE_FORMAT: ${{ matrix.package-format }}
strategy:
matrix:
package-format: ['wheel', 'sdist']
if: needs.build-info.outputs.image-build == 'true'
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v2
with:
persist-credentials: false
- name: "Setup python"
uses: actions/setup-python@v2
with:
python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}
- name: "Free space"
run: ./scripts/ci/tools/ci_free_space_on_ci.sh
- name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh
- name: "Prepare provider readmes"
run: ./scripts/ci/provider_packages/ci_prepare_provider_readmes.sh
- name: "Prepare provider packages: ${{ matrix.package-format }}"
run: ./scripts/ci/provider_packages/ci_prepare_provider_packages.sh
- name: "Install and test provider packages and airflow via ${{ matrix.package-format }} files"
run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh
tests-helm:
timeout-minutes: 20
name: "Python unit tests for helm chart"
@ -941,6 +976,7 @@ jobs:
- tests-kubernetes
- prepare-backport-provider-packages
- prepare-provider-packages
- test-provider-packages-released-airflow
- prod-images
- verify-prod-images
- docs

Просмотреть файл

@ -24,7 +24,7 @@
}
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"hidden_fields",
"relabeling"

Просмотреть файл

@ -0,0 +1,199 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"package-name": {
"description": "Package name available under which the package is available in the PyPI repository.",
"type": "string"
},
"name": {
"description": "Provider name",
"type": "string"
},
"description": {
"description": "Information about the package in RST format",
"type": "string"
},
"versions": {
"description": "List of available versions in Pypi. Sorted descending according to release date.",
"type": "array",
"items": {
"type": "string"
}
},
"integrations": {
"description": "List of integrations supported by the provider.",
"type": "array",
"items": {
"type": "object",
"properties": {
"integration-name": {
"type": "string",
"description": "Name of the integration."
},
"external-doc-url": {
"type": "string",
"description": "URL to external documentation for the integration."
},
"how-to-guide": {
"description": "List of paths to how-to-guide for the integration. The path must start with '/docs/'",
"type": "array",
"items": {
"type": "string"
}
},
"tags": {
"description": "List of tags describing the integration. While we're using RST, only one tag is supported per integration.",
"type": "array",
"items": {
"type": "string",
"enum": [
"apache",
"aws",
"azure",
"gcp",
"gmp",
"google",
"protocol",
"service",
"software",
"yandex"
]
},
"minItems": 1,
"maxItems": 1
}
},
"additionalProperties": false,
"required": [
"integration-name",
"external-doc-url",
"tags"
]
}
},
"operators": {
"type": "array",
"items": {
"type": "object",
"properties": {
"integration-name": {
"type": "string",
"description": "Integration name. It must have a matching item in the 'integration' section of any provider."
},
"python-modules": {
"description": "List of python modules containing the operators.",
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"integration-name",
"python-modules"
]
}
},
"sensors": {
"type": "array",
"items": {
"type": "object",
"properties": {
"integration-name": {
"type": "string",
"description": "Integration name. It must have a matching item in the 'integration' section of any provider."
},
"python-modules": {
"description": "List of python modules containing the sensors.",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"integration-name",
"python-modules"
],
"additionalProperties": false
}
},
"hooks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"integration-name": {
"type": "string",
"description": "Integration name. It must have a matching item in the 'integration' section of any provider."
},
"python-modules": {
"description": "List of python modules containing the hooks.",
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"integration-name",
"python-modules"
]
}
},
"transfers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"how-to-guide": {
"description": "Path to how-to-guide for the transfer. The path must start with '/docs/'",
"type": "string"
},
"source-integration-name": {
"type": "string",
"description": "Integration name. It must have a matching item in the 'integration' section of any provider."
},
"target-integration-name": {
"type": "string",
"description": "Target integration name. It must have a matching item in the 'integration' section of any provider."
},
"python-module": {
"type": "string",
"description": "List of python modules containing the transfers."
}
},
"additionalProperties": false,
"required": [
"source-integration-name",
"target-integration-name",
"python-module"
]
}
},
"hook-class-names": {
"type": "array",
"description": "Hook class names that provide connection types to core",
"items": {
"type": "string"
}
},
"extra-links": {
"type": "array",
"description": "Class name that provide extra link functionality",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"name",
"package-name",
"description",
"versions"
]
}

Просмотреть файл

@ -68,7 +68,7 @@
"maxItems": 1
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"integration-name",
"external-doc-url",
@ -93,7 +93,7 @@
}
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"integration-name",
"python-modules"
@ -121,7 +121,7 @@
"integration-name",
"python-modules"
],
"additionalProperties": false
"additionalProperties": true
}
},
"hooks": {
@ -141,7 +141,7 @@
}
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"integration-name",
"python-modules"
@ -170,7 +170,7 @@
"description": "List of python modules containing the transfers."
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"source-integration-name",
"target-integration-name",
@ -193,7 +193,7 @@
}
}
},
"additionalProperties": false,
"additionalProperties": true,
"required": [
"name",
"package-name",

Просмотреть файл

@ -28,6 +28,7 @@ import sys
import tempfile
import textwrap
from contextlib import contextmanager
from copy import deepcopy
from datetime import datetime, timedelta
from enum import Enum
from os import listdir
@ -35,6 +36,8 @@ from os.path import dirname
from shutil import copyfile
from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Type
import jsonpath_ng
import jsonschema
import yaml
from packaging.version import Version
@ -50,6 +53,11 @@ PROVIDERS_PATH = os.path.join(AIRFLOW_PATH, "providers")
TARGET_PROVIDER_PACKAGES_PATH = os.path.join(SOURCE_DIR_PATH, "provider_packages")
GENERATED_AIRFLOW_PATH = os.path.join(TARGET_PROVIDER_PACKAGES_PATH, "airflow")
GENERATED_PROVIDERS_PATH = os.path.join(GENERATED_AIRFLOW_PATH, "providers")
PROVIDER_2_0_0_DATA_SCHEMA_PATH = os.path.join(
SOURCE_DIR_PATH, "airflow", "deprecated_schemas", "provider-2.0.0.yaml.schema.json"
)
sys.path.insert(0, SOURCE_DIR_PATH)
# those imports need to come after the above sys.path.insert to make sure that Airflow
@ -1155,12 +1163,56 @@ def get_package_pip_name(provider_package_id: str, backport_packages: bool):
return f"apache-airflow-providers-{provider_package_id.replace('.', '-')}"
def get_provider_info(provider_package_id: str):
def validate_provider_info_with_2_0_0_schema(provider_info: Dict[str, Any]) -> None:
"""
Validates provider info against 2.0.0 schema.
:param provider_info: provider info to validate
"""
def _load_schema() -> Dict[str, Any]:
with open(PROVIDER_2_0_0_DATA_SCHEMA_PATH) as schema_file:
content = json.load(schema_file)
return content
schema = _load_schema()
try:
jsonschema.validate(provider_info, schema=schema)
except jsonschema.ValidationError as e:
raise Exception(
"Error when validating schema. The schema must be Airflow 2.0.0 compatible. "
"If you added any fields please remove them via 'remove_extra_fields' method.",
e,
)
def remove_logo_field(original_provider_info: Dict[str, Any]):
updated_provider_info = deepcopy(original_provider_info)
expression = jsonpath_ng.parse("integrations..logo")
updated_provider_info = expression.filter(lambda x: True, updated_provider_info)
return updated_provider_info
def remove_extra_fields(provider_info: Dict[str, Any]) -> Dict[str, Any]:
"""
In Airflow 2.0.0 we set 'additionalProperties" to 'false' in provider's schema, which makes the schema
non future-compatible. While we changed tho additionalProperties to 'true' in 2.0.1, we have to
make sure that the returned provider_info when preparing package is compatible with the older version
of the schema and remove all the newly added fields until we deprecate (possibly even yank) 2.0.0
and make provider packages depend on Airflow >=2.0.1.
"""
provider_info = remove_logo_field(provider_info)
return provider_info
def get_provider_info(provider_package_id: str) -> Dict[str, Any]:
provider_yaml_file_name = os.path.join(get_source_package_path(provider_package_id), "provider.yaml")
if not os.path.exists(provider_yaml_file_name):
raise Exception(f"The provider.yaml file is missing: {provider_yaml_file_name}")
with open(provider_yaml_file_name) as provider_file:
return yaml.safe_load(provider_file.read())
provider_info = yaml.safe_load(provider_file.read())
stripped_provider_info = remove_extra_fields(provider_info)
validate_provider_info_with_2_0_0_schema(stripped_provider_info)
return stripped_provider_info
def update_generated_files_for_package(

Просмотреть файл

@ -465,6 +465,7 @@ devel = [
'importlib-resources~=1.4',
'ipdb',
'jira',
'jsonpath-ng',
# HACK: Moto is not compatible with newer versions
# See: https://github.com/spulec/moto/issues/3535
'mock<4.0.3',