From 523e2f48ca8b10399cebb88d6867482b80dd840c Mon Sep 17 00:00:00 2001
From: Jarek Potiuk <jarek.potiuk@polidea.com>
Date: Mon, 4 Jan 2021 11:15:04 +0100
Subject: [PATCH] Additional properties should be allowed in provider schema
 (#13440)

The additional properties should be allowed in provider schema,
otherwise future version of providers will not be compatible with
older versions of Airflow.

Specifying 'additionalProperties' as allowed we are opening up to
adding more properties to provider.yaml.

This change fixes this is for now by removing extra fields
added since the Airlow 2.0.0 schema and verifying that the 2.0.0
schema correctly validates such modified dictionary.

In the future we might deprecate 2.0.0 and add >=2.0.1 limitation
to the provider packages in which case we will be able to remove
this modification of the provider_info dict.

Also added additional test for provider packages whether they
install on Airflow 2.0.0. This tests might remain even after the
deprecation of 2.0.0 - we can just move it to 2.0.1. However this
will give us much bigger confidence that the providers will
continue work even for older versions of Airflow 2.0.

We might have to modify that test and only include the providers
that are backwards-compatible, in case we have some providers
that depend on future Airflow versions. For now we assume
all providers should be installable from master on 2.0.0.
---
 .github/workflows/ci.yml                      |  40 +++-
 ...stomized_form_field_behaviours.schema.json |   2 +-
 .../provider-2.0.0.yaml.schema.json           | 199 ++++++++++++++++++
 airflow/provider.yaml.schema.json             |  12 +-
 .../prepare_provider_packages.py              |  56 ++++-
 setup.py                                      |   1 +
 6 files changed, 299 insertions(+), 11 deletions(-)
 create mode 100644 airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1abecddf92..637f39c122 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -387,7 +387,7 @@ jobs:
       AIRFLOW_EXTRAS: "all"
       PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
       BACKPORT_PACKAGES: "true"
-      VERSION_SUFFIX_FOR_PYPI: "rc1"
+      VERSION_SUFFIX_FOR_PYPI: "dev"
       PACKAGE_FORMAT: ${{ matrix.package-format }}
     if: needs.build-info.outputs.image-build == 'true'
     steps:
@@ -433,7 +433,7 @@ jobs:
       INSTALL_AIRFLOW_VERSION: "${{ matrix.package-format }}"
       AIRFLOW_EXTRAS: "all"
       PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
-      VERSION_SUFFIX_FOR_PYPI: "rc1"
+      VERSION_SUFFIX_FOR_PYPI: "dev"
       PACKAGE_FORMAT: ${{ matrix.package-format }}
     strategy:
       matrix:
@@ -475,6 +475,41 @@ jobs:
           path: "./files/airflow-readme-*"
           retention-days: 7
 
+  test-provider-packages-released-airflow:
+    timeout-minutes: 30
+    name: "Test Provider packages with 2.0.0 version ${{ matrix.package-format }}"
+    runs-on: ubuntu-20.04
+    needs: [build-info, ci-images]
+    env:
+      INSTALL_AIRFLOW_VERSION: "2.0.0"
+      AIRFLOW_EXTRAS: "all"
+      PYTHON_MAJOR_MINOR_VERSION: ${{needs.build-info.outputs.defaultPythonVersion}}
+      VERSION_SUFFIX_FOR_PYPI: "dev"
+      PACKAGE_FORMAT: ${{ matrix.package-format }}
+    strategy:
+      matrix:
+        package-format: ['wheel', 'sdist']
+    if: needs.build-info.outputs.image-build == 'true'
+    steps:
+      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+        uses: actions/checkout@v2
+        with:
+          persist-credentials: false
+      - name: "Setup python"
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}
+      - name: "Free space"
+        run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+      - name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
+        run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh
+      - name: "Prepare provider readmes"
+        run: ./scripts/ci/provider_packages/ci_prepare_provider_readmes.sh
+      - name: "Prepare provider packages: ${{ matrix.package-format }}"
+        run: ./scripts/ci/provider_packages/ci_prepare_provider_packages.sh
+      - name: "Install and test provider packages and airflow via ${{ matrix.package-format }} files"
+        run: ./scripts/ci/provider_packages/ci_install_and_test_provider_packages.sh
+
   tests-helm:
     timeout-minutes: 20
     name: "Python unit tests for helm chart"
@@ -941,6 +976,7 @@ jobs:
       - tests-kubernetes
       - prepare-backport-provider-packages
       - prepare-provider-packages
+      - test-provider-packages-released-airflow
       - prod-images
       - verify-prod-images
       - docs
diff --git a/airflow/customized_form_field_behaviours.schema.json b/airflow/customized_form_field_behaviours.schema.json
index 18f10aa7bf..11ac6736c0 100644
--- a/airflow/customized_form_field_behaviours.schema.json
+++ b/airflow/customized_form_field_behaviours.schema.json
@@ -24,7 +24,7 @@
       }
     }
   },
-  "additionalProperties": false,
+  "additionalProperties": true,
   "required": [
     "hidden_fields",
     "relabeling"
diff --git a/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json b/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json
new file mode 100644
index 0000000000..d6fea5019c
--- /dev/null
+++ b/airflow/deprecated_schemas/provider-2.0.0.yaml.schema.json
@@ -0,0 +1,199 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "package-name": {
+      "description": "Package name available under which the package is available in the PyPI repository.",
+      "type": "string"
+    },
+    "name": {
+      "description": "Provider name",
+      "type": "string"
+    },
+    "description": {
+      "description": "Information about the package in RST format",
+      "type": "string"
+    },
+    "versions": {
+      "description": "List of available versions in Pypi. Sorted descending according to release date.",
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "integrations": {
+      "description": "List of integrations supported by the provider.",
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "integration-name": {
+            "type": "string",
+            "description": "Name of the integration."
+          },
+          "external-doc-url": {
+            "type": "string",
+            "description": "URL to external documentation for the integration."
+          },
+          "how-to-guide": {
+            "description": "List of paths to how-to-guide for the integration. The path must start with '/docs/'",
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          },
+          "tags": {
+            "description": "List of tags describing the integration. While we're using RST, only one tag is supported per integration.",
+            "type": "array",
+            "items": {
+              "type": "string",
+              "enum": [
+                "apache",
+                "aws",
+                "azure",
+                "gcp",
+                "gmp",
+                "google",
+                "protocol",
+                "service",
+                "software",
+                "yandex"
+              ]
+            },
+            "minItems": 1,
+            "maxItems": 1
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "integration-name",
+          "external-doc-url",
+          "tags"
+        ]
+      }
+    },
+    "operators": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "integration-name": {
+            "type": "string",
+            "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
+          },
+          "python-modules": {
+            "description": "List of python modules containing the operators.",
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "integration-name",
+          "python-modules"
+        ]
+      }
+    },
+    "sensors": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "integration-name": {
+            "type": "string",
+            "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
+          },
+          "python-modules": {
+            "description": "List of python modules containing the sensors.",
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          }
+        },
+        "required": [
+          "integration-name",
+          "python-modules"
+        ],
+        "additionalProperties": false
+      }
+    },
+    "hooks": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "integration-name": {
+            "type": "string",
+            "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
+          },
+          "python-modules": {
+            "description": "List of python modules containing the hooks.",
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "integration-name",
+          "python-modules"
+        ]
+      }
+    },
+    "transfers": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "how-to-guide": {
+            "description": "Path to how-to-guide for the transfer. The path must start with '/docs/'",
+            "type": "string"
+          },
+          "source-integration-name": {
+            "type": "string",
+            "description": "Integration name. It must have a matching item in the 'integration' section of any provider."
+          },
+          "target-integration-name": {
+            "type": "string",
+            "description": "Target integration name. It must have a matching item in the 'integration' section of any provider."
+          },
+          "python-module": {
+            "type": "string",
+            "description": "List of python modules containing the transfers."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "source-integration-name",
+          "target-integration-name",
+          "python-module"
+        ]
+      }
+    },
+    "hook-class-names": {
+      "type": "array",
+      "description": "Hook class names that provide connection types to core",
+      "items": {
+        "type": "string"
+      }
+    },
+    "extra-links": {
+      "type": "array",
+      "description": "Class name that provide extra link functionality",
+      "items": {
+        "type": "string"
+      }
+    }
+  },
+  "additionalProperties": false,
+  "required": [
+    "name",
+    "package-name",
+    "description",
+    "versions"
+  ]
+}
diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json
index 287b80e7db..0ee3b56dcb 100644
--- a/airflow/provider.yaml.schema.json
+++ b/airflow/provider.yaml.schema.json
@@ -68,7 +68,7 @@
             "maxItems": 1
           }
         },
-        "additionalProperties": false,
+        "additionalProperties": true,
         "required": [
           "integration-name",
           "external-doc-url",
@@ -93,7 +93,7 @@
             }
           }
         },
-        "additionalProperties": false,
+        "additionalProperties": true,
         "required": [
           "integration-name",
           "python-modules"
@@ -121,7 +121,7 @@
           "integration-name",
           "python-modules"
         ],
-        "additionalProperties": false
+        "additionalProperties": true
       }
     },
     "hooks": {
@@ -141,7 +141,7 @@
             }
           }
         },
-        "additionalProperties": false,
+        "additionalProperties": true,
         "required": [
           "integration-name",
           "python-modules"
@@ -170,7 +170,7 @@
             "description": "List of python modules containing the transfers."
           }
         },
-        "additionalProperties": false,
+        "additionalProperties": true,
         "required": [
           "source-integration-name",
           "target-integration-name",
@@ -193,7 +193,7 @@
       }
     }
   },
-  "additionalProperties": false,
+  "additionalProperties": true,
   "required": [
     "name",
     "package-name",
diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py
index 2459a586ad..5b59886a0c 100644
--- a/dev/provider_packages/prepare_provider_packages.py
+++ b/dev/provider_packages/prepare_provider_packages.py
@@ -28,6 +28,7 @@ import sys
 import tempfile
 import textwrap
 from contextlib import contextmanager
+from copy import deepcopy
 from datetime import datetime, timedelta
 from enum import Enum
 from os import listdir
@@ -35,6 +36,8 @@ from os.path import dirname
 from shutil import copyfile
 from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Type
 
+import jsonpath_ng
+import jsonschema
 import yaml
 from packaging.version import Version
 
@@ -50,6 +53,11 @@ PROVIDERS_PATH = os.path.join(AIRFLOW_PATH, "providers")
 TARGET_PROVIDER_PACKAGES_PATH = os.path.join(SOURCE_DIR_PATH, "provider_packages")
 GENERATED_AIRFLOW_PATH = os.path.join(TARGET_PROVIDER_PACKAGES_PATH, "airflow")
 GENERATED_PROVIDERS_PATH = os.path.join(GENERATED_AIRFLOW_PATH, "providers")
+
+PROVIDER_2_0_0_DATA_SCHEMA_PATH = os.path.join(
+    SOURCE_DIR_PATH, "airflow", "deprecated_schemas", "provider-2.0.0.yaml.schema.json"
+)
+
 sys.path.insert(0, SOURCE_DIR_PATH)
 
 # those imports need to come after the above sys.path.insert to make sure that Airflow
@@ -1155,12 +1163,56 @@ def get_package_pip_name(provider_package_id: str, backport_packages: bool):
         return f"apache-airflow-providers-{provider_package_id.replace('.', '-')}"
 
 
-def get_provider_info(provider_package_id: str):
+def validate_provider_info_with_2_0_0_schema(provider_info: Dict[str, Any]) -> None:
+    """
+    Validates provider info against 2.0.0 schema.
+    :param provider_info: provider info to validate
+    """
+
+    def _load_schema() -> Dict[str, Any]:
+        with open(PROVIDER_2_0_0_DATA_SCHEMA_PATH) as schema_file:
+            content = json.load(schema_file)
+        return content
+
+    schema = _load_schema()
+    try:
+        jsonschema.validate(provider_info, schema=schema)
+    except jsonschema.ValidationError as e:
+        raise Exception(
+            "Error when validating schema. The schema must be Airflow 2.0.0 compatible. "
+            "If you added any fields please remove them via 'remove_extra_fields' method.",
+            e,
+        )
+
+
+def remove_logo_field(original_provider_info: Dict[str, Any]):
+    updated_provider_info = deepcopy(original_provider_info)
+    expression = jsonpath_ng.parse("integrations..logo")
+    updated_provider_info = expression.filter(lambda x: True, updated_provider_info)
+    return updated_provider_info
+
+
+def remove_extra_fields(provider_info: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    In Airflow 2.0.0 we set 'additionalProperties" to 'false' in provider's schema, which makes the schema
+    non future-compatible. While we changed tho additionalProperties to 'true' in 2.0.1, we have to
+    make sure that the returned provider_info when preparing package is compatible with the older version
+    of the schema and remove all the newly added fields until we deprecate (possibly even yank) 2.0.0
+    and make provider packages depend on Airflow >=2.0.1.
+    """
+    provider_info = remove_logo_field(provider_info)
+    return provider_info
+
+
+def get_provider_info(provider_package_id: str) -> Dict[str, Any]:
     provider_yaml_file_name = os.path.join(get_source_package_path(provider_package_id), "provider.yaml")
     if not os.path.exists(provider_yaml_file_name):
         raise Exception(f"The provider.yaml file is missing: {provider_yaml_file_name}")
     with open(provider_yaml_file_name) as provider_file:
-        return yaml.safe_load(provider_file.read())
+        provider_info = yaml.safe_load(provider_file.read())
+    stripped_provider_info = remove_extra_fields(provider_info)
+    validate_provider_info_with_2_0_0_schema(stripped_provider_info)
+    return stripped_provider_info
 
 
 def update_generated_files_for_package(
diff --git a/setup.py b/setup.py
index 422f03e88d..a918718baa 100644
--- a/setup.py
+++ b/setup.py
@@ -465,6 +465,7 @@ devel = [
     'importlib-resources~=1.4',
     'ipdb',
     'jira',
+    'jsonpath-ng',
     # HACK: Moto is not compatible with newer versions
     # See: https://github.com/spulec/moto/issues/3535
     'mock<4.0.3',