Initial commit for the sample solution (#6)
Sample solution that demonstrates how to deploy and analyze spaceborne data using Azure Synapse Analytics
This commit is contained in:
Parent: 2cdccb9e38
Commit: 6e8faaf04e
@ -0,0 +1,2 @@
|
|||
# Declare files that will always have LF line endings on checkout.
|
||||
*.sh text eol=lf
|
|
@ -0,0 +1,71 @@
|
|||
name: Docker
|
||||
|
||||
on:
|
||||
push:
|
||||
# Publish `main` as Docker `latest` image.
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- poolmodel
|
||||
|
||||
# Publish `v1.2.3` tags as releases.
|
||||
tags:
|
||||
- v*
|
||||
|
||||
# Runs tests for any PRs.
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
|
||||
env:
|
||||
IMAGE_NAME: custom_vision_offline
|
||||
|
||||
jobs:
|
||||
# See also https://docs.docker.com/docker-hub/builds/automated-testing/
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
if [ -f docker-compose.test.yml ]; then
|
||||
docker-compose --file docker-compose.test.yml build
|
||||
docker-compose --file docker-compose.test.yml run sut
|
||||
else
|
||||
docker build src/aimodels/custom_vision_object_detection_offline/container
|
||||
fi
|
||||
# Push image to GitHub Packages.
|
||||
# See also https://docs.docker.com/docker-hub/builds/
|
||||
pushpackagereg:
|
||||
# Ensure test job passes before pushing image.
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'push'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Build image
|
||||
run: docker build --tag $IMAGE_NAME src/aimodels/custom_vision_object_detection_offline/container
|
||||
|
||||
- name: Log into registry
|
||||
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.actor }} --password-stdin
|
||||
|
||||
- name: Push image to github packages registry
|
||||
run: |
|
||||
IMAGE_ID=docker.pkg.github.com/${{ github.repository }}/$IMAGE_NAME
|
||||
# Change all uppercase to lowercase
|
||||
IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
|
||||
# Strip git ref prefix from version
|
||||
VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
|
||||
# Strip "v" prefix from tag name
|
||||
[[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
|
||||
# Use Docker `latest` tag convention
|
||||
[ "$VERSION" == "main" ] && VERSION=latest
|
||||
echo IMAGE_ID=$IMAGE_ID
|
||||
echo VERSION=$VERSION
|
||||
docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
|
||||
docker push $IMAGE_ID:$VERSION
|
|
@ -0,0 +1,18 @@
|
|||
## Contributors
|
||||
|
||||
This repository is currently maintained by the [Azure Orbital Analytics](https://github.com/orgs/Azure/teams/azure-orbital-analytics-contributors) team.
|
||||
|
||||
Azure Orbital Analytics has benefited from many developers, including:
|
||||
- [Karthick Narendran](https://github.com/karthick-rn)
|
||||
- [Katy Smith](https://github.com/kes256)
|
||||
- [Kevin Mack](https://github.com/kevindmack)
|
||||
- [Kungumaraj Nachimuthu](https://github.com/senthilkungumaraj)
|
||||
- [Mandar Inamdar](https://github.com/mandarinamdar)
|
||||
- [Nikhil Manchanda](https://github.com/SlickNik)
|
||||
- [Safiyah Sadiq](https://github.com/safiyahs)
|
||||
- [Sushil Kumar](https://github.com/sushilkm)
|
||||
- [Tatyana Pearson](https://github.com/tpearson02)
|
||||
- [Taylor Corbett](https://github.com/TaylorCorbett)
|
||||
- [Tony Griffith](https://github.com/pgc1a)
|
||||
- [Tushar Dhadiwal](https://github.com/tushardhadiwal)
|
||||
- [Xiaoyuan Yang](https://github.com/xiaoyuan-ms)
|
README.md
|
@ -1,14 +1,22 @@
|
|||
# Project
|
||||
|
||||
> This repo has been populated by an initial template to help get you started. Please
|
||||
> make sure to update the content to build a great experience for community-building.
|
||||
|
||||
As the maintainer of this project, please make a few updates:
|
||||
This repository contains a sample solution that demonstrates how to deploy and execute the [Geospatial Analysis using Azure Synapse Analytics](https://aka.ms/synapse-geospatial-analytics) workload on your Azure tenant. We recommend that you read the "Geospatial Analysis using Azure Synapse Analytics" document before deploying this solution.
|
||||
|
||||
- Improving this README.MD file to provide a great experience
|
||||
- Updating SUPPORT.MD with content about this project's support experience
|
||||
- Understanding the security reporting process in SECURITY.MD
|
||||
- Remove this section from the README
|
||||
Disclaimer: The solution and samples provided in this repository are for learning purposes only. They're intended to explore the possibilities of the Azure services and are a starting point for developing your own solution. We recommend that you follow the security best practices described in the Microsoft documentation for the individual services.
|
||||
|
||||
# Getting Started
|
||||
|
||||
Start by following the steps in the `deploy` folder to set up the Azure resources required to build your pipeline.
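For example, a minimal end-to-end sketch of that setup (the commands below mirror the steps documented in the `deploy` folder; `aoi` and `westus` are example values):

```bash
# Clone this repository to get the deployment scripts
git clone git@github.com:Azure/Azure-Orbital-Analytics-Samples.git
cd Azure-Orbital-Analytics-Samples

# Sign in and select the subscription to deploy into
az login
az account set -s <subscription_id>

# Provision the infrastructure: <environmentCode> <location> [<envTag>]
./deploy/install.sh aoi westus
```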
|
||||
|
||||
Import the pipeline under the `workflow` folder to your Azure Synapse Analytics instance's workspace. Alternatively, you can copy the files to your repository (git or Azure DevOps) and link the repository to your Azure Synapse Analytics workspace.
|
||||
|
||||
Sample pipelines are provided that include the following AI Model:
|
||||
|
||||
### a. AI model
|
||||
|
||||
This solution uses the Custom Vision model as a sample AI model to demonstrate the end-to-end Azure Synapse geospatial analysis workflow. The sample uses the Custom Vision model to detect pools in the given geospatial data.
|
||||
You can run any other AI model (for object detection or otherwise) against this solution by providing a similar [specification](/src/aimodels/custom_vision_object_detection_offline/specs/custom_vision_object_detection.json), or a different specification as defined by that AI model, to integrate it into your solution.
|
||||
|
||||
## Contributing
|
||||
|
||||
|
@ -28,6 +36,6 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio
|
|||
|
||||
This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
|
||||
trademarks or logos is subject to and must follow
|
||||
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
|
||||
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).
|
||||
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
|
||||
Any use of third-party trademarks or logos is subject to those third parties' policies.
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
|
||||
|
||||
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
|
||||
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
|
||||
|
||||
## Reporting Security Issues
|
||||
|
||||
|
@ -12,7 +12,7 @@ If you believe you have found a security vulnerability in any Microsoft-owned re
|
|||
|
||||
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
|
||||
|
||||
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
|
||||
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/msrc/pgp-key-msrc).
|
||||
|
||||
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
|
||||
|
||||
|
@ -36,6 +36,6 @@ We prefer all communications to be in English.
|
|||
|
||||
## Policy
|
||||
|
||||
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
|
||||
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/msrc/cvd).
|
||||
|
||||
<!-- END MICROSOFT SECURITY.MD BLOCK -->
|
SUPPORT.md
|
@ -1,25 +0,0 @@
|
|||
# TODO: The maintainer of this repo has not yet edited this file
|
||||
|
||||
**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
|
||||
|
||||
- **No CSS support:** Fill out this template with information about how to file issues and get help.
|
||||
- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport).
|
||||
- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide.
|
||||
|
||||
*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
|
||||
|
||||
# Support
|
||||
|
||||
## How to file issues and get help
|
||||
|
||||
This project uses GitHub Issues to track bugs and feature requests. Please search the existing
|
||||
issues before filing new issues to avoid duplicates. For new issues, file your bug or
|
||||
feature request as a new Issue.
|
||||
|
||||
For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
|
||||
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
|
||||
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
|
||||
|
||||
## Microsoft Support Policy
|
||||
|
||||
Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
|
|
@ -0,0 +1,243 @@
|
|||
# Prerequisites
|
||||
|
||||
The deployment script uses the following tools. Please follow the links provided to install them on the computer from which you will execute the script.
|
||||
|
||||
- [bicep](https://docs.microsoft.com/azure/azure-resource-manager/bicep/install)
|
||||
- [az cli](https://docs.microsoft.com/cli/azure/install-azure-cli)
|
||||
- [docker cli](https://docs.docker.com/get-docker/)
|
||||
- [jq](https://stedolan.github.io/jq/download/)
|
||||
|
||||
- The scripts are executed in a bash shell, so if you are using a computer with a Windows-based operating system, install a [WSL](https://docs.microsoft.com/windows/wsl/about) environment to execute them.
|
||||
|
||||
- The user performing the deployment of the bicep template and the associated scripts should have the `Contributor` role assigned on the subscription to which the resources are being deployed.
|
||||
|
||||
- This solution assumes that no Azure Policies deployed to your tenant prevent the resources from being deployed.
|
||||
|
||||
- The bicep templates included in this solution are not idempotent. Use them for greenfield deployments only.
|
||||
|
||||
- Clone the repository to get the scripts, using the following command:
|
||||
```bash
|
||||
git clone git@github.com:Azure/Azure-Orbital-Analytics-Samples.git
|
||||
```
|
||||
|
||||
You need the [git](https://github.com/git-guides/install-git) CLI tool to clone the repository.
|
||||
|
||||
Alternatively, you can use Azure Cloud Shell to deploy this sample solution to your Azure subscription.
|
||||
|
||||
# How do the scripts work?
|
||||
|
||||
The shell script runs an `az cli` command that invokes the `bicep` tool.
|
||||
|
||||
This command receives the bicep template as input and converts it into an intermediate ARM template, which is then submitted to the Azure APIs to create the Azure resources.
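Conceptually, the script wraps a subscription-scoped deployment similar to the sketch below (simplified; run it from the `deploy` folder, and note that `install.sh` supplies the actual parameter values):

```bash
# Optional: compile the bicep template to an ARM template to inspect the output
az bicep build --file main.bicep

# Submit the template to the Azure APIs at subscription scope
az deployment sub create \
    --location <region> \
    --name <deployment_name> \
    --template-file main.bicep \
    --parameters location=<region> environmentCode=<environment_name_prefix> environment=<tag_value>
```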
|
||||
|
||||
|
||||
# Executing the script
|
||||
|
||||
Before executing the script, log in to Azure using the `az` CLI and set the subscription in which you want to provision the resources.
|
||||
|
||||
```bash
|
||||
az login
|
||||
az account set -s <subscription_id>
|
||||
```
|
||||
|
||||
The script is designed to be executed with minimal input; it requires the following parameters:
|
||||
- `environmentCode`, which serves as the prefix for infrastructure resource names.
|
||||
- `location`, which specifies the Azure region the infrastructure is deployed in.
|
||||
|
||||
To install the infrastructure, execute the `install.sh` script as follows:
|
||||
|
||||
```bash
|
||||
./deploy/install.sh <environmentCode> <location> <envTag>
|
||||
|
||||
```
|
||||
|
||||
Default values for the parameters are provided in the script itself.
|
||||
|
||||
Arguments | Required | Sample value
|
||||
----------|-----------|-------
|
||||
environmentCode | yes | aoi
|
||||
location | yes | westus
|
||||
envTag | no | synapse\-\<environmentCode\>
|
||||
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
./deploy/install.sh aoi-demo westus demo
|
||||
|
||||
|
||||
```
|
||||
|
||||
# Using bicep template
|
||||
|
||||
You can also use the bicep template directly instead of the `install.sh` script.
|
||||
|
||||
To deploy the resources using the bicep template, use the following command:
|
||||
|
||||
```bash
|
||||
az deployment sub create -l <region_name> -n <deployment_name> -f main.bicep -p location=<region_name> environmentCode=<environment_name_prefix> environment=<tag_value>
|
||||
```
|
||||
|
||||
For example:
|
||||
```bash
|
||||
az deployment sub create -l <region> -n aoi -f main.bicep -p location=<region> environmentCode=aoi-demo environment=devSynapse
|
||||
```
|
||||
|
||||
# Verifying infrastructure resources
|
||||
|
||||
Once setup has been executed, check for the following resource groups and resources to confirm successful execution.
|
||||
|
||||
The following resource groups and resources should be created when the command `./deploy/install.sh aoi-demo` is executed (a CLI check is sketched after this list).
|
||||
|
||||
- `aoi-demo-data-rg`
|
||||
|
||||
This resource group houses data resources.
|
||||
|
||||
- Storage account named `rawdata<6-character-random-string>` to store raw input data for pipelines.
|
||||
- Keyvault named `aoi-demo-data-kv` to store credentials as secrets.
|
||||
|
||||
- `aoi-demo-monitor-rg`
|
||||
|
||||
This resource group houses monitoring resources.
|
||||
|
||||
- App Insights instance named `aoi-demo-monitor-appinsights` for monitoring.
|
||||
- Log Analytics workspace named `aoi-demo-monitor-workspace` to store monitoring data.
|
||||
|
||||
- `aoi-demo-network-rg`
|
||||
|
||||
This resource group houses networking resources.
|
||||
|
||||
- Virtual network named `aoi-demo-vnet` which has 3 subnets.
|
||||
|
||||
- `pipeline-subnet`
|
||||
- `data-subnet`
|
||||
- `orchestration-subnet`
|
||||
- It also has network security groups to restrict access on the network.
|
||||
|
||||
- `aoi-demo-orc-rg`
|
||||
|
||||
This resource group houses pipeline orchestration resources.
|
||||
|
||||
- Storage account named `batchacc<6-character-random-string>` used as the auto-storage account for the Batch Account.
|
||||
- Batch Account named `aoi-demoorcbatchact`.
|
||||
|
||||
Also, go to the Batch Account and switch to the Pools blade. Look for one or more pools created by the bicep template, and make sure the pools finished resizing without any errors.
|
||||
|
||||
- Errors while resizing the pools are indicated by a red exclamation icon next to the pool. The most common failures are related to VM quota limitations.
|
||||
- Resizing may take a few minutes. Pools that are resizing show numbers like `0 -> 1` in the dedicated nodes column. Pools that have completed resizing show the final number of dedicated nodes.
|
||||
|
||||
Wait for all pools to complete resizing before moving to the next steps.
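If you prefer the CLI over the portal, the sketch below is one way to check a pool's state (it assumes the default CPU pool name `data-cpu-pool` created by the bicep template):

```bash
# Authenticate the az batch commands against the Batch Account
az batch account login --name <batch-account-name> --resource-group <environmentCode>-orc-rg

# An allocationState of "steady" means resizing has finished
az batch pool show --pool-id data-cpu-pool \
    --query "{allocationState:allocationState, dedicatedNodes:currentDedicatedNodes}" -o table
```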
|
||||
|
||||
Note: The Bicep template adds the Synapse workspace's Managed Identity to the Batch Account as `Contributor`. Alternatively, Custom Role Definitions can be used to assign the Synapse workspace's Managed Identity to the Batch Account with required Azure RBAC operations.
|
||||
|
||||
- Keyvault named `aoi-demo-orc-kv`.
|
||||
- User managed identity `aoi-demo-orc-umi` for access and authentication.
|
||||
- Azure Container registry instance named `aoi-demoorcacr` to store container images.
|
||||
|
||||
- `aoi-demo-pipeline-rg`
|
||||
|
||||
This resource group houses Synapse pipeline resources.
|
||||
|
||||
- Keyvault instance named `aoi-demo-pipeline-kv` to hold secrets for pipeline.
|
||||
- Storage account named `synhns<6-character-random-string>` for Synapse workspace.
|
||||
- Synapse workspace named `aoi-demo-pipeline-syn-ws` to hold pipeline resources.
|
||||
- Synapse spark pool `pool<6-character-random-string>` to run analytics.
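To confirm the resource groups from the CLI (a minimal sketch, assuming the `aoi-demo` environment code used above):

```bash
# List the resource groups created by ./deploy/install.sh aoi-demo
az group list \
    --query "[?starts_with(name, 'aoi-demo-')].{name:name, state:properties.provisioningState}" -o table
```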
|
||||
|
||||
|
||||
# Load the Custom Vision Model to your Container Registry
|
||||
|
||||
The Custom Vision model is a containerized image that can be downloaded as a tar.gz file and loaded into your Container Registry. To load the image into your Container Registry, follow the steps below:
|
||||
|
||||
```bash
|
||||
curl https://stllrairbusdatav2.blob.core.windows.net/public/images/custom_vision_offline.tar.gz --output custom_vision_offline.tar.gz
|
||||
|
||||
docker load < custom_vision_offline.tar.gz
|
||||
|
||||
docker tag custom_vision_offline <container-registry-name>.azurecr.io/custom_vision_offline:latest
|
||||
|
||||
az acr login --name <container-registry-name>
|
||||
|
||||
docker push <container-registry-name>.azurecr.io/custom_vision_offline:latest
|
||||
|
||||
```
|
||||
|
||||
Once the image is loaded to your Container Registry, update the [specification document](../src/aimodels/custom_vision_object_detection_offline/specs/custom_vision_object_detection.json) with the Container Registry details.
|
||||
|
||||
A [specification document](../src/aimodels/custom_vision_object_detection_offline/specs/custom_vision_object_detection.json) and a [configuration file](../src/aimodels/custom_vision_object_detection_offline/config/config.json) are required to run the Custom Vision model.
|
||||
|
||||
- Specification document - This solution defines a framework that standardizes how AI models are run as containerized solutions. The specification document works as the contract definition for running an AI model.
|
||||
|
||||
- Configuration file - Each AI model may require one or more parameters at run time. These parameters, driven by the end user, are passed to the AI model in the form of a configuration file. The schema of the configuration file is specific to the AI model, so we provide a template for the end user to plug in their values.
|
||||
|
||||
# Configuring the Resources
|
||||
|
||||
The next step is to configure your resources and set them up with the required dependencies (Python files, library requirements, and so on) before importing the Synapse pipeline. Run the `configure.sh` script as shown below to perform the configuration:
|
||||
|
||||
```bash
|
||||
./deploy/configure.sh <environmentCode>
|
||||
```
|
||||
|
||||
# Packaging the Synapse Pipeline
|
||||
|
||||
To package the Synapse pipeline, run the `package.sh` script by following the syntax below:
|
||||
|
||||
```bash
|
||||
./deploy/package.sh <environmentCode>
|
||||
```
|
||||
|
||||
Once the above step completes, a zip file is generated. Upload the generated zip file to your Synapse Studio by following the steps below:
|
||||
|
||||
1. Open the Synapse Studio
|
||||
2. Switch to Integrate tab on the left
|
||||
3. At the top of the left pane, click on the "+" dropdown and select "Import resources from support files"
|
||||
4. When prompted to select a file, pick the zip file generated in the previous step
|
||||
5. The pipelines and their dependencies are imported into Synapse Studio. Validate the imported components for any errors
|
||||
6. Click "Publish all" and wait for the imported components to be published
|
||||
|
||||
## Running the pipeline
|
||||
|
||||
To run the pipeline, open Synapse Studio for the Synapse workspace that you created and follow the steps listed below.
|
||||
|
||||
- Open the `E2E Custom Vision Model Flow` pipeline and click the Debug button
|
||||
|
||||
- When presented with the parameters, fill out the values. The table below describes what each parameter represents.
|
||||
|
||||
| parameter | description |
|
||||
|--|--|
|
||||
| Prefix | A random string for each run of the pipeline. Use a random string between 4 and 6 characters long. Example - hj4t3d |
|
||||
| StorageAccountName | Name of the Storage Account that hosts the Raw data |
|
||||
| StorageAccountKey | Access Key of the Storage Account that hosts the Raw data |
|
||||
| BatchAccountName | Name of the Batch Account to run the AI Model |
|
||||
| BatchJobName | Job name within the Batch Account that runs the AI Model |
|
||||
| BatchLocation | Location of the Batch Account that runs the AI Model |
|
||||
|
||||
- Once the parameters are entered, click OK to submit and kick off the pipeline.
|
||||
|
||||
- Wait for the pipeline to complete.
|
||||
|
||||
# Cleanup Script
|
||||
|
||||
We provide a cleanup script to delete the resource groups, and thus the resources, provisioned using the `environmentCode`.
|
||||
As discussed above, the `environmentCode` is used as the prefix to generate resource group names, so the cleanup script deletes the resource groups with those generated names.
|
||||
|
||||
Execute the cleanup script as follows:
|
||||
|
||||
```bash
|
||||
./deploy/cleanup.sh <environmentCode>
|
||||
```
|
||||
|
||||
For example:
|
||||
```bash
|
||||
./deploy/cleanup.sh aoi-demo
|
||||
```
|
||||
|
||||
If you do not want to delete a specific resource group (and thus its resources), set the corresponding `NO_DELETE_*_RESOURCE_GROUP` environment variable to `true`, as shown below:
|
||||
|
||||
```bash
|
||||
NO_DELETE_DATA_RESOURCE_GROUP=true
|
||||
NO_DELETE_MONITORING_RESOURCE_GROUP=true
|
||||
NO_DELETE_NETWORKING_RESOURCE_GROUP=true
|
||||
NO_DELETE_ORCHESTRATION_RESOURCE_GROUP=true
|
||||
NO_DELETE_PIPELINE_RESOURCE_GROUP=true
|
||||
./deploy/cleanup.sh <environmentCode>
|
||||
```
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
set -x
|
||||
|
||||
# parameters
|
||||
envCode=${1:-"aoi"}
|
||||
|
||||
NO_DELETE_DATA_RESOURCE_GROUP=${NO_DELETE_DATA_RESOURCE_GROUP:-"false"}
|
||||
DATA_RESOURCE_GROUP="${envCode}-data-rg"
|
||||
|
||||
NO_DELETE_MONITORING_RESOURCE_GROUP=${NO_DELETE_MONITORING_RESOURCE_GROUP:-"false"}
|
||||
MONITORING_RESOURCE_GROUP="${envCode}-monitor-rg"
|
||||
|
||||
NO_DELETE_NETWORKING_RESOURCE_GROUP=${NO_DELETE_NETWORKING_RESOURCE_GROUP:-"false"}
|
||||
NETWORKING_RESOURCE_GROUP="${envCode}-network-rg"
|
||||
|
||||
NO_DELETE_ORCHESTRATION_RESOURCE_GROUP=${NO_DELETE_ORCHESTRATION_RESOURCE_GROUP:-"false"}
|
||||
ORCHESTRATION_RESOURCE_GROUP="${envCode}-orc-rg"
|
||||
|
||||
NO_DELETE_PIPELINE_RESOURCE_GROUP=${NO_DELETE_PIPELINE_RESOURCE_GROUP:-"false"}
|
||||
PIPELINE_RESOURCE_GROUP="${envCode}-pipeline-rg"
|
||||
|
||||
if [[ ${NO_DELETE_DATA_RESOURCE_GROUP} != "true" ]] && [[ ${NO_DELETE_DATA_RESOURCE_GROUP} == "false" ]]; then
|
||||
set -x
|
||||
az group delete --name ${DATA_RESOURCE_GROUP} --no-wait --yes
|
||||
set +x
|
||||
fi
|
||||
|
||||
if [ "${NO_DELETE_MONITORING_RESOURCE_GROUP}" != "true" ] && [ "${NO_DELETE_MONITORING_RESOURCE_GROUP}" == "false" ]; then
|
||||
set -x
|
||||
az group delete --name ${MONITORING_RESOURCE_GROUP} --no-wait --yes
|
||||
set +x
|
||||
fi
|
||||
|
||||
if [ "${NO_DELETE_NETWORKING_RESOURCE_GROUP}" != "true" ] && [ "${NO_DELETE_NETWORKING_RESOURCE_GROUP}" == "false" ]; then
|
||||
set -x
|
||||
az group delete --name ${NETWORKING_RESOURCE_GROUP} --no-wait --yes
|
||||
set +x
|
||||
fi
|
||||
|
||||
if [ "${NO_DELETE_ORCHESTRATION_RESOURCE_GROUP}" != "true" ] && [ "${NO_DELETE_ORCHESTRATION_RESOURCE_GROUP}" == "false" ]; then
|
||||
set -x
|
||||
az group delete --name ${ORCHESTRATION_RESOURCE_GROUP} --no-wait --yes
|
||||
set +x
|
||||
fi
|
||||
|
||||
if [ "${NO_DELETE_PIPELINE_RESOURCE_GROUP}" != "true" ] && [ "${NO_DELETE_PIPELINE_RESOURCE_GROUP}" == "false" ]; then
|
||||
set -x
|
||||
az group delete --name ${PIPELINE_RESOURCE_GROUP} --no-wait --yes
|
||||
set +x
|
||||
fi
|
||||
|
||||
set +x
|
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
PRJ_ROOT="$(cd `dirname "${BASH_SOURCE}"`/..; pwd)"
|
||||
ENVCODE=$1
|
||||
|
||||
echo "configuration started ..."
|
||||
|
||||
set -x
|
||||
|
||||
# get synapse workspace and pool
|
||||
SYNAPSE_WORKSPACE=$(az synapse workspace list --query "[?tags.workspaceId && tags.workspaceId == 'default'].name" -o tsv -g $1-pipeline-rg)
|
||||
echo $SYNAPSE_WORKSPACE
|
||||
SYNAPSE_WORKSPACE_RG=$(az synapse workspace list --query "[?tags.workspaceId && tags.workspaceId == 'default'].resourceGroup" -o tsv -g $1-pipeline-rg)
|
||||
echo $SYNAPSE_WORKSPACE_RG
|
||||
SYNAPSE_POOL=$(az synapse spark pool list --workspace-name ${SYNAPSE_WORKSPACE} --resource-group ${SYNAPSE_WORKSPACE_RG} --query "[?tags.poolId && tags.poolId == 'default'].name" -o tsv)
|
||||
echo $SYNAPSE_POOL
|
||||
|
||||
if [[ -n $SYNAPSE_WORKSPACE ]] && [[ -n $SYNAPSE_WORKSPACE_RG ]] && [[ -n $SYNAPSE_POOL ]]
|
||||
then
|
||||
# upload synapse pool
|
||||
az synapse spark pool update --name ${SYNAPSE_POOL} --workspace-name ${SYNAPSE_WORKSPACE} --resource-group ${ENVCODE}-pipeline-rg --library-requirements "${PRJ_ROOT}/deploy/environment.yml"
|
||||
fi
|
||||
|
||||
# get batch account
|
||||
BATCH_ACCT=$(az batch account list --query "[?tags.type && tags.type == 'batch'].name" -o tsv -g ${ENVCODE}-orc-rg)
|
||||
echo $BATCH_ACCT
|
||||
|
||||
BATCH_ACCT_KEY=$(az batch account keys list --name ${BATCH_ACCT} --resource-group ${ENVCODE}-orc-rg | jq -r ".primary")
|
||||
|
||||
if [[ -n $BATCH_ACCT ]]
|
||||
then
|
||||
az batch account login --name ${BATCH_ACCT} --resource-group ${ENVCODE}-orc-rg
|
||||
# create batch job for custom vision model
|
||||
az batch job create --id 'custom-vision-model-job' --pool-id 'data-cpu-pool' --account-name ${BATCH_ACCT} --account-key ${BATCH_ACCT_KEY}
|
||||
fi
|
||||
SYNAPSE_STORAGE_ACCT=$(az storage account list --query "[?tags.store && tags.store == 'synapse'].name" -o tsv -g $1-pipeline-rg)
|
||||
echo $SYNAPSE_STORAGE_ACCT
|
||||
|
||||
if [[ -n $SYNAPSE_STORAGE_ACCT ]]
|
||||
then
|
||||
# create a container to upload the spark job python files
|
||||
az storage container create --name "spark-jobs" --account-name ${SYNAPSE_STORAGE_ACCT}
|
||||
# uploads the spark job python files
|
||||
az storage blob upload-batch --destination "spark-jobs" --account-name ${SYNAPSE_STORAGE_ACCT} --source "${PRJ_ROOT}/src/transforms/spark-jobs"
|
||||
fi
|
||||
|
||||
set +x
|
||||
|
||||
echo "configuration completed!"
|
|
@ -0,0 +1,14 @@
|
|||
name: aoi-env
|
||||
channels:
|
||||
- conda-forge
|
||||
- defaults
|
||||
dependencies:
|
||||
- gdal=3.3.0
|
||||
- pip>=20.1.1
|
||||
- azure-storage-file-datalake
|
||||
- libgdal
|
||||
- shapely
|
||||
- pyproj
|
||||
- pip:
|
||||
- rasterio
|
||||
- geopandas
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// List of required parameters
|
||||
param environmentCode string
|
||||
param environmentTag string
|
||||
param projectName string
|
||||
param location string
|
||||
|
||||
// Name parameters for infrastructure resources
|
||||
param dataResourceGroupName string = ''
|
||||
param pipelineResourceGroupName string = ''
|
||||
param pipelineLinkedSvcKeyVaultName string = ''
|
||||
param keyvaultName string = ''
|
||||
param rawDataStorageAccountName string = ''
|
||||
|
||||
// Parameters with default values for Keyvault
|
||||
param keyvaultSkuName string = 'Standard'
|
||||
param objIdForKeyvaultAccessPolicyPolicy string = ''
|
||||
param keyvaultCertPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultKeyPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultSecretPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultStoragePermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultUsePublicIp bool = true
|
||||
param keyvaultPublicNetworkAccess bool = true
|
||||
param keyvaultEnabledForDeployment bool = true
|
||||
param keyvaultEnabledForDiskEncryption bool = true
|
||||
param keyvaultEnabledForTemplateDeployment bool = true
|
||||
param keyvaultEnablePurgeProtection bool = true
|
||||
param keyvaultEnableRbacAuthorization bool = false
|
||||
param keyvaultEnableSoftDelete bool = true
|
||||
param keyvaultSoftDeleteRetentionInDays int = 7
|
||||
|
||||
// Parameters with default values for Synapse and its Managed Identity
|
||||
param synapseMIStorageAccountRoles array = [
|
||||
'ba92f5b4-2d11-453d-a403-e96b0029c9fe'
|
||||
'974c5e8b-45b9-4653-ba55-5f855dd0fb88'
|
||||
]
|
||||
param synapseMIPrincipalId string = ''
|
||||
|
||||
var namingPrefix = '${environmentCode}-${projectName}'
|
||||
var dataResourceGroupNameVar = empty(dataResourceGroupName) ? '${namingPrefix}-rg' : dataResourceGroupName
|
||||
var nameSuffix = substring(uniqueString(dataResourceGroupNameVar), 0, 6)
|
||||
var keyvaultNameVar = empty(keyvaultName) ? '${namingPrefix}-kv' : keyvaultName
|
||||
var rawDataStorageAccountNameVar = empty(rawDataStorageAccountName) ? 'rawdata${nameSuffix}' : rawDataStorageAccountName
|
||||
|
||||
module keyVault '../modules/akv.bicep' = {
|
||||
name: '${namingPrefix}-akv'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
keyVaultName: keyvaultNameVar
|
||||
location: location
|
||||
skuName:keyvaultSkuName
|
||||
objIdForAccessPolicyPolicy: objIdForKeyvaultAccessPolicyPolicy
|
||||
certPermission:keyvaultCertPermission
|
||||
keyPermission:keyvaultKeyPermission
|
||||
secretPermission:keyvaultSecretPermission
|
||||
storagePermission:keyvaultStoragePermission
|
||||
usePublicIp: keyvaultUsePublicIp
|
||||
publicNetworkAccess:keyvaultPublicNetworkAccess
|
||||
enabledForDeployment: keyvaultEnabledForDeployment
|
||||
enabledForDiskEncryption: keyvaultEnabledForDiskEncryption
|
||||
enabledForTemplateDeployment: keyvaultEnabledForTemplateDeployment
|
||||
enablePurgeProtection: keyvaultEnablePurgeProtection
|
||||
enableRbacAuthorization: keyvaultEnableRbacAuthorization
|
||||
enableSoftDelete: keyvaultEnableSoftDelete
|
||||
softDeleteRetentionInDays: keyvaultSoftDeleteRetentionInDays
|
||||
}
|
||||
}
|
||||
|
||||
module rawDataStorageAccount '../modules/storage.bicep' = {
|
||||
name: '${namingPrefix}-raw-data-storage'
|
||||
params: {
|
||||
storageAccountName: rawDataStorageAccountNameVar
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
isHnsEnabled: true
|
||||
storeType: 'raw'
|
||||
}
|
||||
}
|
||||
|
||||
module rawDataStorageAccountFileShare '../modules/file-share.bicep' = {
|
||||
name: '${namingPrefix}-raw-data-storage-fileshare'
|
||||
params: {
|
||||
storageAccountName: rawDataStorageAccountNameVar
|
||||
shareName: 'volume-a'
|
||||
}
|
||||
dependsOn: [
|
||||
rawDataStorageAccount
|
||||
]
|
||||
}
|
||||
|
||||
module rawDataStorageAccountCredentials '../modules/storage.credentials.to.keyvault.bicep' = {
|
||||
name: '${namingPrefix}-raw-data-storage-credentials'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
storageAccountName: rawDataStorageAccountNameVar
|
||||
keyVaultName: pipelineLinkedSvcKeyVaultName
|
||||
keyVaultResourceGroup: pipelineResourceGroupName
|
||||
}
|
||||
dependsOn: [
|
||||
rawDataStorageAccount
|
||||
]
|
||||
}
|
||||
|
||||
module synapseIdentityForStorageAccess '../modules/storage-role-assignment.bicep' = [ for (role, index) in synapseMIStorageAccountRoles: {
|
||||
name: '${namingPrefix}-synapse-id-kv-${index}'
|
||||
params: {
|
||||
resourceName: rawDataStorageAccountNameVar
|
||||
principalId: synapseMIPrincipalId
|
||||
roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${role}'
|
||||
}
|
||||
dependsOn: [
|
||||
rawDataStorageAccount
|
||||
]
|
||||
}]
|
||||
|
||||
output rawStorageAccountName string = rawDataStorageAccountNameVar
|
||||
output rawStorageFileEndpointUri string = rawDataStorageAccount.outputs.fileEndpointUri
|
||||
output rawStoragePrimaryKey string = rawDataStorageAccount.outputs.primaryKey
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// List of required parameters
|
||||
param environmentCode string
|
||||
param environmentTag string
|
||||
param projectName string
|
||||
param location string
|
||||
|
||||
// Parameters with default values for Monitoring
|
||||
var namingPrefix = '${environmentCode}-${projectName}'
|
||||
|
||||
module workspace '../modules/log-analytics.bicep' = {
|
||||
name : '${namingPrefix}-workspace'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
workspaceName: '${namingPrefix}-workspace'
|
||||
location: location
|
||||
}
|
||||
}
|
||||
|
||||
module appinsights '../modules/appinsights.bicep' = {
|
||||
name : '${namingPrefix}-appinsights'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
applicationInsightsName: '${namingPrefix}-appinsights'
|
||||
location: location
|
||||
workspaceId: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.OperationalInsights/workspaces/${namingPrefix}-workspace'
|
||||
}
|
||||
dependsOn: [
|
||||
workspace
|
||||
]
|
||||
}
|
||||
|
||||
output workspaceId string = workspace.outputs.workspaceId
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// List of required parameters
|
||||
param environmentCode string
|
||||
param environmentTag string
|
||||
param projectName string
|
||||
param Location string
|
||||
|
||||
// Parameters with default values for Virtual Network
|
||||
param virtualNetworkName string = ''
|
||||
param vnetAddressPrefix string = '10.5.0.0/16'
|
||||
param pipelineSubnetAddressPrefix string = '10.5.1.0/24'
|
||||
param dataSubnetAddressPrefix string = '10.5.2.0/24'
|
||||
param orchestrationSubnetAddressPrefix string = '10.5.3.0/24'
|
||||
|
||||
var namingPrefix = '${environmentCode}-${projectName}'
|
||||
|
||||
module vnet '../modules/vnet.bicep' = {
|
||||
name: virtualNetworkName
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
virtualNetworkName: virtualNetworkName
|
||||
location: Location
|
||||
addressPrefix: vnetAddressPrefix
|
||||
}
|
||||
}
|
||||
|
||||
module pipelineSubnet '../modules/subnet.bicep' = {
|
||||
name: '${namingPrefix}-pipeline-subnet'
|
||||
params: {
|
||||
vNetName: virtualNetworkName
|
||||
subnetName: 'pipeline-subnet'
|
||||
subnetAddressPrefix: pipelineSubnetAddressPrefix
|
||||
}
|
||||
dependsOn: [
|
||||
vnet
|
||||
]
|
||||
}
|
||||
|
||||
module dataSubnet '../modules/subnet.bicep' = {
|
||||
name: '${namingPrefix}-data-subnet'
|
||||
params: {
|
||||
vNetName: virtualNetworkName
|
||||
subnetName: 'data-subnet'
|
||||
subnetAddressPrefix: dataSubnetAddressPrefix
|
||||
}
|
||||
dependsOn: [
|
||||
pipelineSubnet
|
||||
]
|
||||
}
|
||||
|
||||
module orchestrationSubnet '../modules/subnet.bicep' = {
|
||||
name: '${namingPrefix}-orchestration-subnet'
|
||||
params: {
|
||||
vNetName: virtualNetworkName
|
||||
subnetName: 'orchestration-subnet'
|
||||
subnetAddressPrefix: orchestrationSubnetAddressPrefix
|
||||
}
|
||||
dependsOn: [
|
||||
dataSubnet
|
||||
]
|
||||
}
|
|
@ -0,0 +1,286 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// List of required parameters
|
||||
param environmentCode string
|
||||
param environmentTag string
|
||||
param projectName string
|
||||
param location string
|
||||
|
||||
param synapseMIPrincipalId string
|
||||
|
||||
// Guid to role definitions to be used during role
|
||||
// assignments including the below roles definitions:
|
||||
// Contributor
|
||||
param synapseMIBatchAccountRoles array = [
|
||||
'b24988ac-6180-42a0-ab88-20f7382dd24c'
|
||||
]
|
||||
|
||||
// Name parameters for infrastructure resources
|
||||
param orchestrationResourceGroupName string = ''
|
||||
param keyvaultName string = ''
|
||||
param batchAccountName string = ''
|
||||
param batchAccountAutoStorageAccountName string = ''
|
||||
param acrName string = ''
|
||||
param uamiName string = ''
|
||||
|
||||
param pipelineResourceGroupName string
|
||||
param pipelineLinkedSvcKeyVaultName string
|
||||
|
||||
// Mount options
|
||||
param mountAccountName string
|
||||
param mountAccountKey string
|
||||
param mountFileUrl string
|
||||
|
||||
// Parameters with default values for Keyvault
|
||||
param keyvaultSkuName string = 'Standard'
|
||||
param objIdForKeyvaultAccessPolicyPolicy string = ''
|
||||
param keyvaultCertPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultKeyPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultSecretPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultStoragePermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultUsePublicIp bool = true
|
||||
param keyvaultPublicNetworkAccess bool = true
|
||||
param keyvaultEnabledForDeployment bool = true
|
||||
param keyvaultEnabledForDiskEncryption bool = true
|
||||
param keyvaultEnabledForTemplateDeployment bool = true
|
||||
param keyvaultEnablePurgeProtection bool = true
|
||||
param keyvaultEnableRbacAuthorization bool = false
|
||||
param keyvaultEnableSoftDelete bool = true
|
||||
param keyvaultSoftDeleteRetentionInDays int = 7
|
||||
|
||||
// Parameters with default values for Batch Account
|
||||
param allowedAuthenticationModesBatchSvc array = [
|
||||
'AAD'
|
||||
'SharedKey'
|
||||
'TaskAuthenticationToken'
|
||||
]
|
||||
param allowedAuthenticationModesUsrSub array = [
|
||||
'AAD'
|
||||
'TaskAuthenticationToken'
|
||||
]
|
||||
|
||||
param batchAccountAutoStorageAuthenticationMode string = 'StorageKeys'
|
||||
param batchAccountPoolAllocationMode string = 'BatchService'
|
||||
param batchAccountPublicNetworkAccess bool = true
|
||||
|
||||
// Parameters with default values for Data Fetch Batch Account Pool
|
||||
param batchAccountCpuOnlyPoolName string = 'data-cpu-pool'
|
||||
param batchAccountCpuOnlyPoolVmSize string = 'standard_d2s_v3'
|
||||
param batchAccountCpuOnlyPoolDedicatedNodes int = 0
|
||||
param batchAccountCpuOnlyPoolImageReferencePublisher string = 'microsoft-azure-batch'
|
||||
param batchAccountCpuOnlyPoolImageReferenceOffer string = 'ubuntu-server-container'
|
||||
param batchAccountCpuOnlyPoolImageReferenceSku string = '20-04-lts'
|
||||
param batchAccountCpuOnlyPoolImageReferenceVersion string = 'latest'
|
||||
param batchAccountCpuOnlyPoolStartTaskCommandLine string = '/bin/bash -c "apt-get update && apt-get install -y python3-pip && pip install requests && pip install azure-storage-blob && pip install pandas"'
|
||||
|
||||
|
||||
param batchLogsDiagCategories array = [
|
||||
'allLogs'
|
||||
]
|
||||
param batchMetricsDiagCategories array = [
|
||||
'AllMetrics'
|
||||
]
|
||||
param logAnalyticsWorkspaceId string
|
||||
|
||||
// Parameters with default values for ACR
|
||||
param acrSku string = 'Standard'
|
||||
|
||||
var namingPrefix = '${environmentCode}-${projectName}'
|
||||
var orchestrationResourceGroupNameVar = empty(orchestrationResourceGroupName) ? '${namingPrefix}-rg' : orchestrationResourceGroupName
|
||||
var nameSuffix = substring(uniqueString(orchestrationResourceGroupNameVar), 0, 6)
|
||||
var uamiNameVar = empty(uamiName) ? '${namingPrefix}-umi' : uamiName
|
||||
var keyvaultNameVar = empty(keyvaultName) ? '${namingPrefix}-kv' : keyvaultName
|
||||
var batchAccountNameVar = empty(batchAccountName) ? '${environmentCode}${projectName}batchact' : batchAccountName
|
||||
var batchAccountAutoStorageAccountNameVar = empty(batchAccountAutoStorageAccountName) ? 'batchacc${nameSuffix}' : batchAccountAutoStorageAccountName
|
||||
var acrNameVar = empty(acrName) ? '${environmentCode}${projectName}acr' : acrName
|
||||
|
||||
module keyVault '../modules/akv.bicep' = {
|
||||
name: '${namingPrefix}-akv'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
keyVaultName: keyvaultNameVar
|
||||
location: location
|
||||
skuName:keyvaultSkuName
|
||||
objIdForAccessPolicyPolicy: objIdForKeyvaultAccessPolicyPolicy
|
||||
certPermission:keyvaultCertPermission
|
||||
keyPermission:keyvaultKeyPermission
|
||||
secretPermission:keyvaultSecretPermission
|
||||
storagePermission:keyvaultStoragePermission
|
||||
usePublicIp: keyvaultUsePublicIp
|
||||
publicNetworkAccess:keyvaultPublicNetworkAccess
|
||||
enabledForDeployment: keyvaultEnabledForDeployment
|
||||
enabledForDiskEncryption: keyvaultEnabledForDiskEncryption
|
||||
enabledForTemplateDeployment: keyvaultEnabledForTemplateDeployment
|
||||
enablePurgeProtection: keyvaultEnablePurgeProtection
|
||||
enableRbacAuthorization: keyvaultEnableRbacAuthorization
|
||||
enableSoftDelete: keyvaultEnableSoftDelete
|
||||
softDeleteRetentionInDays: keyvaultSoftDeleteRetentionInDays
|
||||
}
|
||||
}
|
||||
|
||||
module batchAccountAutoStorageAccount '../modules/storage.bicep' = {
|
||||
name: '${namingPrefix}-batch-account-auto-storage'
|
||||
params: {
|
||||
storageAccountName: batchAccountAutoStorageAccountNameVar
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
storeType: 'batch'
|
||||
}
|
||||
}
|
||||
|
||||
module batchStorageAccountCredentials '../modules/storage.credentials.to.keyvault.bicep' = {
|
||||
name: '${namingPrefix}-batch-storage-credentials'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
storageAccountName: batchAccountAutoStorageAccountNameVar
|
||||
keyVaultName: keyvaultNameVar
|
||||
keyVaultResourceGroup: resourceGroup().name
|
||||
secretNamePrefix: 'Batch'
|
||||
}
|
||||
dependsOn: [
|
||||
keyVault
|
||||
batchAccountAutoStorageAccount
|
||||
]
|
||||
}
|
||||
|
||||
module uami '../modules/managed.identity.user.bicep' = {
|
||||
name: '${namingPrefix}-umi'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
uamiName: uamiNameVar
|
||||
}
|
||||
}
|
||||
|
||||
module batchAccount '../modules/batch.account.bicep' = {
|
||||
name: '${namingPrefix}-batch-account'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
batchAccountName: toLower(batchAccountNameVar)
|
||||
userManagedIdentityId: uami.outputs.uamiId
|
||||
userManagedIdentityPrincipalId: uami.outputs.uamiPrincipalId
|
||||
allowedAuthenticationModes: batchAccountPoolAllocationMode == 'BatchService' ? allowedAuthenticationModesBatchSvc : allowedAuthenticationModesUsrSub
|
||||
autoStorageAuthenticationMode: batchAccountAutoStorageAuthenticationMode
|
||||
autoStorageAccountName: batchAccountAutoStorageAccountNameVar
|
||||
poolAllocationMode: batchAccountPoolAllocationMode
|
||||
publicNetworkAccess: batchAccountPublicNetworkAccess
|
||||
keyVaultName: keyvaultNameVar
|
||||
}
|
||||
dependsOn: [
|
||||
uami
|
||||
batchAccountAutoStorageAccount
|
||||
keyVault
|
||||
]
|
||||
}
|
||||
|
||||
module synapseIdentityForBatchAccess '../modules/batch.account.role.assignment.bicep' = [ for role in synapseMIBatchAccountRoles: {
|
||||
name: '${namingPrefix}-batch-account-role-assgn'
|
||||
params: {
|
||||
resourceName: toLower(batchAccountNameVar)
|
||||
principalId: synapseMIPrincipalId
|
||||
roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${role}'
|
||||
}
|
||||
dependsOn: [
|
||||
batchAccount
|
||||
]
|
||||
}]
|
||||
|
||||
module batchAccountCpuOnlyPool '../modules/batch.account.pools.bicep' = {
|
||||
name: '${namingPrefix}-batch-account-data-fetch-pool'
|
||||
params: {
|
||||
batchAccountName: batchAccountNameVar
|
||||
batchAccountPoolName: batchAccountCpuOnlyPoolName
|
||||
vmSize: batchAccountCpuOnlyPoolVmSize
|
||||
fixedScaleTargetDedicatedNodes: batchAccountCpuOnlyPoolDedicatedNodes
|
||||
imageReferencePublisher: batchAccountCpuOnlyPoolImageReferencePublisher
|
||||
imageReferenceOffer: batchAccountCpuOnlyPoolImageReferenceOffer
|
||||
imageReferenceSku: batchAccountCpuOnlyPoolImageReferenceSku
|
||||
imageReferenceVersion: batchAccountCpuOnlyPoolImageReferenceVersion
|
||||
startTaskCommandLine: batchAccountCpuOnlyPoolStartTaskCommandLine
|
||||
azureFileShareConfigurationAccountKey: mountAccountKey
|
||||
azureFileShareConfigurationAccountName: mountAccountName
|
||||
azureFileShareConfigurationAzureFileUrl: mountFileUrl
|
||||
azureFileShareConfigurationMountOptions: '-o vers=3.0,dir_mode=0777,file_mode=0777,sec=ntlmssp'
|
||||
azureFileShareConfigurationRelativeMountPath: 'S'
|
||||
}
|
||||
dependsOn: [
|
||||
batchAccountAutoStorageAccount
|
||||
batchAccount
|
||||
]
|
||||
}
|
||||
|
||||
module acr '../modules/acr.bicep' = {
|
||||
name: '${namingPrefix}-acr'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
acrName: acrNameVar
|
||||
acrSku: acrSku
|
||||
}
|
||||
}
|
||||
|
||||
module acrCredentials '../modules/acr.credentials.to.keyvault.bicep' = {
|
||||
name: '${namingPrefix}-acr-credentials'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
acrName: acrNameVar
|
||||
keyVaultName: keyvaultNameVar
|
||||
|
||||
}
|
||||
dependsOn: [
|
||||
keyVault
|
||||
acr
|
||||
]
|
||||
}
|
||||
|
||||
module batchAccountCredentials '../modules/batch.account.to.keyvault.bicep' = {
|
||||
name: '${namingPrefix}-batch-account-credentials'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
batchAccoutName: toLower(batchAccountNameVar)
|
||||
keyVaultName: pipelineLinkedSvcKeyVaultName
|
||||
keyVaultResourceGroup: pipelineResourceGroupName
|
||||
}
|
||||
dependsOn: [
|
||||
keyVault
|
||||
batchAccount
|
||||
]
|
||||
}
|
||||
|
||||
module batchDiagnosticSettings '../modules/batch-diagnostic-settings.bicep' = {
|
||||
name: '${namingPrefix}-synapse-diag-settings'
|
||||
params: {
|
||||
batchAccountName: batchAccountNameVar
|
||||
logs: [for category in batchLogsDiagCategories: {
|
||||
category: null
|
||||
categoryGroup: category
|
||||
enabled: true
|
||||
retentionPolicy: {
|
||||
days: 30
|
||||
enabled: false
|
||||
}
|
||||
}]
|
||||
metrics: [for category in batchMetricsDiagCategories: {
|
||||
category: category
|
||||
enabled: true
|
||||
retentionPolicy: {
|
||||
days: 30
|
||||
enabled: false
|
||||
}
|
||||
}]
|
||||
workspaceId: logAnalyticsWorkspaceId
|
||||
}
|
||||
dependsOn: [
|
||||
batchAccount
|
||||
]
|
||||
}
|
|
@ -0,0 +1,250 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// List of required parameters
|
||||
param environmentCode string
|
||||
param environmentTag string
|
||||
param projectName string
|
||||
param location string
|
||||
|
||||
// Name parameters for infrastructure resources
|
||||
param synapseResourceGroupName string = ''
|
||||
param keyvaultName string = ''
|
||||
param synapseHnsStorageAccountName string = ''
|
||||
param synapseWorkspaceName string = ''
|
||||
param synapseSparkPoolName string = ''
|
||||
param synapseSqlAdminLoginPassword string = ''
|
||||
|
||||
// Parameters with default values for Keyvault
|
||||
param keyvaultSkuName string = 'Standard'
|
||||
param objIdForKeyvaultAccessPolicyPolicy string = ''
|
||||
param keyvaultCertPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultKeyPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultSecretPermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultStoragePermission array = [
|
||||
'All'
|
||||
]
|
||||
param keyvaultUsePublicIp bool = true
|
||||
param keyvaultPublicNetworkAccess bool = true
|
||||
param keyvaultEnabledForDeployment bool = true
|
||||
param keyvaultEnabledForDiskEncryption bool = true
|
||||
param keyvaultEnabledForTemplateDeployment bool = true
|
||||
param keyvaultEnablePurgeProtection bool = true
|
||||
param keyvaultEnableRbacAuthorization bool = false
|
||||
param keyvaultEnableSoftDelete bool = true
|
||||
param keyvaultSoftDeleteRetentionInDays int = 7
|
||||
|
||||
// Parameters with default values for Synapse Workspace
|
||||
param synapseHnsStorageAccountFileSystem string = 'users'
|
||||
param synapseSqlAdminLogin string = 'sqladmin'
|
||||
param synapseFirewallAllowEndIP string = '255.255.255.255'
|
||||
param synapseFirewallAllowStartIP string = '0.0.0.0'
|
||||
param synapseAutoPauseEnabled bool = true
|
||||
param synapseAutoPauseDelayInMinutes int = 15
|
||||
param synapseAutoScaleEnabled bool = true
|
||||
param synapseAutoScaleMinNodeCount int = 1
|
||||
param synapseAutoScaleMaxNodeCount int = 5
|
||||
param synapseCacheSize int = 0
|
||||
param synapseDynamicExecutorAllocationEnabled bool = false
|
||||
param synapseIsComputeIsolationEnabled bool = false
|
||||
param synapseNodeCount int = 0
|
||||
param synapseNodeSize string = 'Medium'
|
||||
param synapseNodeSizeFamily string = 'MemoryOptimized'
|
||||
param synapseSparkVersion string = '3.1'
|
||||
param synapseGitRepoAccountName string = ''
|
||||
param synapseGitRepoCollaborationBranch string = 'main'
|
||||
param synapseGitRepoHostName string = ''
|
||||
param synapseGitRepoLastCommitId string = ''
|
||||
param synapseGitRepoVstsProjectName string = ''
|
||||
param synapseGitRepoRepositoryName string = ''
|
||||
param synapseGitRepoRootFolder string = '.'
|
||||
param synapseGitRepoVstsTenantId string = subscription().tenantId
|
||||
param synapseGitRepoType string = ''
|
||||
|
||||
param synapseCategories array = [
|
||||
'SynapseRbacOperations'
|
||||
'GatewayApiRequests'
|
||||
'SQLSecurityAuditEvents'
|
||||
'BuiltinSqlReqsEnded'
|
||||
'IntegrationPipelineRuns'
|
||||
'IntegrationActivityRuns'
|
||||
'IntegrationTriggerRuns'
|
||||
]
|
||||
|
||||
// Parameters with default values for Synapse and its Managed Identity
|
||||
param synapseMIStorageAccountRoles array = [
|
||||
'ba92f5b4-2d11-453d-a403-e96b0029c9fe'
|
||||
'974c5e8b-45b9-4653-ba55-5f855dd0fb88'
|
||||
]
|
||||
|
||||
param logAnalyticsWorkspaceId string
|
||||
|
||||
var namingPrefix = '${environmentCode}-${projectName}'
|
||||
var synapseResourceGroupNameVar = empty(synapseResourceGroupName) ? '${namingPrefix}-rg' : synapseResourceGroupName
|
||||
var nameSuffix = substring(uniqueString(synapseResourceGroupNameVar), 0, 6)
|
||||
var keyvaultNameVar = empty(keyvaultName) ? '${namingPrefix}-kv' : keyvaultName
|
||||
var synapseHnsStorageAccountNameVar = empty(synapseHnsStorageAccountName) ? 'synhns${nameSuffix}' : synapseHnsStorageAccountName
|
||||
var synapseWorkspaceNameVar = empty(synapseWorkspaceName) ? '${namingPrefix}-syn-ws' : synapseWorkspaceName
|
||||
var synapseSparkPoolNameVar = empty(synapseSparkPoolName) ? 'pool${nameSuffix}' : synapseSparkPoolName
|
||||
var synapseSqlAdminLoginPasswordVar = empty(synapseSqlAdminLoginPassword) ? 'SynapsePassword!${nameSuffix}' : synapseSqlAdminLoginPassword
|
||||
|
||||
module keyVault '../modules/akv.bicep' = {
|
||||
name: '${namingPrefix}-akv'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
keyVaultName: keyvaultNameVar
|
||||
location: location
|
||||
skuName:keyvaultSkuName
|
||||
objIdForAccessPolicyPolicy: objIdForKeyvaultAccessPolicyPolicy
|
||||
certPermission:keyvaultCertPermission
|
||||
keyPermission:keyvaultKeyPermission
|
||||
secretPermission:keyvaultSecretPermission
|
||||
storagePermission:keyvaultStoragePermission
|
||||
usePublicIp: keyvaultUsePublicIp
|
||||
publicNetworkAccess:keyvaultPublicNetworkAccess
|
||||
enabledForDeployment: keyvaultEnabledForDeployment
|
||||
enabledForDiskEncryption: keyvaultEnabledForDiskEncryption
|
||||
enabledForTemplateDeployment: keyvaultEnabledForTemplateDeployment
|
||||
enablePurgeProtection: keyvaultEnablePurgeProtection
|
||||
enableRbacAuthorization: keyvaultEnableRbacAuthorization
|
||||
enableSoftDelete: keyvaultEnableSoftDelete
|
||||
softDeleteRetentionInDays: keyvaultSoftDeleteRetentionInDays
|
||||
usage: 'linkedService'
|
||||
}
|
||||
}
|
||||
|
||||
module synapseHnsStorageAccount '../modules/storage.hns.bicep' = {
|
||||
name: '${namingPrefix}-hns-storage'
|
||||
params: {
|
||||
storageAccountName: synapseHnsStorageAccountNameVar
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
storeType: 'synapse'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
module synapseWorkspace '../modules/synapse.workspace.bicep' = {
|
||||
name: '${namingPrefix}-workspace'
|
||||
params:{
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
synapseWorkspaceName: synapseWorkspaceNameVar
|
||||
hnsStorageAccountName: synapseHnsStorageAccountNameVar
|
||||
hnsStorageFileSystem: synapseHnsStorageAccountFileSystem
|
||||
sqlAdminLogin: synapseSqlAdminLogin
|
||||
sqlAdminLoginPassword: synapseSqlAdminLoginPasswordVar
|
||||
firewallAllowEndIP: synapseFirewallAllowEndIP
|
||||
firewallAllowStartIP: synapseFirewallAllowStartIP
|
||||
keyVaultName: keyvaultNameVar
|
||||
gitRepoAccountName: synapseGitRepoAccountName
|
||||
gitRepoCollaborationBranch: synapseGitRepoCollaborationBranch
|
||||
gitRepoHostName: synapseGitRepoHostName
|
||||
gitRepoLastCommitId: synapseGitRepoLastCommitId
|
||||
gitRepoVstsProjectName: synapseGitRepoVstsProjectName
|
||||
gitRepoRepositoryName: synapseGitRepoRepositoryName
|
||||
gitRepoRootFolder: synapseGitRepoRootFolder
|
||||
gitRepoVstsTenantId: synapseGitRepoVstsTenantId
|
||||
gitRepoType: synapseGitRepoType
|
||||
}
|
||||
dependsOn: [
|
||||
synapseHnsStorageAccount
|
||||
keyVault
|
||||
]
|
||||
}
|
||||
|
||||
module synapseIdentityForStorageAccess '../modules/storage-role-assignment.bicep' = [ for (role, roleIndex) in synapseMIStorageAccountRoles: {
|
||||
name: '${namingPrefix}-synapse-id-kv-${roleIndex}'
|
||||
params: {
|
||||
resourceName: synapseHnsStorageAccountNameVar
|
||||
principalId: synapseWorkspace.outputs.synapseMIPrincipalId
|
||||
roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${role}'
|
||||
}
|
||||
dependsOn: [
|
||||
synapseHnsStorageAccount
|
||||
synapseWorkspace
|
||||
]
|
||||
}]
|
||||
|
||||
module synapseIdentityKeyVaultAccess '../modules/akv.policy.bicep' = {
|
||||
name: '${namingPrefix}-synapse-id-kv'
|
||||
params: {
|
||||
keyVaultName: keyvaultNameVar
|
||||
policyOps: 'add'
|
||||
objIdForPolicy: synapseWorkspace.outputs.synapseMIPrincipalId
|
||||
}
|
||||
|
||||
dependsOn: [
|
||||
synapseWorkspace
|
||||
]
|
||||
}
|
||||
|
||||
module synapseSparkPool '../modules/synapse.sparkpool.bicep' = {
|
||||
name: '${namingPrefix}-sparkpool'
|
||||
params:{
|
||||
environmentName: environmentTag
|
||||
location: location
|
||||
synapseWorkspaceName: synapseWorkspaceNameVar
|
||||
sparkPoolName: synapseSparkPoolNameVar
|
||||
autoPauseEnabled: synapseAutoPauseEnabled
|
||||
autoPauseDelayInMinutes: synapseAutoPauseDelayInMinutes
|
||||
autoScaleEnabled: synapseAutoScaleEnabled
|
||||
autoScaleMinNodeCount: synapseAutoScaleMinNodeCount
|
||||
autoScaleMaxNodeCount: synapseAutoScaleMaxNodeCount
|
||||
cacheSize: synapseCacheSize
|
||||
dynamicExecutorAllocationEnabled: synapseDynamicExecutorAllocationEnabled
|
||||
isComputeIsolationEnabled: synapseIsComputeIsolationEnabled
|
||||
nodeCount: synapseNodeCount
|
||||
nodeSize: synapseNodeSize
|
||||
nodeSizeFamily: synapseNodeSizeFamily
|
||||
sparkVersion: synapseSparkVersion
|
||||
}
|
||||
dependsOn: [
|
||||
synapseHnsStorageAccount
|
||||
synapseWorkspace
|
||||
]
|
||||
}
|
||||
|
||||
module synapseDiagnosticSettings '../modules/synapse-diagnostic-settings.bicep' = {
|
||||
name: '${namingPrefix}-synapse-diag-settings'
|
||||
params: {
|
||||
synapseWorkspaceName: synapseWorkspaceNameVar
|
||||
logs: [for category in synapseCategories: {
|
||||
category: category
|
||||
categoryGroup: null
|
||||
enabled: true
|
||||
retentionPolicy: {
|
||||
days: 30
|
||||
enabled: false
|
||||
}
|
||||
}]
|
||||
workspaceId: logAnalyticsWorkspaceId
|
||||
}
|
||||
dependsOn: [
|
||||
synapseWorkspace
|
||||
]
|
||||
}
|
||||
|
||||
module pkgDataStorageAccountCredentials '../modules/storage.credentials.to.keyvault.bicep' = {
|
||||
name: '${namingPrefix}-pkgs-storage-credentials'
|
||||
params: {
|
||||
environmentName: environmentTag
|
||||
storageAccountName: synapseHnsStorageAccountNameVar
|
||||
keyVaultName: keyvaultNameVar
|
||||
keyVaultResourceGroup: resourceGroup().name
|
||||
secretNamePrefix: 'Packages'
|
||||
}
|
||||
dependsOn: [
|
||||
keyVault
|
||||
synapseHnsStorageAccount
|
||||
]
|
||||
}
|
||||
|
||||
output synapseMIPrincipalId string = synapseWorkspace.outputs.synapseMIPrincipalId
|
|
@ -0,0 +1,174 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
targetScope='subscription'
|
||||
|
||||
@description('Location for all the resources to be deployed')
|
||||
param location string
|
||||
|
||||
@minLength(3)
|
||||
@maxLength(8)
|
||||
@description('Prefix to be used for naming all the resources in the deployment')
|
||||
param environmentCode string
|
||||
|
||||
@description('Environment will be used as Tag on the resource group')
|
||||
param environment string
|
||||
|
||||
@description('Used for naming of the network resource group and its resources')
|
||||
param networkModulePrefix string = 'network'
|
||||
|
||||
@description('Used for naming of the data resource group and its resources')
|
||||
param dataModulePrefix string = 'data'
|
||||
|
||||
@description('Used for naming of the monitor resource group and its resources')
|
||||
param monitorModulePrefix string = 'monitor'
|
||||
|
||||
@description('Used for naming of the pipeline resource group and its resources')
|
||||
param pipelineModulePrefix string = 'pipeline'
|
||||
|
||||
@description('Used for naming of the orchestration resource group and its resources')
|
||||
param orchestrationModulePrefix string = 'orc'
|
||||
|
||||
var networkResourceGroupName = '${environmentCode}-${networkModulePrefix}-rg'
|
||||
var dataResourceGroupName = '${environmentCode}-${dataModulePrefix}-rg'
|
||||
var monitorResourceGroupName = '${environmentCode}-${monitorModulePrefix}-rg'
|
||||
var pipelineResourceGroupName = '${environmentCode}-${pipelineModulePrefix}-rg'
|
||||
var orchestrationResourceGroupName = '${environmentCode}-${orchestrationModulePrefix}-rg'
|
||||
|
||||
module networkResourceGroup 'modules/resourcegroup.bicep' = {
|
||||
name : networkResourceGroupName
|
||||
scope: subscription()
|
||||
params: {
|
||||
environmentName: environment
|
||||
resourceGroupName: networkResourceGroupName
|
||||
resourceGroupLocation: location
|
||||
}
|
||||
}
|
||||
|
||||
module networkModule 'groups/networking.bicep' = {
|
||||
name: '${networkModulePrefix}-module'
|
||||
scope: resourceGroup(networkResourceGroup.name)
|
||||
params: {
|
||||
projectName: networkModulePrefix
|
||||
Location: location
|
||||
environmentCode: environmentCode
|
||||
environmentTag: environment
|
||||
virtualNetworkName: '${environmentCode}-vnet'
|
||||
}
|
||||
dependsOn: [
|
||||
networkResourceGroup
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
|
||||
module monitorResourceGroup 'modules/resourcegroup.bicep' = {
|
||||
name : monitorResourceGroupName
|
||||
scope: subscription()
|
||||
params: {
|
||||
environmentName: environment
|
||||
resourceGroupName: monitorResourceGroupName
|
||||
resourceGroupLocation: location
|
||||
}
|
||||
}
|
||||
|
||||
module monitorModule 'groups/monitoring.bicep' = {
|
||||
name: '${monitorModulePrefix}-module'
|
||||
scope: resourceGroup(monitorResourceGroup.name)
|
||||
params: {
|
||||
projectName: monitorModulePrefix
|
||||
location: location
|
||||
environmentCode: environmentCode
|
||||
environmentTag: environment
|
||||
}
|
||||
dependsOn: [
|
||||
networkModule
|
||||
]
|
||||
}
|
||||
|
||||
module pipelineResourceGroup 'modules/resourcegroup.bicep' = {
|
||||
name : pipelineResourceGroupName
|
||||
scope: subscription()
|
||||
params: {
|
||||
environmentName: environment
|
||||
resourceGroupName: pipelineResourceGroupName
|
||||
resourceGroupLocation: location
|
||||
}
|
||||
}
|
||||
|
||||
module pipelineModule 'groups/pipeline.bicep' = {
|
||||
name: '${pipelineModulePrefix}-module'
|
||||
scope: resourceGroup(pipelineResourceGroup.name)
|
||||
params: {
|
||||
projectName: pipelineModulePrefix
|
||||
location: location
|
||||
environmentCode: environmentCode
|
||||
environmentTag: environment
|
||||
logAnalyticsWorkspaceId: monitorModule.outputs.workspaceId
|
||||
}
|
||||
dependsOn: [
|
||||
networkModule
|
||||
monitorModule
|
||||
]
|
||||
}
|
||||
|
||||
module dataResourceGroup 'modules/resourcegroup.bicep' = {
|
||||
name : dataResourceGroupName
|
||||
scope: subscription()
|
||||
params: {
|
||||
environmentName: environment
|
||||
resourceGroupName: dataResourceGroupName
|
||||
resourceGroupLocation: location
|
||||
}
|
||||
}
|
||||
|
||||
module dataModule 'groups/data.bicep' = {
|
||||
name: '${dataModulePrefix}-module'
|
||||
scope: resourceGroup(dataResourceGroup.name)
|
||||
params: {
|
||||
projectName: dataModulePrefix
|
||||
location: location
|
||||
environmentCode: environmentCode
|
||||
environmentTag: environment
|
||||
synapseMIPrincipalId: pipelineModule.outputs.synapseMIPrincipalId
|
||||
pipelineResourceGroupName: pipelineResourceGroup.name
|
||||
pipelineLinkedSvcKeyVaultName: '${environmentCode}-${pipelineModulePrefix}-kv'
|
||||
}
|
||||
dependsOn: [
|
||||
networkModule
|
||||
pipelineModule
|
||||
]
|
||||
}
|
||||
|
||||
module orchestrationResourceGroup 'modules/resourcegroup.bicep' = {
|
||||
name : orchestrationResourceGroupName
|
||||
scope: subscription()
|
||||
params: {
|
||||
environmentName: environment
|
||||
resourceGroupName: orchestrationResourceGroupName
|
||||
resourceGroupLocation: location
|
||||
}
|
||||
}
|
||||
|
||||
module orchestrationModule 'groups/orchestration.bicep' = {
|
||||
name: '${orchestrationModulePrefix}-module'
|
||||
scope: resourceGroup(orchestrationResourceGroup.name)
|
||||
params: {
|
||||
projectName: orchestrationModulePrefix
|
||||
location: location
|
||||
environmentCode: environmentCode
|
||||
environmentTag: environment
|
||||
logAnalyticsWorkspaceId: monitorModule.outputs.workspaceId
|
||||
mountAccountKey: dataModule.outputs.rawStoragePrimaryKey
|
||||
mountAccountName: dataModule.outputs.rawStorageAccountName
|
||||
mountFileUrl: '${dataModule.outputs.rawStorageFileEndpointUri}volume-a'
|
||||
pipelineResourceGroupName: pipelineResourceGroup.name
|
||||
pipelineLinkedSvcKeyVaultName: '${environmentCode}-${pipelineModulePrefix}-kv'
|
||||
synapseMIPrincipalId: pipelineModule.outputs.synapseMIPrincipalId
|
||||
}
|
||||
dependsOn: [
|
||||
pipelineModule
|
||||
networkModule
|
||||
monitorModule
|
||||
]
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param location string = resourceGroup().location
|
||||
param acrName string
|
||||
param acrSku string = 'Standard'
|
||||
param adminUserEnabled bool = true
|
||||
param publicNetworkAccess bool = true
|
||||
param zoneRedundancy bool = false
|
||||
resource containerRepository 'Microsoft.ContainerRegistry/registries@2021-12-01-preview' = {
|
||||
name: acrName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
sku: {
|
||||
name: acrSku
|
||||
}
|
||||
properties: {
|
||||
adminUserEnabled: adminUserEnabled
|
||||
publicNetworkAccess: (publicNetworkAccess) ? 'Enabled' : 'Disabled'
|
||||
zoneRedundancy: (zoneRedundancy) ? 'Enabled' : 'Disabled'
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param acrName string
|
||||
param keyVaultName string
|
||||
param containerRegistryLoginServerSecretName string = 'RegistryServer'
|
||||
param containerRegistryUsernameSecretName string = 'RegistryUserName'
|
||||
param containerRegistryPasswordSecretName string = 'RegistryPassword'
|
||||
param utcValue string = utcNow()
|
||||
|
||||
resource containerRepository 'Microsoft.ContainerRegistry/registries@2021-12-01-preview' existing = {
|
||||
name: acrName
|
||||
}
|
||||
|
||||
module acrLoginServerNameSecret './akv.secrets.bicep' = {
|
||||
name: 'acr-login-server-name-${utcValue}'
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: containerRegistryLoginServerSecretName
|
||||
secretValue: containerRepository.properties.loginServer
|
||||
}
|
||||
}
|
||||
|
||||
module acrUsernameSecret './akv.secrets.bicep' = {
|
||||
name: 'acr-username-${utcValue}'
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: containerRegistryUsernameSecretName
|
||||
secretValue: containerRepository.listCredentials().username
|
||||
}
|
||||
}
|
||||
|
||||
module acrPasswordSecret './akv.secrets.bicep' = {
|
||||
name: 'acr-password-${utcValue}'
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: containerRegistryPasswordSecretName
|
||||
secretValue: containerRepository.listCredentials().passwords[0].value
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param keyVaultName string
|
||||
param location string = resourceGroup().location
|
||||
param skuName string = 'Standard'
|
||||
param objIdForAccessPolicyPolicy string = ''
|
||||
param usage string = 'general'
|
||||
|
||||
param certPermission array = [
|
||||
'get'
|
||||
]
|
||||
param keyPermission array = [
|
||||
'get'
|
||||
]
|
||||
param secretPermission array = [
|
||||
'get'
|
||||
]
|
||||
param storagePermission array = [
|
||||
'get'
|
||||
]
|
||||
param usePublicIp bool = true
|
||||
param publicNetworkAccess bool = true
|
||||
param enabledForDeployment bool = true
|
||||
param enabledForDiskEncryption bool = true
|
||||
param enabledForTemplateDeployment bool = true
|
||||
param enablePurgeProtection bool = true
|
||||
param enableRbacAuthorization bool = false
|
||||
param enableSoftDelete bool = true
|
||||
param softDeleteRetentionInDays int = 7
|
||||
|
||||
resource akv 'Microsoft.KeyVault/vaults@2021-11-01-preview' = {
|
||||
name: keyVaultName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
usage: usage
|
||||
}
|
||||
properties: {
|
||||
accessPolicies: [
|
||||
{
|
||||
objectId: !empty(objIdForAccessPolicyPolicy)? objIdForAccessPolicyPolicy : '${reference(resourceGroup().id, '2021-04-01', 'Full').subscriptionId}'
|
||||
permissions: {
|
||||
certificates: certPermission
|
||||
keys: keyPermission
|
||||
secrets: secretPermission
|
||||
storage: storagePermission
|
||||
}
|
||||
tenantId: subscription().tenantId
|
||||
}
|
||||
]
|
||||
|
||||
enabledForDeployment: enabledForDeployment
|
||||
enabledForDiskEncryption: enabledForDiskEncryption
|
||||
enabledForTemplateDeployment: enabledForTemplateDeployment
|
||||
enablePurgeProtection: enablePurgeProtection
|
||||
enableRbacAuthorization: enableRbacAuthorization
|
||||
enableSoftDelete: enableSoftDelete
|
||||
networkAcls: {
|
||||
bypass: 'AzureServices'
|
||||
defaultAction: (usePublicIp) ? 'Allow' : 'Deny'
|
||||
ipRules: []
|
||||
virtualNetworkRules: []
|
||||
}
|
||||
publicNetworkAccess: (publicNetworkAccess) ? 'Enabled' : 'Disabled'
|
||||
sku: {
|
||||
family: 'A'
|
||||
name: skuName
|
||||
}
|
||||
softDeleteRetentionInDays: softDeleteRetentionInDays
|
||||
tenantId: subscription().tenantId
|
||||
}
|
||||
}
|
||||
|
||||
output vaultUri string = akv.properties.vaultUri
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param keyVaultName string
|
||||
param policyOps string
|
||||
param objIdForPolicy string = ''
|
||||
param certPermission array = [
|
||||
'Get'
|
||||
'List'
|
||||
'Update'
|
||||
'Create'
|
||||
'Import'
|
||||
'Delete'
|
||||
'Recover'
|
||||
'Backup'
|
||||
'Restore'
|
||||
'ManageContacts'
|
||||
'ManageIssuers'
|
||||
'GetIssuers'
|
||||
'ListIssuers'
|
||||
'SetIssuers'
|
||||
'DeleteIssuers'
|
||||
]
|
||||
param keyPermission array = [
|
||||
'Get'
|
||||
'List'
|
||||
'Update'
|
||||
'Create'
|
||||
'Import'
|
||||
'Delete'
|
||||
'Recover'
|
||||
'Backup'
|
||||
'Restore'
|
||||
]
|
||||
param secretPermission array = [
|
||||
'Get'
|
||||
'List'
|
||||
'Set'
|
||||
'Delete'
|
||||
'Recover'
|
||||
'Backup'
|
||||
'Restore'
|
||||
]
|
||||
|
||||
resource akv 'Microsoft.KeyVault/vaults@2021-11-01-preview' existing = {
|
||||
name: keyVaultName
|
||||
}
|
||||
|
||||
resource akvAccessPolicy 'Microsoft.KeyVault/vaults/accessPolicies@2021-11-01-preview' = {
|
||||
name: policyOps
|
||||
parent: akv
|
||||
properties: {
|
||||
accessPolicies: [
|
||||
{
|
||||
applicationId: null
|
||||
objectId: objIdForPolicy
|
||||
permissions: {
|
||||
certificates: certPermission
|
||||
keys: keyPermission
|
||||
secrets: secretPermission
|
||||
}
|
||||
tenantId: subscription().tenantId
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param keyVaultName string
|
||||
param secretName string
|
||||
param secretValue string
|
||||
|
||||
resource akv 'Microsoft.KeyVault/vaults@2021-11-01-preview' existing = {
|
||||
name: keyVaultName
|
||||
}
|
||||
|
||||
resource akvSecret 'Microsoft.KeyVault/vaults/secrets@2021-11-01-preview' = {
|
||||
name: secretName
|
||||
parent: akv
|
||||
properties: {
|
||||
value: secretValue
|
||||
}
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param applicationInsightsName string
|
||||
param location string
|
||||
param workspaceId string
|
||||
param appInsightsKind string = 'web'
|
||||
param appInsightsType string = 'web'
|
||||
|
||||
resource applicationInsights 'Microsoft.Insights/components@2020-02-02-preview' = {
|
||||
name: applicationInsightsName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
kind: appInsightsKind
|
||||
properties: {
|
||||
Application_Type: appInsightsType
|
||||
WorkspaceResourceId: workspaceId
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param batchAccountName string
|
||||
|
||||
param logs array
|
||||
param metrics array
|
||||
param storageAccountId string = ''
|
||||
param workspaceId string = ''
|
||||
param serviceBusId string = ''
|
||||
|
||||
param logAnalyticsDestinationType string = ''
|
||||
param eventHubAuthorizationRuleId string = ''
|
||||
param eventHubName string = ''
|
||||
|
||||
resource existingResource 'Microsoft.Batch/batchAccounts@2021-06-01' existing = {
|
||||
name: batchAccountName
|
||||
}
|
||||
|
||||
resource symbolicname 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = {
|
||||
name: '${existingResource.name}-diag'
|
||||
scope: existingResource
|
||||
properties: {
|
||||
eventHubAuthorizationRuleId: empty(eventHubAuthorizationRuleId) ? null : eventHubAuthorizationRuleId
|
||||
eventHubName: empty(eventHubName) ? null : eventHubName
|
||||
logAnalyticsDestinationType: empty(logAnalyticsDestinationType) ? null: logAnalyticsDestinationType
|
||||
logs: logs
|
||||
metrics: metrics
|
||||
serviceBusRuleId: empty(serviceBusId) ? null : serviceBusId
|
||||
storageAccountId: empty(storageAccountId) ? null : storageAccountId
|
||||
workspaceId: empty(workspaceId) ? null : workspaceId
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param location string = resourceGroup().location
|
||||
param environmentName string
|
||||
param batchAccountName string
|
||||
param userManagedIdentityId string
|
||||
param type string = 'batch'
|
||||
param allowedAuthenticationModes array = [
|
||||
'AAD'
|
||||
'SharedKey'
|
||||
'TaskAuthenticationToken'
|
||||
]
|
||||
param keyVaultName string = ''
|
||||
param autoStorageAuthenticationMode string = 'StorageKeys'
|
||||
param autoStorageAccountName string
|
||||
param poolAllocationMode string = 'BatchService'
|
||||
param publicNetworkAccess bool = true
|
||||
param assignRoleToUserManagedIdentity string = 'Owner'
|
||||
param userManagedIdentityPrincipalId string
|
||||
|
||||
param objIdForPolicy string = 'f520d84c-3fd3-4cc8-88d4-2ed25b00d27a'
|
||||
|
||||
|
||||
param certPermission array = []
|
||||
param keyPermission array = []
|
||||
param secretPermission array = [
|
||||
'Get'
|
||||
'List'
|
||||
'Set'
|
||||
'Delete'
|
||||
'Recover'
|
||||
]
|
||||
|
||||
var policyOpsVar = 'add'
|
||||
|
||||
resource keyVault 'Microsoft.KeyVault/vaults@2021-11-01-preview' existing = if( toLower(poolAllocationMode) == 'usersubscription' ) {
|
||||
name: keyVaultName
|
||||
|
||||
}
|
||||
|
||||
resource autoStorageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' existing = {
|
||||
name: autoStorageAccountName
|
||||
}
|
||||
|
||||
resource akvAccessPolicy 'Microsoft.KeyVault/vaults/accessPolicies@2021-11-01-preview' = {
|
||||
name: policyOpsVar
|
||||
parent: keyVault
|
||||
properties: {
|
||||
accessPolicies: [
|
||||
{
|
||||
applicationId: null
|
||||
objectId: objIdForPolicy
|
||||
permissions: {
|
||||
certificates: certPermission
|
||||
keys: keyPermission
|
||||
secrets: secretPermission
|
||||
}
|
||||
tenantId: subscription().tenantId
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
resource batchAccount 'Microsoft.Batch/batchAccounts@2021-06-01' = {
|
||||
name: batchAccountName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
type: type
|
||||
}
|
||||
identity: {
|
||||
type: 'UserAssigned'
|
||||
userAssignedIdentities : {
|
||||
'${userManagedIdentityId}': {}
|
||||
}
|
||||
}
|
||||
properties: {
|
||||
allowedAuthenticationModes: allowedAuthenticationModes
|
||||
autoStorage: toLower(poolAllocationMode) == 'usersubscription' ? null : {
|
||||
authenticationMode: autoStorageAuthenticationMode
|
||||
storageAccountId: autoStorageAccount.id
|
||||
}
|
||||
encryption: {
|
||||
keySource: 'Microsoft.Batch'
|
||||
}
|
||||
poolAllocationMode: poolAllocationMode
|
||||
publicNetworkAccess: (publicNetworkAccess) ? 'Enabled' : 'Disabled'
|
||||
keyVaultReference: toLower(poolAllocationMode) == 'usersubscription' ? {
|
||||
id: keyVault.id
|
||||
url: keyVault.properties.vaultUri
|
||||
} : null
|
||||
}
|
||||
dependsOn: [
|
||||
akvAccessPolicy
|
||||
]
|
||||
}
|
||||
|
||||
var role = {
|
||||
owner: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/8e3af657-a8ff-443c-a75c-2fe8c4bcb635'
|
||||
contributor: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c'
|
||||
reader: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/acdd72a7-3385-48ef-bd42-f606fba81ae7'
|
||||
}
|
||||
|
||||
resource assignRole 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = {
|
||||
name: guid(batchAccount.id, userManagedIdentityPrincipalId, role[toLower(assignRoleToUserManagedIdentity)])
|
||||
scope: batchAccount
|
||||
properties: {
|
||||
principalId: userManagedIdentityPrincipalId
|
||||
roleDefinitionId: role[toLower(assignRoleToUserManagedIdentity)]
|
||||
}
|
||||
}
|
||||
|
||||
output batchAccountId string = batchAccount.id
|
|
@ -0,0 +1,43 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param batchAccountName string
|
||||
param batchPoolName string
|
||||
param userManagedIdentityName string
|
||||
param userManagedIdentityResourcegroupName string
|
||||
param location string = resourceGroup().location
|
||||
param utcValue string = utcNow()
|
||||
|
||||
resource queryuserManagedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = {
|
||||
scope: resourceGroup(userManagedIdentityResourcegroupName)
|
||||
name: userManagedIdentityName
|
||||
}
|
||||
|
||||
resource runPowerShellInlineWithOutput 'Microsoft.Resources/deploymentScripts@2020-10-01' = {
|
||||
name: 'runPowerShellInlineWithOutput${utcValue}'
|
||||
location: location
|
||||
kind: 'AzurePowerShell'
|
||||
identity: {
|
||||
type: 'UserAssigned'
|
||||
userAssignedIdentities: {
|
||||
'${queryuserManagedIdentity.id}': {}
|
||||
}
|
||||
}
|
||||
properties: {
|
||||
forceUpdateTag: utcValue
|
||||
azPowerShellVersion: '6.4'
|
||||
scriptContent: '''
|
||||
param([string] $batchAccountName, [string] $batchPoolName)
|
||||
Write-Output $output
|
||||
$DeploymentScriptOutputs = @{}
|
||||
$batchContext = Get-AzBatchAccount -AccountName $batchAccountName
|
||||
$DeploymentScriptOutputs = Get-AzBatchPool -Id $batchPoolName -BatchContext $batchContext
|
||||
'''
|
||||
arguments: '-batchAccountName ${batchAccountName} -batchPoolName ${batchPoolName}'
|
||||
timeout: 'PT1H'
|
||||
cleanupPreference: 'OnSuccess'
|
||||
retentionInterval: 'P1D'
|
||||
}
|
||||
}
|
||||
|
||||
output batchPoolExists bool = contains(runPowerShellInlineWithOutput.properties, 'outputs')
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param batchAccountName string
|
||||
param batchAccountPoolName string
|
||||
param batchPoolExists bool = false
|
||||
param vmSize string
|
||||
param imageReferencePublisher string = 'microsoft-azure-batch'
|
||||
param imageReferenceOffer string
|
||||
param imageReferenceSku string
|
||||
param imageReferenceVersion string
|
||||
param containerImageNames array = []
|
||||
param containerRegistryPassword string = ''
|
||||
param containerRegistryServer string = ''
|
||||
param containerRegistryUsername string = ''
|
||||
param nodeAgentSkuId string = 'batch.node.ubuntu 20.04'
|
||||
param nodePlacementConfigurationPolicy string = ''
|
||||
param batchAccountPoolDisplayName string = batchAccountPoolName
|
||||
param interNodeCommunication bool = false
|
||||
param azureFileShareConfigurationAccountKey string = ''
|
||||
param azureFileShareConfigurationAccountName string = ''
|
||||
param azureFileShareConfigurationAzureFileUrl string = ''
|
||||
param azureFileShareConfigurationMountOptions string = ''
|
||||
param azureFileShareConfigurationRelativeMountPath string = ''
|
||||
param publicIPAddressConfigurationProvision string = ''
|
||||
param fixedScaleResizeTimeout string = 'PT15M'
|
||||
param fixedScaleTargetDedicatedNodes int = 1
|
||||
param fixedScaleTargetLowPriorityNodes int = 0
|
||||
param startTaskCommandLine string = ''
|
||||
param startTaskEnvironmentSettings array = []
|
||||
param startTaskMaxTaskRetryCount int = 0
|
||||
param startTaskAutoUserElevationLevel string = 'Admin'
|
||||
param startTaskautoUserScope string = 'Pool'
|
||||
param startTaskWaitForSuccess bool = true
|
||||
param taskSchedulingPolicy string = 'Pack'
|
||||
param taskSlotsPerNode int = 1
|
||||
|
||||
resource batchAccount 'Microsoft.Batch/batchAccounts@2021-06-01' existing = {
|
||||
name: batchAccountName
|
||||
}
|
||||
|
||||
resource batchAccountPool 'Microsoft.Batch/batchAccounts/pools@2021-06-01' = if (!batchPoolExists) {
|
||||
name: batchAccountPoolName
|
||||
parent: batchAccount
|
||||
properties: {
|
||||
vmSize: vmSize
|
||||
deploymentConfiguration: {
|
||||
virtualMachineConfiguration: {
|
||||
imageReference: {
|
||||
offer: imageReferenceOffer
|
||||
publisher: imageReferencePublisher
|
||||
sku: imageReferenceSku
|
||||
version: imageReferenceVersion
|
||||
}
|
||||
containerConfiguration: (empty(containerImageNames))? null: {
|
||||
containerImageNames: containerImageNames
|
||||
containerRegistries: [
|
||||
{
|
||||
password: containerRegistryPassword
|
||||
registryServer: containerRegistryServer
|
||||
username: containerRegistryUsername
|
||||
}
|
||||
]
|
||||
type: 'DockerCompatible'
|
||||
}
|
||||
nodeAgentSkuId: nodeAgentSkuId
|
||||
nodePlacementConfiguration: (empty(nodePlacementConfigurationPolicy))? {} : {
|
||||
policy: nodePlacementConfigurationPolicy
|
||||
}
|
||||
}
|
||||
}
|
||||
displayName: batchAccountPoolDisplayName
|
||||
interNodeCommunication: (interNodeCommunication) ? 'Enabled' : 'Disabled'
|
||||
|
||||
|
||||
mountConfiguration: (empty(azureFileShareConfigurationAccountName))? []: [
|
||||
{
|
||||
azureFileShareConfiguration: {
|
||||
accountKey: azureFileShareConfigurationAccountKey
|
||||
accountName: azureFileShareConfigurationAccountName
|
||||
azureFileUrl: azureFileShareConfigurationAzureFileUrl
|
||||
mountOptions: azureFileShareConfigurationMountOptions
|
||||
relativeMountPath: azureFileShareConfigurationRelativeMountPath
|
||||
}
|
||||
}
|
||||
]
|
||||
networkConfiguration: (empty(publicIPAddressConfigurationProvision))? {}: {
|
||||
publicIPAddressConfiguration: {
|
||||
provision: publicIPAddressConfigurationProvision
|
||||
}
|
||||
}
|
||||
scaleSettings: {
|
||||
fixedScale: {
|
||||
resizeTimeout: fixedScaleResizeTimeout
|
||||
targetDedicatedNodes: fixedScaleTargetDedicatedNodes
|
||||
targetLowPriorityNodes: fixedScaleTargetLowPriorityNodes
|
||||
}
|
||||
}
|
||||
startTask: (empty(startTaskCommandLine))? {}: {
|
||||
commandLine: startTaskCommandLine
|
||||
environmentSettings: startTaskEnvironmentSettings
|
||||
userIdentity: {
|
||||
autoUser: {
|
||||
elevationLevel: startTaskAutoUserElevationLevel
|
||||
scope: startTaskautoUserScope
|
||||
}
|
||||
}
|
||||
maxTaskRetryCount: startTaskMaxTaskRetryCount
|
||||
waitForSuccess: startTaskWaitForSuccess
|
||||
}
|
||||
taskSchedulingPolicy: {
|
||||
nodeFillType: taskSchedulingPolicy
|
||||
}
|
||||
taskSlotsPerNode: taskSlotsPerNode
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param principalId string
|
||||
param roleDefinitionId string
|
||||
|
||||
param resourceName string
|
||||
|
||||
param roleAssignmentId string = newGuid()
|
||||
|
||||
resource existingResource 'Microsoft.Batch/batchAccounts@2021-06-01' existing = {
|
||||
name: resourceName
|
||||
}
|
||||
|
||||
resource symbolicname 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = {
|
||||
name: roleAssignmentId
|
||||
scope: existingResource
|
||||
properties: {
|
||||
principalId: principalId
|
||||
roleDefinitionId: roleDefinitionId
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param batchAccoutName string
|
||||
param keyVaultName string
|
||||
param keyVaultResourceGroup string
|
||||
param secretNamePrefix string = 'Geospatial'
|
||||
param utcValue string = utcNow()
|
||||
|
||||
var batchAccountNameSecretNameVar = '${secretNamePrefix}BatchAccountKey'
|
||||
|
||||
resource batchAccount 'Microsoft.Batch/batchAccounts@2021-06-01' existing = {
|
||||
name: batchAccoutName
|
||||
}
|
||||
|
||||
module storageAccountNameSecret './akv.secrets.bicep' = {
|
||||
name: 'batch-account-key-${utcValue}'
|
||||
scope: resourceGroup(keyVaultResourceGroup)
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: batchAccountNameSecretNameVar
|
||||
secretValue: batchAccount.listKeys().primary
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param storageAccountName string
|
||||
param shareName string
|
||||
param accessTier string = 'TransactionOptimized'
|
||||
param enabledProtocols string = 'SMB'
|
||||
|
||||
|
||||
resource fileShare 'Microsoft.Storage/storageAccounts/fileServices/shares@2021-08-01' = {
|
||||
name: '${storageAccountName}/default/${shareName}'
|
||||
properties: {
|
||||
accessTier: accessTier
|
||||
enabledProtocols: enabledProtocols
|
||||
metadata: {}
|
||||
shareQuota: 5120
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param workspaceName string
|
||||
param location string
|
||||
param sku string = 'pergb2018'
|
||||
|
||||
resource workspace 'Microsoft.OperationalInsights/workspaces@2020-10-01' = {
|
||||
name: workspaceName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
properties: {
|
||||
sku: {
|
||||
name: sku
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output workspaceId string = workspace.id
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param location string = resourceGroup().location
|
||||
param uamiName string
|
||||
param environmentName string
|
||||
|
||||
resource uami 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = {
|
||||
name: uamiName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
}
|
||||
|
||||
output uamiId string = uami.id
|
||||
output uamiPrincipalId string = uami.properties.principalId
|
||||
output uamiClientId string = uami.properties.clientId
|
||||
output uamiTenantId string = uami.properties.tenantId
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
targetScope='subscription'
|
||||
|
||||
param environmentName string
|
||||
param resourceGroupName string
|
||||
param resourceGroupLocation string
|
||||
|
||||
resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = {
|
||||
name: resourceGroupName
|
||||
location: resourceGroupLocation
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
}
|
||||
|
||||
output rgLocation string = rg.location
|
||||
output rgId string = rg.id
|
|
@ -0,0 +1,22 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param principalId string
|
||||
param roleDefinitionId string
|
||||
|
||||
param resourceName string
|
||||
|
||||
param roleAssignmentId string = newGuid()
|
||||
|
||||
resource existingResource 'Microsoft.Storage/storageAccounts@2021-08-01' existing = {
|
||||
name: resourceName
|
||||
}
|
||||
|
||||
resource symbolicname 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = {
|
||||
name: roleAssignmentId
|
||||
scope: existingResource
|
||||
properties: {
|
||||
principalId: principalId
|
||||
roleDefinitionId: roleDefinitionId
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param storageAccountName string
|
||||
param location string = resourceGroup().location
|
||||
param environmentName string
|
||||
param storeType string = 'data'
|
||||
param storageSku string = 'Standard_GRS'
|
||||
param storageKind string = 'StorageV2'
|
||||
param public_access string = 'Enabled'
|
||||
param isHnsEnabled bool = false
|
||||
|
||||
resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = {
|
||||
name: storageAccountName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
store: storeType
|
||||
}
|
||||
sku: {
|
||||
name: storageSku
|
||||
}
|
||||
kind: storageKind
|
||||
properties:{
|
||||
isHnsEnabled: isHnsEnabled
|
||||
accessTier: 'Hot'
|
||||
publicNetworkAccess: public_access
|
||||
networkAcls: {
|
||||
resourceAccessRules: []
|
||||
bypass: 'AzureServices'
|
||||
virtualNetworkRules: []
|
||||
ipRules: []
|
||||
defaultAction: ((public_access == 'Enabled')) ? 'Allow' : 'Deny'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output storageAccountId string = storageAccount.id
|
||||
output fileEndpointUri string = storageAccount.properties.primaryEndpoints.file
|
||||
output primaryKey string = storageAccount.listKeys().keys[0].value
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param storageAccountName string
|
||||
param containerName string
|
||||
param containerPublicAccess string = 'None'
|
||||
param containerDeleteRetentionPolicyEnabled bool = true
|
||||
param containerDeleteRetentionPolicyDays int = 7
|
||||
param deleteRetentionPolicyEnabled bool = true
|
||||
param deleteRetentionPolicyDays int = 7
|
||||
param isVersioningEnabled bool = false
|
||||
|
||||
|
||||
resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' existing = {
|
||||
name: storageAccountName
|
||||
}
|
||||
|
||||
resource storageAccountService 'Microsoft.Storage/storageAccounts/blobServices@2021-08-01' = {
|
||||
name: 'default'
|
||||
parent: storageAccount
|
||||
properties: {
|
||||
containerDeleteRetentionPolicy: {
|
||||
days: containerDeleteRetentionPolicyDays
|
||||
enabled: containerDeleteRetentionPolicyEnabled
|
||||
}
|
||||
deleteRetentionPolicy: {
|
||||
days: deleteRetentionPolicyDays
|
||||
enabled: deleteRetentionPolicyEnabled
|
||||
}
|
||||
isVersioningEnabled: isVersioningEnabled
|
||||
}
|
||||
}
|
||||
|
||||
resource stgAcctBlobSvcsContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = {
|
||||
name: containerName
|
||||
parent: storageAccountService
|
||||
properties: {
|
||||
publicAccess: containerPublicAccess
|
||||
}
|
||||
}
|
||||
|
||||
output containerId string = stgAcctBlobSvcsContainer.id
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param storageAccountName string
|
||||
param keyVaultName string
|
||||
param keyVaultResourceGroup string
|
||||
param secretNamePrefix string = 'Geospatial'
|
||||
param utcValue string = utcNow()
|
||||
|
||||
var storageAccountKeySecretNameVar = '${secretNamePrefix}StorageAccountKey'
|
||||
|
||||
resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' existing = {
|
||||
name: storageAccountName
|
||||
}
|
||||
|
||||
module storageAccountKeySecret './akv.secrets.bicep' = {
|
||||
name: '${toLower(secretNamePrefix)}-storage-account-key-${utcValue}'
|
||||
scope: resourceGroup(keyVaultResourceGroup)
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: storageAccountKeySecretNameVar
|
||||
secretValue: storageAccount.listKeys().keys[0].value
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param storageAccountName string
|
||||
param location string = resourceGroup().location
|
||||
param environmentName string
|
||||
param storeType string = 'data'
|
||||
param storageSku string = 'Standard_GRS'
|
||||
param storageKind string = 'StorageV2'
|
||||
param public_access string = 'Enabled'
|
||||
|
||||
|
||||
module hnsEnabledStorageAccount 'storage.bicep' = {
|
||||
name: '${storageAccountName}-hns'
|
||||
params: {
|
||||
storageAccountName: storageAccountName
|
||||
location: location
|
||||
environmentName: environmentName
|
||||
storageSku: storageSku
|
||||
storageKind: storageKind
|
||||
isHnsEnabled: true
|
||||
public_access: public_access
|
||||
storeType: storeType
|
||||
}
|
||||
}
|
||||
|
||||
output storageAccountId string = hnsEnabledStorageAccount.outputs.storageAccountId
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param vNetName string
|
||||
param subnetName string
|
||||
param subnetAddressPrefix string
|
||||
param serviceEndPoints array = []
|
||||
|
||||
|
||||
//Subnet with RT and NSG
|
||||
resource subnet 'Microsoft.Network/virtualNetworks/subnets@2020-06-01' = {
|
||||
name: '${vNetName}/${subnetName}'
|
||||
properties: {
|
||||
addressPrefix: subnetAddressPrefix
|
||||
serviceEndpoints: serviceEndPoints
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param synapseWorkspaceName string
|
||||
|
||||
param logs array
|
||||
param storageAccountId string = ''
|
||||
param workspaceId string = ''
|
||||
param serviceBusId string = ''
|
||||
|
||||
param logAnalyticsDestinationType string = ''
|
||||
param eventHubAuthorizationRuleId string = ''
|
||||
param eventHubName string = ''
|
||||
|
||||
resource existingResource 'Microsoft.Synapse/workspaces@2021-06-01' existing = {
|
||||
name: synapseWorkspaceName
|
||||
}
|
||||
|
||||
resource symbolicname 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = {
|
||||
name: '${existingResource.name}-diag'
|
||||
scope: existingResource
|
||||
properties: {
|
||||
eventHubAuthorizationRuleId: empty(eventHubAuthorizationRuleId) ? null : eventHubAuthorizationRuleId
|
||||
eventHubName: empty(eventHubName) ? null : eventHubName
|
||||
logAnalyticsDestinationType: empty(logAnalyticsDestinationType) ? null: logAnalyticsDestinationType
|
||||
logs: logs
|
||||
metrics: []
|
||||
serviceBusRuleId: empty(serviceBusId) ? null : serviceBusId
|
||||
storageAccountId: empty(storageAccountId) ? null : storageAccountId
|
||||
workspaceId: empty(workspaceId) ? null : workspaceId
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param location string = resourceGroup().location
|
||||
param synapseWorkspaceName string
|
||||
param sparkPoolName string
|
||||
param autoPauseEnabled bool = true
|
||||
param autoPauseDelayInMinutes int = 15
|
||||
param autoScaleEnabled bool = true
|
||||
param autoScaleMinNodeCount int = 1
|
||||
param autoScaleMaxNodeCount int = 5
|
||||
param cacheSize int = 0
|
||||
param dynamicExecutorAllocationEnabled bool = false
|
||||
param isComputeIsolationEnabled bool = false
|
||||
param nodeCount int = 0
|
||||
param nodeSize string
|
||||
param nodeSizeFamily string
|
||||
param sparkVersion string = '3.1'
|
||||
param poolId string = 'default'
|
||||
|
||||
|
||||
resource synapseWorkspace 'Microsoft.Synapse/workspaces@2021-06-01' existing = {
|
||||
name: synapseWorkspaceName
|
||||
}
|
||||
|
||||
resource synapseSparkPool 'Microsoft.Synapse/workspaces/bigDataPools@2021-06-01' = {
|
||||
name: sparkPoolName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
poolId: poolId
|
||||
}
|
||||
parent: synapseWorkspace
|
||||
properties: {
|
||||
autoPause: {
|
||||
delayInMinutes: autoPauseDelayInMinutes
|
||||
enabled: autoPauseEnabled
|
||||
}
|
||||
autoScale: {
|
||||
enabled: autoScaleEnabled
|
||||
maxNodeCount: autoScaleMaxNodeCount
|
||||
minNodeCount: autoScaleMinNodeCount
|
||||
}
|
||||
cacheSize: cacheSize
|
||||
dynamicExecutorAllocation: {
|
||||
enabled: dynamicExecutorAllocationEnabled
|
||||
}
|
||||
isComputeIsolationEnabled: isComputeIsolationEnabled
|
||||
nodeCount: nodeCount
|
||||
nodeSize: nodeSize
|
||||
nodeSizeFamily: nodeSizeFamily
|
||||
sparkVersion: sparkVersion
|
||||
}
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
param environmentName string
|
||||
param location string = resourceGroup().location
|
||||
param synapseWorkspaceName string
|
||||
param hnsStorageAccountName string
|
||||
param hnsStorageFileSystem string = 'users'
|
||||
param sqlAdminLogin string = 'sqladmin'
|
||||
param sqlAdminLoginPassword string
|
||||
param firewallAllowEndIP string = '255.255.255.255'
|
||||
param firewallAllowStartIP string = '0.0.0.0'
|
||||
|
||||
param gitRepoAccountName string = ''
|
||||
param gitRepoCollaborationBranch string = 'main'
|
||||
param gitRepoHostName string = ''
|
||||
param gitRepoLastCommitId string = ''
|
||||
param gitRepoVstsProjectName string = ''
|
||||
param gitRepoRepositoryName string = ''
|
||||
param gitRepoRootFolder string = '.'
|
||||
param gitRepoVstsTenantId string = subscription().tenantId
|
||||
param gitRepoType string = ''
|
||||
|
||||
param keyVaultName string = ''
|
||||
param synapseSqlAdminPasswordSecretName string = 'synapse-sqladmin-password'
|
||||
param utcValue string = utcNow()
|
||||
param workspaceId string = 'default'
|
||||
|
||||
resource hnsStorage 'Microsoft.Storage/storageAccounts@2021-08-01' existing = {
|
||||
name: hnsStorageAccountName
|
||||
}
|
||||
|
||||
resource synapseWorkspace 'Microsoft.Synapse/workspaces@2021-06-01' = {
|
||||
name: synapseWorkspaceName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
workspaceId: workspaceId
|
||||
}
|
||||
identity: {
|
||||
type: 'SystemAssigned'
|
||||
}
|
||||
properties: {
|
||||
defaultDataLakeStorage: {
|
||||
resourceId: hnsStorage.id
|
||||
accountUrl: hnsStorage.properties.primaryEndpoints.dfs
|
||||
filesystem: hnsStorageFileSystem
|
||||
}
|
||||
sqlAdministratorLogin: sqlAdminLogin
|
||||
sqlAdministratorLoginPassword: sqlAdminLoginPassword
|
||||
workspaceRepositoryConfiguration:(empty(gitRepoType))? {}: {
|
||||
accountName: gitRepoAccountName
|
||||
collaborationBranch: gitRepoCollaborationBranch
|
||||
hostName: gitRepoHostName
|
||||
lastCommitId: gitRepoLastCommitId
|
||||
projectName: gitRepoVstsProjectName
|
||||
repositoryName: gitRepoRepositoryName
|
||||
rootFolder: gitRepoRootFolder
|
||||
tenantId: gitRepoVstsTenantId
|
||||
type: gitRepoType
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource synapseWorkspaceFwRules 'Microsoft.Synapse/workspaces/firewallRules@2021-06-01' = {
|
||||
name: 'allowAll'
|
||||
parent: synapseWorkspace
|
||||
properties: {
|
||||
endIpAddress: firewallAllowEndIP
|
||||
startIpAddress: firewallAllowStartIP
|
||||
}
|
||||
}
|
||||
|
||||
module synapseSqlAdminPasswordSecret './akv.secrets.bicep' = if (!empty(keyVaultName)) {
|
||||
name: 'synapse-sqladmin-password-${utcValue}'
|
||||
params: {
|
||||
environmentName: environmentName
|
||||
keyVaultName: keyVaultName
|
||||
secretName: synapseSqlAdminPasswordSecretName
|
||||
secretValue: sqlAdminLoginPassword
|
||||
}
|
||||
}
|
||||
|
||||
output synapseMIPrincipalId string = synapseWorkspace.identity.principalId
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
// Name of the VNET.
|
||||
param virtualNetworkName string
|
||||
param location string = resourceGroup().location
|
||||
param environmentName string
|
||||
param addressPrefix string
|
||||
|
||||
resource vnet 'Microsoft.Network/virtualNetworks@2020-06-01' = {
|
||||
name: virtualNetworkName
|
||||
location: location
|
||||
tags: {
|
||||
environment: environmentName
|
||||
}
|
||||
properties: {
|
||||
addressSpace: {
|
||||
addressPrefixes: [
|
||||
addressPrefix
|
||||
]
|
||||
}
|
||||
subnets: [
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
set -x
|
||||
|
||||
if [[ -z "$1" ]]
|
||||
then
|
||||
echo "Environment Code value not supplied"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -z "$2" ]]
|
||||
then
|
||||
echo "Location value not supplied"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Setup parameters
|
||||
envCode=${envCode:-"${1}"}
|
||||
location=${location:-"${2}"}
|
||||
envTag=${envTag:-"synapse-${envCode}"}
|
||||
deploymentName=${3:-"${envTag}-deploy"}
|
||||
|
||||
DEPLOYMENT_SCRIPT="az deployment sub create -l $location -n $deploymentName \
|
||||
-f ./deploy/infra/main.bicep \
|
||||
-p \
|
||||
location=$location \
|
||||
environmentCode=$envCode \
|
||||
environment=$envTag"
|
||||
$DEPLOYMENT_SCRIPT
|
||||
set +x
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
import shutil
|
||||
|
||||
|
||||
# Collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run packaging function')
|
||||
parser.add_argument('--raw_storage_account_name', type=str, required=True, help='Name of the Raw data hosting Storage Account')
|
||||
parser.add_argument('--synapse_storage_account_name', type=str, required=True, help='Name of the Synapse data hosting Storage Account')
|
||||
parser.add_argument('--synapse_pool_name', type=str, required=True, help='Name of the Synapse pool in the Synapse workspace to use as default')
|
||||
parser.add_argument('--batch_storage_account_name', type=str, required=True, help='Name of the Batch Storage Account')
|
||||
parser.add_argument('--batch_account', type=str, required=True, help="Batch Account name")
|
||||
parser.add_argument('--linked_key_vault', type=str, required=True, help="Key Vault to be added as Linked Service")
|
||||
parser.add_argument('--location', type=str, required=True, help="Batch Account Location")
|
||||
|
||||
#Parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def replace(tokens_map: dict, body: str):
|
||||
|
||||
# use regex to identify tokens in the files. Tokens are in the format __token_name__
|
||||
# same token can occur multiple times in the same file
|
||||
tokenizer = re.compile(r"([\w\'\-]+|\s+|.?)")
|
||||
|
||||
# replace tokens with actual values
|
||||
swap = lambda x: '{0}'.format(tokens_map.get(x)) if x in tokens_map else x
|
||||
|
||||
# find all and replace
|
||||
result = ''.join(swap(st) for st in tokenizer.findall(body))
|
||||
|
||||
return result
|
||||
|
||||
def package(tokens_map: dict):
|
||||
|
||||
script_dirname = os.path.dirname(__file__)
|
||||
src_folder_path = os.path.join(script_dirname, '..', 'src', 'workflow')
|
||||
package_folder_path= os.path.join(os.getcwd(), 'package')
|
||||
|
||||
# mode
|
||||
mode = 0o766
|
||||
|
||||
# if package folder already exists, delete it before starting a new iteration
|
||||
if os.path.exists(package_folder_path):
|
||||
shutil.rmtree(package_folder_path)
|
||||
|
||||
# copy the folder structure from src/workflow folder before replacing the
|
||||
# tokens with values
|
||||
shutil.copytree(src_folder_path, package_folder_path)
|
||||
|
||||
# set of folder names are fixed for synapse pipelines and hence hardcoding them
|
||||
for folder in ['linkedService', 'sparkJobDefinition', 'pipeline', 'bigDataPool']:
|
||||
|
||||
# iterate through all file
|
||||
for file in os.listdir(f'{package_folder_path}/{folder}'):
|
||||
|
||||
# check whether file is in json format or not
|
||||
if file.endswith(".json"):
|
||||
|
||||
file_path = os.path.join(package_folder_path, folder ,file)
|
||||
|
||||
with open(file_path, 'r') as f:
|
||||
|
||||
# replaced token string in memory
|
||||
token_replaced_file_content = replace(tokens_map, f.read())
|
||||
|
||||
with open(file_path, 'w') as file_write:
|
||||
|
||||
if token_replaced_file_content is not None:
|
||||
|
||||
# write back the token replaced string to file
|
||||
file_write.write(token_replaced_file_content)
|
||||
|
||||
# zip the folder contents to package.zip
|
||||
shutil.make_archive('package', 'zip', package_folder_path)
|
||||
|
||||
# finally clean up the package folder
|
||||
if os.path.exists(package_folder_path):
|
||||
shutil.rmtree(package_folder_path)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# list of tokens and their values to be replaced
|
||||
tokens_map = {
|
||||
'__raw_data_storage_account__': args.raw_storage_account_name,
|
||||
'__batch_storage_account__': args.batch_storage_account_name,
|
||||
'__batch_account__': args.batch_account,
|
||||
'__linked_key_vault__': args.linked_key_vault,
|
||||
'__synapse_storage_account__': args.synapse_storage_account_name,
|
||||
'__synapse_pool_name__': args.synapse_pool_name,
|
||||
'__location__': args.location
|
||||
}
|
||||
|
||||
# invoke package method
|
||||
package(tokens_map)
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
PRJ_ROOT="$(cd `dirname "${BASH_SOURCE}"`/..; pwd)"
|
||||
set -ex
|
||||
|
||||
echo 'Retrieving resources from Azure ...'
|
||||
RAW_STORAGE_ACCT=$(az storage account list --query "[?tags.store && tags.store == 'raw'].name" -o tsv -g $1-data-rg)
|
||||
SYNAPSE_STORAGE_ACCT=$(az storage account list --query "[?tags.store && tags.store == 'synapse'].name" -o tsv -g $1-pipeline-rg)
|
||||
BATCH_STORAGE_ACCT=$(az storage account list --query "[?tags.store && tags.store == 'batch'].name" -o tsv -g $1-orc-rg)
|
||||
BATCH_ACCT=$(az batch account list --query "[?tags.type && tags.type == 'batch'].name" -o tsv -g $1-orc-rg)
|
||||
BATCH_ACCT_LOCATION=$(az batch account list --query "[?tags.type && tags.type == 'batch'].location" -o tsv -g $1-orc-rg)
|
||||
KEY_VAULT=$(az keyvault list --query "[?tags.usage && tags.usage == 'linkedService'].name" -o tsv -g $1-pipeline-rg)
|
||||
SYNAPSE_WORKSPACE=$(az synapse workspace list --query "[?tags.workspaceId && tags.workspaceId == 'default'].name" -o tsv -g $1-pipeline-rg)
|
||||
echo $SYNAPSE_WORKSPACE
|
||||
SYNAPSE_WORKSPACE_RG=$(az synapse workspace list --query "[?tags.workspaceId && tags.workspaceId == 'default'].resourceGroup" -o tsv -g $1-pipeline-rg)
|
||||
echo $SYNAPSE_WORKSPACE_RG
|
||||
SYNAPSE_POOL=$(az synapse spark pool list --workspace-name $SYNAPSE_WORKSPACE --resource-group $SYNAPSE_WORKSPACE_RG --query "[?tags.poolId && tags.poolId == 'default'].name" -o tsv)
|
||||
echo $SYNAPSE_POOL
|
||||
|
||||
echo 'Retrieved resource from Azure and ready to package'
|
||||
PACKAGING_SCRIPT="python3 ${PRJ_ROOT}/deploy/package.py --raw_storage_account_name $RAW_STORAGE_ACCT \
|
||||
--synapse_storage_account_name $SYNAPSE_STORAGE_ACCT \
|
||||
--batch_storage_account_name $BATCH_STORAGE_ACCT \
|
||||
--batch_account $BATCH_ACCT \
|
||||
--linked_key_vault $KEY_VAULT \
|
||||
--synapse_pool_name $SYNAPSE_POOL \
|
||||
--location $BATCH_ACCT_LOCATION"
|
||||
|
||||
echo $PACKAGING_SCRIPT
|
||||
set -x
|
||||
|
||||
echo 'Starting packaging script ...'
|
||||
$PACKAGING_SCRIPT
|
||||
|
||||
echo 'Packaging script completed'
|
|
@ -0,0 +1,92 @@
|
|||
# Custom Vision Model
|
||||
|
||||
In this sample solution and its custom vision model implementation, the input is a WGS84 GeoTIFF that is transformed by (optionally) mosaicking, cropping, converting to PNG, and chipping the image. The chipped images are passed to the custom vision model as input along with the [specification document](./specs/custom_vision_object_detection.json). The CV model outputs JSON files that describe the objects identified.
|
||||
|
||||
## What does this model do?
|
||||
|
||||
This model detects swimming pools in a given Area of Interest.
|
||||
|
||||
## What are the inputs and outputs?
|
||||
|
||||
A number of small images in PNG format, of size 512 x 512 (or 1024 x 1024), can be passed to the model as input. The input CRS is WGS84, with data from a moderate- to high-resolution imagery source.
|
||||
|
||||
The output contains a number of files that are stored in sub-folders of three file types:
|
||||
|
||||
* Images in PNG format, identical to the input files and unmodified by the AI model.
* GeoJSON files that contain the image coordinates for a specific PNG tile (512 x 512 or 1024 x 1024 image).
* XML files that hold the geolocation / reference information in latitude & longitude.
|
||||
|
||||
## Are additional transformations / processing of output required?
|
||||
|
||||
Yes. The output contains the pool locations in image coordinates, which need to be converted into geolocations. A transformation named `pool-geolocation` performs the final conversion from image coordinates to geolocation; an illustrative conversion sketch follows.
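The sketch below is a minimal illustration of that conversion, assuming the tile's geographic bounds in WGS84 are already known (for example, parsed from the accompanying XML file) and that the bounding box uses the normalized image coordinates emitted by the model. The function name, the `tile_bounds` structure, and the sample values are assumptions for illustration, not the actual `pool-geolocation` transform.

```python
# Illustrative only: convert a normalized bounding box (left/top/width/height in 0..1,
# as emitted by the Custom Vision model) into a WGS84 lat/lon centroid, assuming the
# geographic bounds of the source tile are already known (e.g. parsed from the XML file).
def bbox_to_latlon(bbox: dict, tile_bounds: dict) -> tuple:
    # Center of the detection in normalized image coordinates (origin at top-left).
    cx = bbox["left"] + bbox["width"] / 2
    cy = bbox["top"] + bbox["height"] / 2

    # Linear interpolation between the tile's west/east and north/south edges.
    lon = tile_bounds["west"] + cx * (tile_bounds["east"] - tile_bounds["west"])
    lat = tile_bounds["north"] - cy * (tile_bounds["north"] - tile_bounds["south"])
    return lat, lon


if __name__ == "__main__":
    # Hypothetical tile bounds and a detection similar to the sample JSON shown later.
    tile = {"west": -117.06, "east": -117.05, "south": 32.81, "north": 32.82}
    box = {"left": 0.958, "top": 0.776, "width": 0.0249, "height": 0.0355}
    print(bbox_to_latlon(box, tile))
```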
|
||||
|
||||
## Transforms
|
||||
|
||||
The following transforms are used in this sample solution; some of these transformations are AI-model specific and some are data-source specific (an illustrative chipping sketch follows the list).
|
||||
|
||||
* Mosaic - stitches multiple GeoTIFF files into a single GeoTIFF file.
* Crop - crops the GeoTIFF to the area of interest represented as a polygon.
* Convert to PNG - converts the GeoTIFF to the PNG file format.
* Chipping - cuts the large PNG file into multiple smaller PNG files (512 x 512 or 1024 x 1024).
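As a rough illustration of the chipping step only, the sketch below cuts a PNG into fixed-size tiles with Pillow. It assumes the converted PNG fits in memory, skips partial edge tiles, and uses placeholder paths, so it is a sketch under those assumptions rather than the transform used by the actual pipeline.

```python
# Illustrative chipping sketch: cut a large PNG into fixed-size tiles (512 x 512 here).
# Partial tiles at the right and bottom edges are skipped for simplicity.
import os

from PIL import Image  # assumes Pillow is installed


def chip_png(src_path: str, out_dir: str, tile_size: int = 512) -> None:
    os.makedirs(out_dir, exist_ok=True)
    image = Image.open(src_path)
    width, height = image.size
    for top in range(0, height - tile_size + 1, tile_size):
        for left in range(0, width - tile_size + 1, tile_size):
            tile = image.crop((left, top, left + tile_size, top + tile_size))
            tile.save(os.path.join(out_dir, f"tile_{top}_{left}.png"))


# Example with hypothetical paths:
# chip_png("area_of_interest.png", "chips", tile_size=512)
```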
|
||||
|
||||
# Custom Vision Container Overview
|
||||
|
||||
### Overview
|
||||
This container allows users to pass images through a General (Compact) domain Azure Custom Vision object detection model and retrieve the predictions from that model. Users can train the model using Custom Vision, export the trained model as a container from the Custom Vision portal, and place the contents of the exported `app` folder inside the `src` folder.
|
||||
|
||||
This container runs offline and does not require communication with the Custom Vision service. Inference is done within the container and, specifically, two outputs are returned to the user:
|
||||
|
||||
|
||||
* JSON - One JSON file is returned for every image passed through the Custom Vision model. This JSON contains <b>all</b> model detections for the given image. For example, an image passed through a pool-detection Custom Vision model will return the following JSON with one detection:
|
||||
```json
|
||||
{
|
||||
"id": "e8526e8a-6e9a-433f-9ff2-0820f18ffc9a",
|
||||
"project": "c3c8d02c-e05c-49ea-9a87-fb85975233a9",
|
||||
"iteration": "cb0011d3-9e9b-4d1e-abf2-4fe51b588520",
|
||||
"created": "2021-03-19T01:39:39.675Z",
|
||||
"predictions": [
|
||||
{
|
||||
"probability": 0.9973912,
|
||||
"tagId": "00005547-553e-4058-a5a2-cafe7e5c822d",
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.9580524,
|
||||
"top": 0.7763942,
|
||||
"width": 0.02493298,
|
||||
"height": 0.035517573
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
* Image - A new image is also stored in a directory with all detections (that are above a user-specified probability threshold) highlighted, as in this example (an illustrative post-processing sketch follows the image):
|
||||
|
||||
![Pool Detect Example](./container/examples/out/img/test2.png "Pool Detection Example")
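The sketch below shows one way a consumer might combine the two outputs: keep detections above a probability cutoff and draw their boxes on the corresponding PNG with Pillow. The paths are placeholders, and the default threshold and styling values simply mirror the configuration keys described in the next section, so treat it as an illustration rather than the container's own drawing code.

```python
# Illustrative post-processing: read the prediction JSON for one image, keep detections
# above a probability cutoff, and draw their bounding boxes on the matching PNG.
import json

from PIL import Image, ImageDraw  # assumes Pillow is installed


def draw_detections(image_path, json_path, out_path,
                    prob_cutoff=0.25, tag_type="pool",
                    bbox_color="red", bbox_width=1):
    with open(json_path) as f:
        result = json.load(f)

    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    width, height = image.size

    for pred in result.get("predictions", []):
        if pred["tagName"] != tag_type or pred["probability"] < prob_cutoff:
            continue
        box = pred["boundingBox"]  # normalized left/top/width/height
        left, top = box["left"] * width, box["top"] * height
        right, bottom = left + box["width"] * width, top + box["height"] * height
        draw.rectangle([left, top, right, bottom], outline=bbox_color, width=bbox_width)

    image.save(out_path)


# Example with hypothetical paths:
# draw_detections("data/in/test2.png", "data/out/json/test2.json", "data/out/img/test2.png")
```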
|
||||
|
||||
### Model Configuration
|
||||
Model selection and specifications are defined in the `/app/data/config.json` file. Within this file users can define the following (a small loading sketch follows the list):
|
||||
|
||||
* prob_cutoff - The threshold a detection must meet in order for its bounding box to be drawn on the output image (note that all detections will still be included in the JSON regardless of their probability).
* tag_type - The type of tag (given that Custom Vision models can be trained on multiple tags) that should be highlighted in the output image. This should be one of the entries found in `labels.txt` under the `src` folder.
* bbox_color - The color of the bounding boxes around detections in the output image (default is red).
* bbox_width - The width of the bounding boxes around detections in the output image (default is 1).
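A minimal sketch of reading that configuration, assuming the key names shown in the sample `config.json`. The fallback values for missing keys follow the defaults described above (the `prob_cutoff` and `tag_type` fallbacks are taken from the sample file), and the path is the container-side location; the helper itself is hypothetical.

```python
# Illustrative configuration loading with fallbacks for missing keys.
import json

# Defaults described above; prob_cutoff and tag_type fallbacks come from the sample config.
DEFAULTS = {"prob_cutoff": 0.25, "tag_type": "pool", "bbox_color": "red", "bbox_width": 1}


def load_config(path: str = "/app/data/config.json") -> dict:
    with open(path) as f:
        config = json.load(f)
    # Keys present in the file override the defaults.
    return {**DEFAULTS, **config}


# Example:
# cfg = load_config()
# print(cfg["prob_cutoff"], cfg["tag_type"], cfg["bbox_color"], cfg["bbox_width"])
```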
|
||||
|
||||
|
||||
### Docker Build & Run
|
||||
To build and run the container, use the included `build.sh` and `run.sh` files. Within `run.sh` (an illustrative invocation sketch follows the list):
|
||||
|
||||
- Input files are provided at container runtime by mounting the local folder with images into the container's `/app/data/in` directory.
- The user's local config file is mounted in the container at `/app/data/config.json`.
- Output files are written to the container's `/app/data/out` directory, which can be mounted to a local output folder.
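The sketch below approximates such an invocation from Python (kept in the same language as the other sketches here) rather than reproducing the actual `run.sh`. The image tag matches `build.sh`, while the local folder names and the use of `subprocess` are assumptions.

```python
# Illustrative wrapper around `docker run` with the three mounts described above.
# This is not the project's run.sh; local folder names are placeholders.
import os
import subprocess


def run_container(local_in="in", local_config="config.json", local_out="out",
                  image="custom_vision_offline"):
    cwd = os.getcwd()
    cmd = [
        "docker", "run", "--rm",
        "-v", f"{cwd}/{local_in}:/app/data/in",               # input images
        "-v", f"{cwd}/{local_config}:/app/data/config.json",  # model configuration
        "-v", f"{cwd}/{local_out}:/app/data/out",             # JSON and annotated images
        image,
    ]
    subprocess.run(cmd, check=True)


# Example:
# run_container()
```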
|
||||
|
||||
# Attributions And Disclaimers
|
||||
- Images located in the [examples](./container/examples/in/) folder and used for testing the model are attributed to NAIP imagery available via [Planetary Computer](https://planetarycomputer.microsoft.com/explore?c=-117.0520%2C32.8127&z=18.49&r=Natural+color&d=naip&m=Most+recent+available). They are covered under [USDA](https://ngda-imagery-geoplatform.hub.arcgis.com).
|
||||
|
||||
- The following files, located in the `container/src` folder, are exported from the Azure Custom Vision portal and included as-is to demonstrate the use of a Custom Vision model with a Synapse pipeline. Users can use this container as a wrapper for their own model exports from Azure Custom Vision in a similar manner.
|
||||
* `app.py`
|
||||
* `object_detection.py`
|
||||
* `predict.py`
|
||||
* `labels.txt`
|
||||
* `model.pb` (this includes the model weights)
|
||||
* `metadata_properties.json`
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"prob_cutoff": 0.25,
|
||||
"tag_type": "pool",
|
||||
"bbox_color":"red",
|
||||
"bbox_width":1,
|
||||
"json": true
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
FROM python:3.7-slim
|
||||
|
||||
ENV APP_INPUT_DIR="data/in"
|
||||
ENV APP_OUTPUT_DIR="data/out"
|
||||
ENV APP_CONFIG_DIR="data/config.json"
|
||||
RUN mkdir -p $APP_INPUT_DIR $APP_OUTPUT_DIR
|
||||
|
||||
COPY src/requirements.txt ./requirements.txt
|
||||
|
||||
RUN pip install -U pip
|
||||
RUN cat requirements.txt | xargs -n 1 -L 1 pip install --no-cache-dir
|
||||
|
||||
COPY src/ ./
|
||||
|
||||
CMD python ./custom_vision.py
|
|
@ -0,0 +1,2 @@
|
|||
#!/bin/bash
|
||||
docker build -t custom_vision_offline .
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"prob_cutoff": 0.25,
|
||||
"tag_type": "pool",
|
||||
"bbox_color":"red",
|
||||
"bbox_width":1,
|
||||
"json": true
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"prob_cutoff": 0.25,
|
||||
"tag_type": "pool",
|
||||
"bbox_color":"red",
|
||||
"bbox_width":1,
|
||||
"json": true
|
||||
}
|
Binary data
src/aimodels/custom_vision_object_detection_offline/container/examples/in/test1.png
New file. Binary file not shown. Size: 486 KiB
Binary data
src/aimodels/custom_vision_object_detection_offline/container/examples/in/test2.png
New file. Binary file not shown. Size: 524 KiB
Binary data
src/aimodels/custom_vision_object_detection_offline/container/examples/out/img/test1.png
New file. Binary file not shown. Size: 496 KiB
Binary data
src/aimodels/custom_vision_object_detection_offline/container/examples/out/img/test2.png
New file. Binary file not shown. Size: 525 KiB
|
@ -0,0 +1,107 @@
|
|||
{
|
||||
"id": "",
|
||||
"project": "",
|
||||
"iteration": "",
|
||||
"created": "2022-03-14T23:54:33.916392",
|
||||
"predictions": [
|
||||
{
|
||||
"probability": 0.61881453,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.19230918,
|
||||
"top": 0.94991173,
|
||||
"width": 0.04589038,
|
||||
"height": 0.03576214
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.60209858,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.20987549,
|
||||
"top": 0.10457236,
|
||||
"width": 0.05345676,
|
||||
"height": 0.03343743
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.57750881,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.40422668,
|
||||
"top": 0.10402005,
|
||||
"width": 0.04818549,
|
||||
"height": 0.03059276
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.57598472,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.88052545,
|
||||
"top": 0.87105907,
|
||||
"width": 0.03775809,
|
||||
"height": 0.03882225
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.51797944,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.66309578,
|
||||
"top": 0.53548619,
|
||||
"width": 0.04114099,
|
||||
"height": 0.0347527
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.44672883,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.96371614,
|
||||
"top": 0.24967665,
|
||||
"width": 0.03051094,
|
||||
"height": 0.03393322
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.4148635,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.71822231,
|
||||
"top": 0.61416072,
|
||||
"width": 0.03197394,
|
||||
"height": 0.04014191
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.32669157,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.6975608,
|
||||
"top": 0.08355029,
|
||||
"width": 0.04166068,
|
||||
"height": 0.02810757
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.14432584,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.15353942,
|
||||
"top": 0.8245216,
|
||||
"width": 0.03279916,
|
||||
"height": 0.03068573
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,228 @@
|
|||
{
|
||||
"id": "",
|
||||
"project": "",
|
||||
"iteration": "",
|
||||
"created": "2022-03-14T23:54:34.217113",
|
||||
"predictions": [
|
||||
{
|
||||
"probability": 0.68227804,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.86942675,
|
||||
"top": 0.25047198,
|
||||
"width": 0.0299696,
|
||||
"height": 0.05709938
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.67781532,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.53397762,
|
||||
"top": 0.40376284,
|
||||
"width": 0.03537256,
|
||||
"height": 0.03403089
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.64626825,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.55155444,
|
||||
"top": 0.17739939,
|
||||
"width": 0.04233379,
|
||||
"height": 0.03724356
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.59528136,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.2945051,
|
||||
"top": 0.01148837,
|
||||
"width": 0.04693972,
|
||||
"height": 0.04848984
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.58577621,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.38650493,
|
||||
"top": 0.78441454,
|
||||
"width": 0.0435329,
|
||||
"height": 0.03155852
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.49681535,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.73940132,
|
||||
"top": 0.91627208,
|
||||
"width": 0.0488568,
|
||||
"height": 0.05211842
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.48387742,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.23648841,
|
||||
"top": 0.90758475,
|
||||
"width": 0.04203785,
|
||||
"height": 0.02623344
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.48253059,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.82436836,
|
||||
"top": 0.14898244,
|
||||
"width": 0.05127063,
|
||||
"height": 0.04430568
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.47221375,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.09281935,
|
||||
"top": 0.15040175,
|
||||
"width": 0.03197275,
|
||||
"height": 0.04334019
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.43596581,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.90928583,
|
||||
"top": 0.67520988,
|
||||
"width": 0.03096134,
|
||||
"height": 0.03581977
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.42975113,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.260298,
|
||||
"top": 0.57769715,
|
||||
"width": 0.0320904,
|
||||
"height": 0.04634295
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.41194016,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.71912464,
|
||||
"top": 0.91361449,
|
||||
"width": 0.04556774,
|
||||
"height": 0.05428081
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.39925119,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": -0.0031015,
|
||||
"top": 0.16078551,
|
||||
"width": 0.04404637,
|
||||
"height": 0.03060472
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.39371312,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.20343139,
|
||||
"top": 0.59237513,
|
||||
"width": 0.0303582,
|
||||
"height": 0.03504668
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.35623482,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.29452763,
|
||||
"top": 0.26536847,
|
||||
"width": 0.03910565,
|
||||
"height": 0.04812963
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.34556678,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.85841707,
|
||||
"top": 0.15258495,
|
||||
"width": 0.05957669,
|
||||
"height": 0.04066875
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.342978,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.53593288,
|
||||
"top": 0.80812226,
|
||||
"width": 0.03933229,
|
||||
"height": 0.04270116
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.29204515,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.2204608,
|
||||
"top": 0.80588384,
|
||||
"width": 0.02992057,
|
||||
"height": 0.0346998
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.27062061,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.38880507,
|
||||
"top": 0.690668,
|
||||
"width": 0.04562093,
|
||||
"height": 0.03153645
|
||||
}
|
||||
},
|
||||
{
|
||||
"probability": 0.25728515,
|
||||
"tagId": 0,
|
||||
"tagName": "pool",
|
||||
"boundingBox": {
|
||||
"left": 0.95600736,
|
||||
"top": 0.44882039,
|
||||
"width": 0.03758581,
|
||||
"height": 0.03808294
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
#!/bin/bash
|
||||
|
||||
base=$(pwd)
|
||||
|
||||
docker run -u 1000:1000 \
|
||||
-v "$base/examples/in/:/data/in" \
|
||||
-v "$base/examples/out/:/data/out" \
|
||||
-v "$base/config.mine.json:/data/config.json" \
|
||||
custom_vision_offline
|
|
@ -0,0 +1,78 @@
|
|||
|
||||
import json
|
||||
import os
|
||||
import io
|
||||
|
||||
# Imports for the REST API
|
||||
from flask import Flask, request, jsonify
|
||||
|
||||
# Imports for image processing
|
||||
from PIL import Image
|
||||
|
||||
# Imports for prediction
|
||||
from predict import initialize, predict_image, predict_url
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# 4MB Max image size limit
|
||||
app.config['MAX_CONTENT_LENGTH'] = 4 * 1024 * 1024
|
||||
|
||||
# Default route just shows simple text
|
||||
@app.route('/')
|
||||
def index():
|
||||
return 'CustomVision.ai model host harness'
|
||||
|
||||
# Like the CustomVision.ai Prediction service /image route handles either
|
||||
# - octet-stream image file
|
||||
# - a multipart/form-data with files in the imageData parameter
|
||||
@app.route('/image', methods=['POST'])
|
||||
@app.route('/<project>/image', methods=['POST'])
|
||||
@app.route('/<project>/image/nostore', methods=['POST'])
|
||||
@app.route('/<project>/classify/iterations/<publishedName>/image', methods=['POST'])
|
||||
@app.route('/<project>/classify/iterations/<publishedName>/image/nostore', methods=['POST'])
|
||||
@app.route('/<project>/detect/iterations/<publishedName>/image', methods=['POST'])
|
||||
@app.route('/<project>/detect/iterations/<publishedName>/image/nostore', methods=['POST'])
|
||||
def predict_image_handler(project=None, publishedName=None):
|
||||
try:
|
||||
imageData = None
|
||||
if ('imageData' in request.files):
|
||||
imageData = request.files['imageData']
|
||||
elif ('imageData' in request.form):
|
||||
imageData = request.form['imageData']
|
||||
else:
|
||||
imageData = io.BytesIO(request.get_data())
|
||||
|
||||
img = Image.open(imageData)
|
||||
results = predict_image(img)
|
||||
return jsonify(results)
|
||||
except Exception as e:
|
||||
print('EXCEPTION:', str(e))
|
||||
return 'Error processing image', 500
|
||||
|
||||
|
||||
# Like the CustomVision.ai Prediction service, the /url route handles URLs
|
||||
# in the body of the request of the form:
|
||||
# { 'Url': '<http url>'}
|
||||
@app.route('/url', methods=['POST'])
|
||||
@app.route('/<project>/url', methods=['POST'])
|
||||
@app.route('/<project>/url/nostore', methods=['POST'])
|
||||
@app.route('/<project>/classify/iterations/<publishedName>/url', methods=['POST'])
|
||||
@app.route('/<project>/classify/iterations/<publishedName>/url/nostore', methods=['POST'])
|
||||
@app.route('/<project>/detect/iterations/<publishedName>/url', methods=['POST'])
|
||||
@app.route('/<project>/detect/iterations/<publishedName>/url/nostore', methods=['POST'])
|
||||
def predict_url_handler(project=None, publishedName=None):
|
||||
try:
|
||||
image_url = json.loads(request.get_data().decode('utf-8'))['url']
|
||||
results = predict_url(image_url)
|
||||
return jsonify(results)
|
||||
except Exception as e:
|
||||
print('EXCEPTION:', str(e))
|
||||
return 'Error processing image'
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Load and initialize the model
|
||||
initialize()
|
||||
|
||||
# Run the server
|
||||
app.run(host='0.0.0.0', port=80)
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
# Import the required packages
|
||||
from time import process_time
|
||||
from time import time
|
||||
import os, json
|
||||
from PIL import Image, ImageDraw
|
||||
import time
|
||||
import pathlib
|
||||
import logging
|
||||
import shutil
|
||||
import random
|
||||
from predict import initialize, predict_image
|
||||
|
||||
logger = logging.getLogger("pool-detector")
|
||||
|
||||
# Defining the required functions
|
||||
def pred_bbox_coord(pred, img_width, img_height):
|
||||
|
||||
# Return top left bbox coordinate
|
||||
top_left = (round(pred['boundingBox']['left']*img_width),round(pred['boundingBox']['top']*img_height))
|
||||
|
||||
# Return bottom right bbox coordinate
|
||||
lower_right = (round(top_left[0]+(pred['boundingBox']['width']*img_width)),round(top_left[1]+(pred['boundingBox']['height']*img_height)))
|
||||
|
||||
return((top_left, lower_right))
|
||||
|
||||
def img_with_preds(raw_img, results_dict, prob_cutoff, tag_type, bbox_color='red', bbox_width=1):
|
||||
|
||||
# Extract the image size
|
||||
img_width = raw_img.size[0]
|
||||
img_height = raw_img.size[1]
|
||||
|
||||
# Create a new version of the image to draw on
|
||||
draw = ImageDraw.Draw(raw_img)
|
||||
|
||||
# For every prediction
|
||||
for pred in results_dict['predictions']:
|
||||
|
||||
# If the prediction is of the correct type and meets the confidence threshold
|
||||
if pred['probability']>=prob_cutoff and pred['tagName']==tag_type:
|
||||
|
||||
|
||||
pred_bbox = pred_bbox_coord(pred, img_width, img_height)
|
||||
draw.rectangle(pred_bbox, fill=None, outline=bbox_color, width=bbox_width)
|
||||
|
||||
return(raw_img)
|
||||
|
||||
|
||||
def retry_with_backoff(func, image_path, retries = 4, backoff_in_seconds = 0.5):
|
||||
attempts = 0
|
||||
while True:
|
||||
try:
|
||||
timeStart = time.time()
|
||||
logger.info(" Attempt {}".format(attempts))
|
||||
img=Image.open(image_path)
|
||||
timeEnd = time.time()
|
||||
logger.info("opening image: {}".format(timeEnd-timeStart))
|
||||
return func(img)
|
||||
except:
|
||||
if attempts == retries:
|
||||
logger.info(" Time is Up, attempt {} failed and maxed out retries".format(attempts))
|
||||
raise
|
||||
else:
|
||||
#sleep = backoff * (2^attempts) + random subsecond increment
|
||||
sleep = (backoff_in_seconds * 2 ** attempts + random.uniform(0, 1))
|
||||
logger.info(" Sleep :", str(sleep) + "s")
|
||||
time.sleep(sleep)
|
||||
attempts += 1
|
||||
pass
|
||||
|
||||
def get_custom_vision_preds(input_path, output_path, config):
|
||||
|
||||
pathlib.Path(f"{output_path}/img").mkdir(parents=True, exist_ok=True)
|
||||
pathlib.Path(f"{output_path}/json").mkdir(parents=True, exist_ok=True)
|
||||
pathlib.Path(f"{output_path}/other").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
save_configs = config.get("json", False)
|
||||
if save_configs:
|
||||
logger.info("saving results to json")
|
||||
|
||||
logger.info(f"looking for images in {input_path}")
|
||||
# For every image in the input directory
|
||||
extensions = ( "jpg","png","bmp","gif" )
|
||||
for input_file in os.scandir(input_path):
|
||||
t1_start = time.time()
|
||||
logger.info(input_file.name)
|
||||
counter = 1
|
||||
# Open the image
|
||||
with open(input_file.path, mode="rb") as img:
|
||||
if input_file.path.endswith(extensions):
|
||||
# Send the image to the custom vision model
|
||||
# Timer to evaluate request time for detect image
|
||||
|
||||
#send an image to custom vision model with retry control loop
|
||||
requestStart = time.time()
|
||||
results = retry_with_backoff(predict_image, input_file.path)
|
||||
requestEnd = time.time()
|
||||
logger.info("Request time: {}".format(requestEnd-requestStart))
|
||||
pilStart= time.time()
|
||||
# Collect the resulting predictions
|
||||
results_dict = results #.as_dict()
|
||||
# Open the image in Pil
|
||||
pil_img = Image.open(img)
|
||||
pilEnd = time.time()
|
||||
logger.info("pil_img time: {}".format(pilEnd-pilStart))
|
||||
|
||||
|
||||
predStart = time.time()
|
||||
# Append the detection bboxes to the PIL image
|
||||
pil_img = img_with_preds(pil_img,
|
||||
results_dict,
|
||||
config['prob_cutoff'],
|
||||
config['tag_type'],
|
||||
bbox_color=config['bbox_color'],
|
||||
bbox_width=config['bbox_width'])
|
||||
predEnd = time.time()
|
||||
logger.info("pred time: {}".format(predEnd-predStart))
|
||||
|
||||
# Save off the image with the detections
|
||||
saveStart = time.time()
|
||||
pil_img.save(os.path.join(output_path,'img',input_file.name))
|
||||
|
||||
# Save off a JSON with the results
|
||||
if save_configs:
|
||||
json_name = '.'.join(input_file.name.split('.')[:-1])+'.json'
|
||||
with open(os.path.join(output_path,'json',json_name),'w') as json_output:
|
||||
json.dump(results_dict,json_output)
|
||||
saveEnd = time.time()
|
||||
logger.info("save time: {}".format(saveEnd-saveStart))
|
||||
|
||||
else:
|
||||
print("File is not an image, copying to destination directory")
|
||||
sourcePath = input_file.path
|
||||
destinationPath = os.path.join(output_path,'other',input_file.name)
|
||||
|
||||
print(f"Copying file from {sourcePath} to {destinationPath}")
|
||||
shutil.copyfile(sourcePath,destinationPath)
|
||||
print(f"Copied file from {sourcePath} to {destinationPath}")
|
||||
|
||||
t1_stop = time.time()
|
||||
detect_img_requestTime = t1_stop-t1_start
|
||||
logger.info("{} process time: {}".format(input_file.name, detect_img_requestTime))
|
||||
logger.info("done")
|
||||
|
||||
# Run
|
||||
if __name__ == '__main__':
|
||||
logger.setLevel("DEBUG")
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
logger.handlers[0].setFormatter(logging.Formatter("[%(asctime)s] %(msg)s"))
|
||||
|
||||
# Define Input and Output Paths
|
||||
input_path = os.path.abspath(os.environ['APP_INPUT_DIR'])
|
||||
output_path = os.path.abspath(os.environ['APP_OUTPUT_DIR'])
|
||||
config_path = os.path.abspath(os.environ['APP_CONFIG_DIR'])
|
||||
|
||||
os.makedirs(input_path, exist_ok=True)
|
||||
os.makedirs(output_path, exist_ok=True)
|
||||
|
||||
logger.info(f"input {input_path}")
|
||||
logger.info(f"output {input_path}")
|
||||
logger.info(f"config {config_path}")
|
||||
|
||||
# Collect items from the config file
|
||||
with open(config_path) as config:
|
||||
config = json.load(config)
|
||||
|
||||
logger.info(f"using config {config}")
|
||||
initialize() #Loads Offline CV model.pb
|
||||
get_custom_vision_preds(input_path, output_path, config)
|
|
@ -0,0 +1,2 @@
|
|||
pool
|
||||
roof
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"CustomVision.Metadata.AdditionalModelInfo": "",
|
||||
"CustomVision.Metadata.Version": "1.2",
|
||||
"CustomVision.Postprocess.Method": "Yolo",
|
||||
"CustomVision.Postprocess.Yolo.Biases": "[0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]",
|
||||
"CustomVision.Postprocess.Yolo.NmsThreshold": "0.45",
|
||||
"CustomVision.Preprocess.CropHeight": "0",
|
||||
"CustomVision.Preprocess.CropMethod": "NoCrop",
|
||||
"CustomVision.Preprocess.CropWidth": "0",
|
||||
"CustomVision.Preprocess.MaxDimension": "0",
|
||||
"CustomVision.Preprocess.MaxScale": "0.0",
|
||||
"CustomVision.Preprocess.MinDimension": "0",
|
||||
"CustomVision.Preprocess.MinScale": "0.0",
|
||||
"CustomVision.Preprocess.NormalizeMean": "[0.0, 0.0, 0.0]",
|
||||
"CustomVision.Preprocess.NormalizeStd": "[1.0, 1.0, 1.0]",
|
||||
"CustomVision.Preprocess.ResizeMethod": "ByPixelCountAlign32",
|
||||
"CustomVision.Preprocess.TargetHeight": "512",
|
||||
"CustomVision.Preprocess.TargetWidth": "512",
|
||||
"Image.BitmapPixelFormat": "Rgb8",
|
||||
"Image.ColorSpaceGamma": "SRGB",
|
||||
"Image.NominalPixelRange": "Normalized_0_1"
|
||||
}
|
Binary data
src/aimodels/custom_vision_object_detection_offline/container/src/model.pb
New file. Binary file not shown.
|
@ -0,0 +1,204 @@
|
|||
# The steps implemented in the object detection sample code:
|
||||
# 1. for an image of width and height being (w, h) pixels, resize image to (w', h'), where w/h = w'/h' and w' x h' = 262144
|
||||
# 2. resize network input size to (w', h')
|
||||
# 3. pass the image to network and do inference
|
||||
# (4. if inference speed is too slow for you, try to make w' x h' smaller, which is defined with DEFAULT_INPUT_SIZE (in object_detection.py or ObjectDetection.cs))
|
||||
import numpy as np
|
||||
import math
|
||||
import mscviplib
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class ObjectDetection(object):
|
||||
"""Class for Custom Vision's exported object detection model
|
||||
"""
|
||||
|
||||
ANCHORS = np.array([[0.573, 0.677], [1.87, 2.06], [3.34, 5.47], [7.88, 3.53], [9.77, 9.17]])
|
||||
IOU_THRESHOLD = 0.45
|
||||
DEFAULT_INPUT_SIZE = 512
|
||||
|
||||
def __init__(self, labels, prob_threshold=0.10, max_detections = 20):
|
||||
"""Initialize the class
|
||||
|
||||
Args:
|
||||
labels ([str]): list of labels for the exported model.
|
||||
prob_threshold (float): threshold for class probability.
|
||||
max_detections (int): the max number of output results.
|
||||
"""
|
||||
|
||||
assert len(labels) >= 1, "At least 1 label is required"
|
||||
|
||||
self.labels = labels
|
||||
self.prob_threshold = prob_threshold
|
||||
self.max_detections = max_detections
|
||||
|
||||
def _logistic(self, x):
|
||||
return np.where(x > 0, 1 / (1 + np.exp(-x)), np.exp(x) / (1 + np.exp(x)))
|
||||
|
||||
def _non_maximum_suppression(self, boxes, class_probs, max_detections):
|
||||
"""Remove overlapping bouding boxes
|
||||
"""
|
||||
assert len(boxes) == len(class_probs)
|
||||
|
||||
max_detections = min(max_detections, len(boxes))
|
||||
max_probs = np.amax(class_probs, axis=1)
|
||||
max_classes = np.argmax(class_probs, axis=1)
|
||||
|
||||
areas = boxes[:, 2] * boxes[:, 3]
|
||||
|
||||
selected_boxes = []
|
||||
selected_classes = []
|
||||
selected_probs = []
|
||||
|
||||
while len(selected_boxes) < max_detections:
|
||||
# Select the prediction with the highest probability.
|
||||
i = np.argmax(max_probs)
|
||||
if max_probs[i] < self.prob_threshold:
|
||||
break
|
||||
|
||||
# Save the selected prediction
|
||||
selected_boxes.append(boxes[i])
|
||||
selected_classes.append(max_classes[i])
|
||||
selected_probs.append(max_probs[i])
|
||||
|
||||
box = boxes[i]
|
||||
other_indices = np.concatenate((np.arange(i), np.arange(i + 1, len(boxes))))
|
||||
other_boxes = boxes[other_indices]
|
||||
|
||||
# Get overlap between the 'box' and 'other_boxes'
|
||||
x1 = np.maximum(box[0], other_boxes[:, 0])
|
||||
y1 = np.maximum(box[1], other_boxes[:, 1])
|
||||
x2 = np.minimum(box[0] + box[2], other_boxes[:, 0] + other_boxes[:, 2])
|
||||
y2 = np.minimum(box[1] + box[3], other_boxes[:, 1] + other_boxes[:, 3])
|
||||
w = np.maximum(0, x2 - x1)
|
||||
h = np.maximum(0, y2 - y1)
|
||||
|
||||
# Calculate Intersection Over Union (IOU)
|
||||
overlap_area = w * h
|
||||
iou = overlap_area / (areas[i] + areas[other_indices] - overlap_area)
|
||||
|
||||
# Find the overlapping predictions
|
||||
overlapping_indices = other_indices[np.where(iou > self.IOU_THRESHOLD)[0]]
|
||||
overlapping_indices = np.append(overlapping_indices, i)
|
||||
|
||||
# Set the probability of overlapping predictions to zero, and update max_probs and max_classes.
|
||||
class_probs[overlapping_indices, max_classes[i]] = 0
|
||||
max_probs[overlapping_indices] = np.amax(class_probs[overlapping_indices], axis=1)
|
||||
max_classes[overlapping_indices] = np.argmax(class_probs[overlapping_indices], axis=1)
|
||||
|
||||
assert len(selected_boxes) == len(selected_classes) and len(selected_boxes) == len(selected_probs)
|
||||
return selected_boxes, selected_classes, selected_probs
|
||||
|
||||
def _extract_bb(self, prediction_output, anchors):
|
||||
assert len(prediction_output.shape) == 3
|
||||
num_anchor = anchors.shape[0]
|
||||
height, width, channels = prediction_output.shape
|
||||
assert channels % num_anchor == 0
|
||||
|
||||
num_class = int(channels / num_anchor) - 5
|
||||
assert num_class == len(self.labels)
|
||||
|
||||
outputs = prediction_output.reshape((height, width, num_anchor, -1))
|
||||
|
||||
# Extract bounding box information
|
||||
x = (self._logistic(outputs[..., 0]) + np.arange(width)[np.newaxis, :, np.newaxis]) / width
|
||||
y = (self._logistic(outputs[..., 1]) + np.arange(height)[:, np.newaxis, np.newaxis]) / height
|
||||
w = np.exp(outputs[..., 2]) * anchors[:, 0][np.newaxis, np.newaxis, :] / width
|
||||
h = np.exp(outputs[..., 3]) * anchors[:, 1][np.newaxis, np.newaxis, :] / height
|
||||
|
||||
# (x,y) in the network outputs is the center of the bounding box. Convert them to top-left.
|
||||
x = x - w / 2
|
||||
y = y - h / 2
|
||||
boxes = np.stack((x, y, w, h), axis=-1).reshape(-1, 4)
|
||||
|
||||
# Get confidence for the bounding boxes.
|
||||
objectness = self._logistic(outputs[..., 4])
|
||||
|
||||
# Get class probabilities for the bounding boxes.
|
||||
class_probs = outputs[..., 5:]
|
||||
class_probs = np.exp(class_probs - np.amax(class_probs, axis=3)[..., np.newaxis])
|
||||
class_probs = class_probs / np.sum(class_probs, axis=3)[..., np.newaxis] * objectness[..., np.newaxis]
|
||||
class_probs = class_probs.reshape(-1, num_class)
|
||||
|
||||
assert len(boxes) == len(class_probs)
|
||||
return (boxes, class_probs)
|
||||
|
||||
def _update_orientation(self, image):
|
||||
"""
|
||||
corrects image orientation according to EXIF data
|
||||
image: input PIL image
|
||||
returns corrected PIL image
|
||||
"""
|
||||
exif_orientation_tag = 0x0112
|
||||
if hasattr(image, '_getexif'):
|
||||
exif = image._getexif()
|
||||
if exif != None and exif_orientation_tag in exif:
|
||||
orientation = exif.get(exif_orientation_tag, 1)
|
||||
print('Image has EXIF Orientation: {}'.format(str(orientation)))
|
||||
# orientation is 1 based, shift to zero based and flip/transpose based on 0-based values
|
||||
orientation -= 1
|
||||
if orientation >= 4:
|
||||
image = image.transpose(Image.TRANSPOSE)
|
||||
if orientation == 2 or orientation == 3 or orientation == 6 or orientation == 7:
|
||||
image = image.transpose(Image.FLIP_TOP_BOTTOM)
|
||||
if orientation == 1 or orientation == 2 or orientation == 5 or orientation == 6:
|
||||
image = image.transpose(Image.FLIP_LEFT_RIGHT)
|
||||
return image
|
||||
|
||||
def predict_image(self, image):
|
||||
inputs = self.preprocess(image)
|
||||
prediction_outputs = self.predict(inputs)
|
||||
return self.postprocess(prediction_outputs)
|
||||
|
||||
def preprocess(self, image):
|
||||
image = image.convert("RGB") if image.mode != "RGB" else image
|
||||
image = self._update_orientation(image)
|
||||
|
||||
metadata = mscviplib.GetImageMetadata(image)
|
||||
resized_image = mscviplib.PreprocessForInferenceAsTensor(metadata,
|
||||
image.tobytes(),
|
||||
mscviplib.ResizeAndCropMethod.FixedPixelCountNoCropAlign32px,
|
||||
(self.DEFAULT_INPUT_SIZE, self.DEFAULT_INPUT_SIZE),
|
||||
mscviplib.InterpolationType.Bilinear,
|
||||
mscviplib.ColorSpace.RGB, (), ())
|
||||
resized_image = np.moveaxis(resized_image, 0, -1)
|
||||
return resized_image
|
||||
|
||||
def predict(self, preprocessed_inputs):
|
||||
"""Evaluate the model and get the output
|
||||
|
||||
Needs to be implemented for each platform, e.g. TensorFlow, CoreML, etc.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def postprocess(self, prediction_outputs):
|
||||
""" Extract bounding boxes from the model outputs.
|
||||
|
||||
Args:
|
||||
prediction_outputs: Output from the object detection model. (H x W x C)
|
||||
|
||||
Returns:
|
||||
List of Prediction objects.
|
||||
"""
|
||||
boxes, class_probs = self._extract_bb(prediction_outputs, self.ANCHORS)
|
||||
|
||||
# Remove bounding boxes whose confidence is lower than the threshold.
|
||||
max_probs = np.amax(class_probs, axis=1)
|
||||
index, = np.where(max_probs > self.prob_threshold)
|
||||
index = index[(-max_probs[index]).argsort()]
|
||||
|
||||
# Remove overlapping bounding boxes
|
||||
selected_boxes, selected_classes, selected_probs = self._non_maximum_suppression(boxes[index],
|
||||
class_probs[index],
|
||||
self.max_detections)
|
||||
|
||||
return [{'probability': round(float(selected_probs[i]), 8),
|
||||
'tagId': int(selected_classes[i]),
|
||||
'tagName': self.labels[selected_classes[i]],
|
||||
'boundingBox': {
|
||||
'left': round(float(selected_boxes[i][0]), 8),
|
||||
'top': round(float(selected_boxes[i][1]), 8),
|
||||
'width': round(float(selected_boxes[i][2]), 8),
|
||||
'height': round(float(selected_boxes[i][3]), 8)
|
||||
}
|
||||
} for i in range(len(selected_boxes))]
|
|
@ -0,0 +1,77 @@
|
|||
# The steps implemented in the object detection sample code:
|
||||
# 1. for an image of width and height being (w, h) pixels, resize image to (w', h'), where w/h = w'/h' and w' x h' = 262144
|
||||
# 2. resize network input size to (w', h')
|
||||
# 3. pass the image to network and do inference
|
||||
# (4. if inference speed is too slow for you, try to make w' x h' smaller, which is defined with DEFAULT_INPUT_SIZE (in object_detection.py or ObjectDetection.cs))
|
||||
import sys
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from urllib.request import urlopen
|
||||
from datetime import datetime
|
||||
from object_detection import ObjectDetection
|
||||
|
||||
MODEL_FILENAME = 'model.pb'
|
||||
LABELS_FILENAME = 'labels.txt'
|
||||
|
||||
od_model = None
|
||||
|
||||
class TFObjectDetection(ObjectDetection):
|
||||
"""Object Detection class for TensorFlow"""
|
||||
|
||||
def __init__(self, graph_def, labels):
|
||||
super(TFObjectDetection, self).__init__(labels)
|
||||
self.graph = tf.compat.v1.Graph()
|
||||
with self.graph.as_default():
|
||||
input_data = tf.compat.v1.placeholder(tf.float32, [1, None, None, 3], name='Placeholder')
|
||||
tf.import_graph_def(graph_def, input_map={"Placeholder:0": input_data}, name="")
|
||||
|
||||
def predict(self, preprocessed_image):
|
||||
inputs = np.array(preprocessed_image, dtype=np.float)[:, :, (2, 1, 0)] # RGB -> BGR
|
||||
|
||||
with tf.compat.v1.Session(graph=self.graph) as sess:
|
||||
output_tensor = sess.graph.get_tensor_by_name('model_outputs:0')
|
||||
outputs = sess.run(output_tensor, {'Placeholder:0': inputs[np.newaxis, ...]})
|
||||
return outputs[0]
|
||||
|
||||
def log_msg(msg):
|
||||
print("{}: {}".format(datetime.now(), msg))
|
||||
|
||||
def initialize():
|
||||
print('Loading model...', end='')
|
||||
graph_def = tf.compat.v1.GraphDef()
|
||||
with open(MODEL_FILENAME, 'rb') as f:
|
||||
graph_def.ParseFromString(f.read())
|
||||
print('Success!')
|
||||
|
||||
print('Loading labels...', end='')
|
||||
with open(LABELS_FILENAME, 'r') as f:
|
||||
labels = [l.strip() for l in f.readlines()]
|
||||
print("{} found. Success!".format(len(labels)))
|
||||
|
||||
global od_model
|
||||
od_model = TFObjectDetection(graph_def, labels)
|
||||
|
||||
def predict_url(image_url):
|
||||
log_msg("Predicting from url: " + image_url)
|
||||
with urlopen(image_url) as image_binary:
|
||||
image = Image.open(image_binary)
|
||||
return predict_image(image)
|
||||
|
||||
def predict_image(image):
|
||||
log_msg('Predicting image')
|
||||
|
||||
w, h = image.size
|
||||
log_msg("Image size: {}x{}".format(w, h))
|
||||
|
||||
predictions = od_model.predict_image(image)
|
||||
|
||||
response = {
|
||||
'id': '',
|
||||
'project': '',
|
||||
'iteration': '',
|
||||
'created': datetime.utcnow().isoformat(),
|
||||
'predictions': predictions }
|
||||
|
||||
log_msg('Results: ' + str(response))
|
||||
return response
|
|
@ -0,0 +1,6 @@
|
|||
numpy~=1.17.5
|
||||
tensorflow~=2.0.2
|
||||
flask~=1.1.2
|
||||
pillow~=7.2.0
|
||||
mscviplib==2.200731.16
|
||||
MarkupSafe~=2.0.1
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"algImageName" :"__registry_server__/geomodel/custom-vision-object-detection:latest",
|
||||
"containerName":"pool",
|
||||
"containerReference":"custom_vision_object_detection",
|
||||
"mountedDirectory": "/data",
|
||||
"submissionDirectory" : "in",
|
||||
"resultsDirectory" : "out",
|
||||
"logsDirectory":"logs",
|
||||
"modelPython":"./custom_vision.py",
|
||||
"vaultUri":"__vault_uri__",
|
||||
"contextFileName":"config.json",
|
||||
"cpu":3,
|
||||
"memory":14,
|
||||
"gpu":"",
|
||||
"validations":[
|
||||
{
|
||||
"validator":"FileExtensionValidator",
|
||||
"expected":".png",
|
||||
"value":"*.*"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import argparse
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
PKG_PATH = Path(__file__).parent
|
||||
PKG_NAME = PKG_PATH.name
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run copy noop function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account name where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
|
||||
parser.add_argument('--src_container', type=str, required=False, help='Source container in Azure Storage')
|
||||
parser.add_argument('--src_fileshare', type=str, required=False, help='Source File share in Azure Storage')
|
||||
parser.add_argument('--src_folder', default=None, required=True, help='Source folder path in Azure Storage Container or File Share')
|
||||
|
||||
parser.add_argument('--dst_container', type=str, required=False, help='Destination container in Azure Storage')
|
||||
parser.add_argument('--dst_fileshare', type=str, required=False, help='Destination File share in Azure Storage')
|
||||
parser.add_argument('--dst_folder', default=None, required=True, help='Destination folder path in Azure Storage Container or File Share')
|
||||
|
||||
parser.add_argument('--folders_to_create', action='append', required=False, help='Folders to create in container or file share')
|
||||
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def copy(src_mounted_path: str,
|
||||
src_unmounted_path: str,
|
||||
dst_mounted_path: str,
|
||||
dst_unmounted_path: str,
|
||||
dst_folder: str,
|
||||
folders: any):
|
||||
|
||||
# create only if it does not already exist
|
||||
if not os.path.isdir(f'{dst_unmounted_path}') and dst_unmounted_path.startswith('https'):
|
||||
mssparkutils.fs.mkdirs(dst_unmounted_path)
|
||||
|
||||
dst_path = dst_mounted_path.replace(f'/{dst_folder}', '')
|
||||
|
||||
# folders are not required, so do not try to iterate
|
||||
# it if it is empty
|
||||
if folders is not None:
|
||||
for folder in folders:
|
||||
logger.info(f"creating folder path {dst_path}/{folder}")
|
||||
|
||||
# create only if it does not already exist
|
||||
if not os.path.isdir(f'{dst_path}/{folder}'):
|
||||
os.makedirs(f'{dst_path}/{folder}')
|
||||
|
||||
# mssparkutils.fs.cp works with source and destination
|
||||
# that are of the same type storage container to storage
|
||||
# container
|
||||
logger.info(f"copying from {src_mounted_path} to {dst_mounted_path}")
|
||||
|
||||
# using shutil to copy directory or individual files as needed
|
||||
if os.path.isdir(src_mounted_path):
|
||||
shutil.copytree(src_mounted_path, dst_mounted_path, dirs_exist_ok=True)
|
||||
else:
|
||||
shutil.copy(src_mounted_path, dst_mounted_path)
|
||||
|
||||
logger.info("finished copying")
|
||||
|
||||
def map_source(storage_account_name: str,
|
||||
storage_account_key: str,
|
||||
container_name: str,
|
||||
fileshare_name: str,
|
||||
folder_path: str):
|
||||
|
||||
# the unmounted path refers to the storage account path that is not mounted at the /synfs/{job_id}/{file_share_name} path
|
||||
unmounted_path = ''
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
# if container name is specified, then the mapping / mount is for a container in azure storage account
|
||||
if container_name:
|
||||
|
||||
unmounted_path = f'abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/{folder_path}'
|
||||
|
||||
mssparkutils.fs.unmount(f'/{container_name}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{container_name}@{storage_account_name}.dfs.core.windows.net',
|
||||
f'/{container_name}',
|
||||
{"accountKey": storage_account_key}
|
||||
)
|
||||
|
||||
mounted_path = f'/synfs/{jobId}/{container_name}/{folder_path}'
|
||||
|
||||
# if file share is specified, then the mapping / mount is for a file share in azure storage account
|
||||
elif fileshare_name:
|
||||
|
||||
unmounted_path = f'https://{fileshare_name}@{storage_account_name}.file.core.windows.net/{folder_path}'
|
||||
|
||||
mssparkutils.fs.unmount(f'/{fileshare_name}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'https://{fileshare_name}@{storage_account_name}.file.core.windows.net/{folder_path}',
|
||||
f'/{fileshare_name}',
|
||||
{"accountKey": storage_account_key}
|
||||
)
|
||||
|
||||
mounted_path = f'/synfs/{jobId}/{fileshare_name}/{folder_path}'
|
||||
|
||||
return mounted_path, unmounted_path
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
|
||||
logger = logging.getLogger("copy_noop")
|
||||
|
||||
# map / mount the source container / file share in azure storage account
|
||||
src_mounted_path, src_unmounted_path = map_source(args.storage_account_name,
|
||||
args.storage_account_key,
|
||||
args.src_container,
|
||||
args.src_fileshare,
|
||||
args.src_folder)
|
||||
|
||||
# map / mount the destination container / file share in azure storage account
|
||||
dst_mounted_path, dst_unmounted_path = map_source(args.storage_account_name,
|
||||
args.storage_account_key,
|
||||
args.dst_container,
|
||||
args.dst_fileshare,
|
||||
args.dst_folder)
|
||||
|
||||
# copy method allows the three scenarios:
|
||||
# 1. source container to destination container
|
||||
# 2. source container to destination file share
|
||||
# 3. source file share to destination file share
|
||||
# source file share to destination container is not supported at this time
|
||||
copy(src_mounted_path, src_unmounted_path, dst_mounted_path, dst_unmounted_path, args.dst_folder, args.folders_to_create)
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"width": 512.1,
|
||||
"height": 512,
|
||||
"probability_cutoff": 0.5,
|
||||
"tag_name": "pool"
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import geopandas as gpd
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import glob
|
||||
import rasterio as rio
|
||||
import shapely as shp
|
||||
import argparse, sys
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from numpy import asarray
|
||||
from pathlib import Path
|
||||
from pyproj import Transformer
|
||||
from rasterio.crs import CRS
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
from utils import parse_config, init_logger
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
DEFAULT_CONFIG = {"probability_cutoff": 0.75}
|
||||
|
||||
PKG_PATH = Path(__file__).parent
|
||||
PKG_NAME = PKG_PATH.name
|
||||
|
||||
dst_folder_name = 'pool-geolocation'
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run pool geolocation function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account name where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder containing the source file for cropping')
|
||||
parser.add_argument('--config_file_name', required=True, help='Config file name')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def get_pool_gelocations(input_path: str,
|
||||
output_path: str,
|
||||
config_path: str):
|
||||
|
||||
config = parse_config(config_path, DEFAULT_CONFIG)
|
||||
|
||||
height = int(config["height"])
|
||||
width = int(config["width"])
|
||||
prob_cutoff = min(max(config["probability_cutoff"], 0), 1)
|
||||
dst_crs = CRS.from_epsg(4326)
|
||||
|
||||
logger.debug(f"looking for PAM file using `{input_path}/*.aux.xml`")
|
||||
|
||||
# find all files that contain the geocoordinate references
|
||||
for pam_file in glob.glob(f'{input_path}/*.aux.xml'):
|
||||
pam_base_filename = os.path.basename(pam_file)
|
||||
logger.info(f"found PAM file {str(pam_base_filename)}")
|
||||
|
||||
img_name = pam_base_filename.replace(".png.aux.xml", "")
|
||||
logger.info(f"using image name {img_name}")
|
||||
|
||||
pam_tree = ET.parse(pam_file)
|
||||
pam_root = pam_tree.getroot()
|
||||
|
||||
srs = pam_root.find("SRS")
|
||||
wkt = pam_root.find("WKT")
|
||||
|
||||
if not srs is None:
|
||||
crs = CRS.from_string(srs.text)
|
||||
elif not wkt is None:
|
||||
crs = CRS.from_string(wkt.text)
|
||||
else:
|
||||
crs = CRS.from_epsg(4326)
|
||||
logger.warning(
|
||||
f"neither node 'SRS' or 'WKT' found in file {pam_file}, using epsg:4326"
|
||||
)
|
||||
logger.info(f"parsed crs {crs}")
|
||||
|
||||
tfmr = Transformer.from_crs(crs, dst_crs, always_xy=True)
|
||||
|
||||
tfm_xml = pam_root.find("GeoTransform")
|
||||
if tfm_xml is None:
|
||||
logger.error(f"could not find node 'GeoTransform' in file {pam_file} - quiting")
|
||||
exit(1)
|
||||
|
||||
tfm_raw = [float(x) for x in tfm_xml.text.split(",")]
|
||||
|
||||
if rio.transform.tastes_like_gdal(tfm_raw):
|
||||
tfm = rio.transform.Affine.from_gdal(*tfm_raw)
|
||||
else:
|
||||
tfm = rio.transform.Affine(*tfm_raw)
|
||||
logger.info(f"parsed transform {tfm.to_gdal()}")
|
||||
|
||||
logger.info(f"using width: {width}, height: {height}, probability cut-off: {prob_cutoff}")
|
||||
logger.debug(f"looking for custom vision JSON files using `{input_path}/{img_name}*.json`")
|
||||
|
||||
# find all json files to process
|
||||
all_predictions = []
|
||||
for json_path in glob.glob(f'{input_path}/{img_name}*.json'):
|
||||
|
||||
logger.debug(f"reading {json_path}")
|
||||
logger.debug(f"file name is {json_path}")
|
||||
predictions = json.load(Path(json_path).open())
|
||||
col, row = json_path.split(".")[-3:-1]
|
||||
col, row = int(col), int(row)
|
||||
|
||||
tag_name = config["tag_name"]
|
||||
|
||||
logger.debug(f"found {len(predictions)} predictions total")
|
||||
predictions = [pred for pred in predictions["predictions"] if pred["probability"] >= prob_cutoff and pred["tagName"] == tag_name]
|
||||
logger.debug(f"only {len(predictions)} preditions meet criteria")
|
||||
|
||||
# iterate through all predictions and process
|
||||
for pred in predictions:
|
||||
box = pred["boundingBox"]
|
||||
|
||||
left = (col + box["left"]) * width
|
||||
right = (col + box["left"] + box["width"]) * width
|
||||
top = (row + box["top"]) * height
|
||||
bottom = (row + box["top"] + box["height"]) * height
|
||||
|
||||
img_bbox = shp.geometry.box(left, bottom, right, top)
|
||||
bbox = shp.geometry.Polygon(zip(*tfmr.transform(*rio.transform.xy(tfm, *reversed(img_bbox.boundary.xy), offset="ul"))))
|
||||
pred["boundingBox"] = bbox
|
||||
pred["tile"] = os.path.basename(json_path)
|
||||
|
||||
all_predictions.extend(predictions)
|
||||
|
||||
logger.info(f"found {len(all_predictions)} total predictions")
|
||||
if len(all_predictions) > 0:
|
||||
pools_geo = gpd.GeoDataFrame(all_predictions, geometry="boundingBox", crs=dst_crs)
|
||||
pools_geo["center"] = pools_geo.apply(lambda r: str(asarray(r["boundingBox"].centroid).tolist()), axis=1)
|
||||
output_file = f"{output_path}/{img_name}.geojson"
|
||||
pools_geo.to_file(output_file, driver='GeoJSON')
|
||||
logger.info(f"saved locations to {output_file}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
|
||||
logger = logging.getLogger("pool_geolocation")
|
||||
|
||||
logger.info("starting pool geolocation, running ...")
|
||||
|
||||
# if a mount to the same path is already present, then unmount it
|
||||
mssparkutils.fs.unmount(f'/{args.storage_container}')
|
||||
|
||||
# mount the container
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net',
|
||||
f'/{args.storage_container}',
|
||||
{"accountKey": args.storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
# deriving the input, output and config path
|
||||
input_path = f'/synfs/{jobId}/{args.storage_container}/{args.src_folder_name}'
|
||||
config_path = f'/synfs/{jobId}/{args.storage_container}/config/{args.config_file_name}'
|
||||
output_path = f'/synfs/{jobId}/{args.storage_container}/{dst_folder_name}'
|
||||
|
||||
# debug purposes only
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
logger.debug(f"config file path {config_path}")
|
||||
|
||||
# start by creating a placeholder file. we need this because creating files under a folder
|
||||
# that does not already exist fails without it.
|
||||
mssparkutils.fs.put(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', 'started tiling ...', True)
|
||||
|
||||
try:
|
||||
|
||||
# invoke the main logic
|
||||
get_pool_gelocations(input_path, output_path, config_path)
|
||||
|
||||
# remove the placeholder file upon successful run
|
||||
mssparkutils.fs.rm(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', True)
|
||||
except:
|
||||
# append a failure note to the placeholder file upon a failed run
|
||||
mssparkutils.fs.append(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', 'tiling errored out', True)
|
||||
raise
|
||||
|
||||
# final logging for this transform
|
||||
logger.info("finished running pool geolocation")
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
import os
|
||||
from jsonschema import validate
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
##########################################################################################
|
||||
# files & download
|
||||
##########################################################################################
|
||||
|
||||
|
||||
schema_str = '{'\
|
||||
'"title": "config",' \
|
||||
'"type": "object",' \
|
||||
'"properties": {' \
|
||||
'"probability_cutoff": {' \
|
||||
'"type": "number"' \
|
||||
'},' \
|
||||
'"height": {' \
|
||||
'"type": "number"' \
|
||||
'},' \
|
||||
'"width": {' \
|
||||
'"type": "number"' \
|
||||
'},' \
|
||||
'"geometry": {' \
|
||||
'"$ref": "#/$defs/geometry"' \
|
||||
'}' \
|
||||
'},' \
|
||||
'"required": [' \
|
||||
'"width",' \
|
||||
'"height"' \
|
||||
']' \
|
||||
'}'
|
||||
|
||||
def parse_config(config_path: Path, default_config: dict):
|
||||
config = default_config
|
||||
|
||||
logger.debug(f"default config options are {config}")
|
||||
|
||||
logger.debug(f"reading config file {config_path}")
|
||||
schema = json.loads(schema_str)
|
||||
|
||||
# load config file from path
|
||||
with open(config_path, "r") as f:
|
||||
config_file = json.load(f)
|
||||
|
||||
logger.debug(f"provided configuration is {config_file}")
|
||||
logger.debug(f"validating provided config")
|
||||
|
||||
# validate the config file with the schema
|
||||
validate(config_file, schema)
|
||||
|
||||
config.update(config_file)
|
||||
logger.info(f"using configuration {config}")
|
||||
|
||||
return config
|
||||
|
||||
|
||||
##########################################################################################
|
||||
# logging
|
||||
##########################################################################################
|
||||
|
||||
|
||||
def init_logger(
|
||||
name: str,
|
||||
level: Union[int, str],
|
||||
format: str = "%(asctime)s:[%(levelname)s]:%(name)s:%(message)s",
|
||||
):
|
||||
# enable and configure logging
|
||||
logger = logging.getLogger(name)
|
||||
logger.setLevel(level)
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(level)
|
||||
ch.setFormatter(logging.Formatter(format))
|
||||
logger.addHandler(ch)
|
||||
|
||||
return logger
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"format": "png",
|
||||
"metadata": true
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import os, argparse, sys
|
||||
import json
|
||||
import glob
|
||||
from osgeo import gdal
|
||||
import logging
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run convert to png function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account name where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder containing the source file for cropping')
|
||||
parser.add_argument('--config_file_name', required=True, help='Config file name')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def convert_directory(
|
||||
input_path,
|
||||
output_path,
|
||||
config_file,
|
||||
logger,
|
||||
default_options={"format": "png", "metadata": False},
|
||||
):
|
||||
gdal.UseExceptions()
|
||||
|
||||
logger.info("looking for config file: %s", config_file)
|
||||
|
||||
translate_options_dict = default_options
|
||||
logger.debug("default config options: %s", translate_options_dict)
|
||||
|
||||
try:
|
||||
# read config file
|
||||
with open(config_file, "r") as config:
|
||||
config_file_dict = json.load(config)
|
||||
logger.debug("read in %s", config_file_dict)
|
||||
translate_options_dict.update(config_file_dict)
|
||||
except Exception as e:
|
||||
# config file is missing or there is issue reading the config file
|
||||
logger.error("error reading config file %s", e)
|
||||
sys.exit(1)
|
||||
|
||||
logger.info("using config options: %s", translate_options_dict)
|
||||
|
||||
keep_metadata = translate_options_dict.pop("metadata")
|
||||
|
||||
opt = gdal.TranslateOptions(**translate_options_dict)
|
||||
|
||||
logger.debug("looking for input files in %s", input_path)
|
||||
for in_file in os.scandir(input_path):
|
||||
in_name = in_file.name
|
||||
logger.info("ingesting file %s", in_file.path)
|
||||
# ! this is a landmine; will error for files w/o extension but with '.', and for formats with spaces
|
||||
out_name = os.path.splitext(in_name)[0] + "." + translate_options_dict["format"]
|
||||
out_path = os.path.join(output_path, out_name)
|
||||
try:
|
||||
# call gdal to convert the file format
|
||||
gdal.Translate(out_path, in_file.path, options=opt)
|
||||
except Exception as e:
|
||||
logger.error("gdal error: %s", e)
|
||||
sys.exit(1)
|
||||
else:
|
||||
logger.info("successfully translated %s", out_path)
|
||||
|
||||
# check to see if we need to carry over the geo-coordinates / metadata file?
|
||||
if not keep_metadata:
|
||||
xml_glob = os.path.join(output_path, "*.aux.xml")
|
||||
logger.debug(f"deleting metadata files that match {xml_glob}")
|
||||
for xml_file in glob.glob(xml_glob):
|
||||
logger.debug(f"deleting metadata file f{xml_file}")
|
||||
os.remove(xml_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
logger = logging.getLogger("image_convert")
|
||||
|
||||
# unmount any previously mounted storage account container
|
||||
mssparkutils.fs.unmount(f'/{args.storage_container}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net',
|
||||
f'/{args.storage_container}',
|
||||
{"accountKey": args.storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
input_path = f'/synfs/{jobId}/{args.storage_container}/{args.src_folder_name}'
|
||||
config_path = f'/synfs/{jobId}/{args.storage_container}/config/{args.config_file_name}'
|
||||
output_path = f'/synfs/{jobId}/{args.storage_container}'
|
||||
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
logger.debug(f"config file path {config_path}")
|
||||
|
||||
convert_directory(input_path, output_path, config_path, logger)
|
||||
|
||||
# scan the directory to find tif files to convert to png file format
|
||||
for in_file in os.scandir(input_path):
|
||||
|
||||
# tif file extensions are removed so that we can use the same file name for png
|
||||
file_name = os.path.basename(in_file.path).replace('.tif', '')
|
||||
|
||||
copy_src_file_name = f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{file_name}'
|
||||
copy_dst_file_name = f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/convert/{file_name}'
|
||||
|
||||
# move source png file to destination path
|
||||
mssparkutils.fs.mv(
|
||||
f'{copy_src_file_name}.png',
|
||||
f'{copy_dst_file_name}.png',
|
||||
True
|
||||
)
|
||||
|
||||
# move source xml (geo-coordinates) to destination path
|
||||
mssparkutils.fs.mv(
|
||||
f'{copy_src_file_name}.png.aux.xml',
|
||||
f'{copy_dst_file_name}.png.aux.xml',
|
||||
True
|
||||
)
|
|
@ -0,0 +1 @@
|
|||
{"bbox":[-118.2509,34.003,-118.1652,34.0808],"crop":true,"limit":1}
|
|
@ -0,0 +1,132 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import os, argparse, sys
|
||||
import shapely as shp
|
||||
import shapely.geometry as geo
|
||||
from osgeo import gdal
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
import utils
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run crop function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account name where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder containing the source file for cropping')
|
||||
parser.add_argument('--config_file_name', required=True, help='Config file name')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def crop(storage_account_name: str,
|
||||
storage_account_key: str,
|
||||
storage_container: str,
|
||||
src_folder_name: str,
|
||||
config_file_name: str):
|
||||
'''
|
||||
Crops the GeoTiff to the Area of Interest (AOI)
|
||||
|
||||
Inputs:
|
||||
storage_account_name - Name of the storage account where the input data resides
|
||||
storage_account_key - Key to the storage account where the input data resides
|
||||
storage_container - Container under which the input data resides
|
||||
src_folder_name - Folder containing the source file for cropping
|
||||
config_file_name - Config file name
|
||||
|
||||
Output:
|
||||
Cropped GeoTiff saved into the user specified directory
|
||||
'''
|
||||
# enable logging
|
||||
logger = utils.init_logger("stac_download")
|
||||
|
||||
gdal.UseExceptions()
|
||||
|
||||
mssparkutils.fs.unmount(f'/{storage_container}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{storage_container}@{storage_account_name}.dfs.core.windows.net',
|
||||
f'/{storage_container}',
|
||||
{"accountKey": storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
input_path = f'/synfs/{jobId}/{storage_container}/{src_folder_name}'
|
||||
config_path = f'/synfs/{jobId}/{storage_container}/config/{config_file_name}'
|
||||
output_path = f'/synfs/{jobId}/{storage_container}/crop'
|
||||
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
logger.debug(f"config file path {config_path}")
|
||||
|
||||
try:
|
||||
# parse config file
|
||||
config = utils.parse_config(config_path)
|
||||
except Exception:
|
||||
exit(1)
|
||||
|
||||
# get the aoi for cropping from config file
|
||||
geom = config.get("geometry")
|
||||
bbox = config.get("bbox")
|
||||
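# A minimal sketch of the two accepted config shapes (values are illustrative; the
# bbox form mirrors the sample config-aoi.json included with this solution):
#   {"bbox": [-118.2509, 34.003, -118.1652, 34.0808], "crop": true}
#   {"geometry": {"type": "Polygon", "coordinates": [[[-118.25, 34.0], [-118.16, 34.0], [-118.16, 34.08], [-118.25, 34.08], [-118.25, 34.0]]]}}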
|
||||
if (geom is not None) and (bbox is not None):
|
||||
logger.error('found both "geomtry" and "bbox"')
|
||||
exit(1)
|
||||
elif (geom is None) and (bbox is None):
|
||||
logger.error('found neither geomtry" and "bbox"')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
aoi = geo.asShape(geom) if bbox is None else geo.box(*bbox)
|
||||
except Exception as e:
|
||||
logger.error(f"error parsing config:{e}")
|
||||
exit(1)
|
||||
|
||||
if aoi.is_empty:
|
||||
logger.error(f"empty area of interest {aoi.wkt}")
|
||||
exit(1)
|
||||
|
||||
logger.debug(f"using aoi '{aoi}'")
|
||||
|
||||
input_files = []
|
||||
|
||||
# list all the files in the folder that will be part of the crop
|
||||
files = mssparkutils.fs.ls(f'abfss://{storage_container}@{storage_account_name}.dfs.core.windows.net/{src_folder_name}')
|
||||
for file in files:
|
||||
if not file.isDir:
|
||||
input_files.append(file)
|
||||
|
||||
# crop the raster file
|
||||
utils.crop_images(input_files, f'abfss://{storage_container}@{storage_account_name}.dfs.core.windows.net/{src_folder_name}', input_path, output_path, aoi)
|
||||
|
||||
for file in input_files:
|
||||
# this is the newly created cropped file path in local host
|
||||
temp_src_path = file.path.replace(f'/{src_folder_name}', '/')
|
||||
|
||||
# this is the destination path (storage account) where the newly
|
||||
# created cropped file will be moved to
|
||||
perm_src_path = file.path.replace(f'/{src_folder_name}/', '/crop/').replace(os.path.basename(file.path), 'output.tif')
|
||||
|
||||
mssparkutils.fs.mv(
|
||||
temp_src_path,
|
||||
perm_src_path,
|
||||
True
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
print("Starting Tiling Process")
|
||||
|
||||
crop(args.storage_account_name,
|
||||
args.storage_account_key,
|
||||
args.storage_container,
|
||||
args.src_folder_name,
|
||||
args.config_file_name)
|
||||
|
||||
print("Tiling Process Completed")
|
|
@ -0,0 +1,136 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
import pyproj
|
||||
import rasterio as rio
|
||||
import rasterio.mask
|
||||
import shapely as shp
|
||||
import shapely.geometry
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
from pathlib import Path
|
||||
from shapely.ops import transform
|
||||
|
||||
def parse_config(config_path: str):
|
||||
LOGGER.info(f"reading config file {config_path}")
|
||||
|
||||
# read the config file
|
||||
try:
|
||||
with open(config_path, "r") as file:
|
||||
config = json.load(file)
|
||||
LOGGER.info(f"using configuration {config}")
|
||||
except Exception as e:
|
||||
LOGGER.error(f"error reading config file:{e}")
|
||||
raise
|
||||
|
||||
return config
|
||||
|
||||
def area_sq_km(area: shp.geometry.base.BaseGeometry, src_crs) -> float:
|
||||
tfmr = pyproj.Transformer.from_crs(src_crs, {'proj':'cea'}, always_xy=True)
|
||||
return transform(tfmr.transform, area).area / 1e6
|
||||
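# Minimal usage sketch (figures are approximate): a 0.01 x 0.01 degree box at the
# equator spans roughly 1.11 km per side, so
#   area_sq_km(shp.geometry.box(0.0, 0.0, 0.01, 0.01), "epsg:4326")
# returns a value close to 1.2 (square kilometres).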
|
||||
def crop_images(
|
||||
images: any,
|
||||
input_path: Path,
|
||||
local_input_path: str,
|
||||
output_path: Path,
|
||||
aoi: shp.geometry.base.BaseGeometry,
|
||||
):
|
||||
for image in images:
|
||||
LOGGER.info(f"starting on file {image}")
|
||||
LOGGER.debug(f"input path {input_path}")
|
||||
LOGGER.debug(f"local input path {local_input_path}")
|
||||
image_path = image.path.replace(input_path, local_input_path)
|
||||
|
||||
LOGGER.debug(f"local image path {image_path}")
|
||||
|
||||
with rio.open(image_path, "r") as img_src:
|
||||
LOGGER.debug(f"opening file {image.name}")
|
||||
dst_meta = img_src.meta
|
||||
|
||||
crs_src = img_src.crs
|
||||
src_shape = img_src.shape
|
||||
src_area = area_sq_km(shp.geometry.box(*img_src.bounds), crs_src)
|
||||
|
||||
# convert the aoi boundary to the images native CRS
|
||||
# shapely uses (x, y) coordinate order, but it's (lat, long) for WGS84,
|
||||
# so force consistency with always_xy
|
||||
tfmr = pyproj.Transformer.from_crs("epsg:4326", crs_src, always_xy=True)
|
||||
aoi_src = transform(tfmr.transform, aoi)
|
||||
|
||||
# possible changes - better decision making on nodata choices here
|
||||
# and use a better choice than 0 for floats and signed ints
|
||||
data_dst, tfm_dst = rio.mask.mask(
|
||||
img_src, [aoi_src], crop=True, nodata=0
|
||||
)
|
||||
|
||||
dst_meta.update(
|
||||
{
|
||||
"driver": "gtiff",
|
||||
"height": data_dst.shape[1],
|
||||
"width": data_dst.shape[2],
|
||||
"alpha": "unspecified",
|
||||
"nodata": 0,
|
||||
"transform": tfm_dst,
|
||||
}
|
||||
)
|
||||
|
||||
out_meta_str = str(dst_meta).replace("\n", "")
|
||||
LOGGER.debug(f"using options for destination image {out_meta_str}")
|
||||
local_output_path = output_path.replace('/crop', '')
|
||||
rel_local_path = image_path.replace(local_input_path, '')
|
||||
dst_path = f'{local_output_path}/{rel_local_path}'
|
||||
|
||||
with rio.open(dst_path, "w", **dst_meta) as img_dst:
|
||||
img_dst.write(data_dst)
|
||||
|
||||
dst_area = area_sq_km(shp.geometry.box(*img_dst.bounds), crs_src)
|
||||
dst_shape = img_dst.shape
|
||||
|
||||
|
||||
LOGGER.debug(f"source dimensions {src_shape} and area (sq km) {src_area}")
|
||||
LOGGER.debug(f"destination dimensions {dst_shape} and area (sq km) {dst_area}")
|
||||
|
||||
LOGGER.info(f"saved cropped image to {dst_path}")
|
||||
|
||||
|
||||
##########################################################################################
|
||||
# logging
|
||||
##########################################################################################
|
||||
|
||||
|
||||
LOGGER = None
|
||||
|
||||
|
||||
def init_logger(name: str = __name__, level: int = logging.DEBUG) -> logging.Logger:
|
||||
config = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"formatters": {
|
||||
"standard": {"format": "%(asctime)s:[%(levelname)s]:%(name)s:%(message)s"},
|
||||
},
|
||||
"handlers": {
|
||||
f"{name}_hdl": {
|
||||
"level": level,
|
||||
"formatter": "standard",
|
||||
"class": "logging.StreamHandler",
|
||||
# 'stream': 'ext://sys.stdout', # Default is stderr
|
||||
},
|
||||
},
|
||||
"loggers": {
|
||||
name: {"propagate": False, "handlers": [f"{name}_hdl"], "level": level,},
|
||||
},
|
||||
}
|
||||
logging.config.dictConfig(config)
|
||||
global LOGGER
|
||||
LOGGER = logging.getLogger(name)
|
||||
return LOGGER
|
||||
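# Typical usage sketch (module name is illustrative):
#   logger = init_logger("raster_crop")
#   logger.debug("message")   # emitted through the '<name>_hdl' StreamHandler configured above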
|
||||
|
||||
def default_logger():
|
||||
if LOGGER is None:
|
||||
init_logger()
|
||||
return LOGGER
|
|
@ -0,0 +1,101 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import argparse, sys
|
||||
from osgeo import gdal
|
||||
import logging
|
||||
|
||||
from pandas import array
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run mosaic function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder containing the source files to mosaic')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def mosaic_tifs(input_path: str,
|
||||
output_path: str,
|
||||
files: any):
|
||||
print("file names are listed below")
|
||||
print(files)
|
||||
'''
|
||||
Stitches two or more GeoTiffs into one single large GeoTiff
|
||||
|
||||
Inputs:
|
||||
storage_account_name - Name of the storage account name where the input data resides
|
||||
storage_account_key - Key to the storage account where the input data resides
|
||||
storage_container - Container under which the input data resides
|
||||
src_folder_name - Folder where the input data is stored
|
||||
file_names - Array of input file names (with extension)
|
||||
|
||||
Output:
|
||||
Single large GeoTiff saved into the user specified storage account
|
||||
|
||||
'''
|
||||
gdal.UseExceptions()
|
||||
|
||||
# build the full path for each file to be mosaicked
|
||||
files_to_mosaic = [ f"{input_path}/{file}" for file in files ]
|
||||
|
||||
temp_output_path = output_path.replace('/mosaic', '')
|
||||
|
||||
# gdal library's Warp method is called to perform the mosaicking
|
||||
g = gdal.Warp(f'{temp_output_path}/output.tif', files_to_mosaic, format="GTiff", options=["COMPRESS=LZW", "TILED=YES"])
|
||||
|
||||
# close file and flush to disk
|
||||
g = None
|
||||
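# Illustrative call (paths and file names are hypothetical):
#   mosaic_tifs('/synfs/1/container/raw', '/synfs/1/container/mosaic', ['a.TIF', 'b.TIF'])
# gdal.Warp merges the inputs onto a common grid and writes the result to
# '<output_path with the trailing /mosaic removed>/output.tif'.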
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
logger = logging.getLogger("image_mosaic")
|
||||
|
||||
# mount storage account container
|
||||
mssparkutils.fs.unmount(f'/{args.storage_container}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net',
|
||||
f'/{args.storage_container}',
|
||||
{"accountKey": args.storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
input_path = f'/synfs/{jobId}/{args.storage_container}/{args.src_folder_name}'
|
||||
output_path = f'/synfs/{jobId}/{args.storage_container}/mosaic'
|
||||
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
|
||||
# list the files in the source folder path under the storage account's container
|
||||
files = mssparkutils.fs.ls(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{args.src_folder_name}')
|
||||
input_files = []
|
||||
for file in files:
|
||||
if not file.isDir and file.name.endswith('.TIF'):
|
||||
input_files.append(file.name)
|
||||
|
||||
print("Starting Mosaicing Process")
|
||||
|
||||
# mosaic method is called
|
||||
mosaic_tifs(input_path, output_path, input_files)
|
||||
|
||||
temp_output_path = output_path.replace('/mosaic', '')
|
||||
|
||||
# final output from mosaic'ing is moved from its temp location in local host
|
||||
# to a permanent persistent storage account container mounted to the host
|
||||
mssparkutils.fs.mv(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/output.tif',
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/mosaic/output.tif',
|
||||
True
|
||||
)
|
||||
|
||||
print("Mosaicing Process Completed")
|
|
@ -0,0 +1,154 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import os, math, argparse
|
||||
from pathlib import Path
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
import shutil
|
||||
import logging
|
||||
from osgeo import gdal
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
Image.MAX_IMAGE_PIXELS = None
|
||||
|
||||
# Collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run tiling function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder where the input data is stored')
|
||||
parser.add_argument('--file_name', type=str, required=True, help='Input file name to be tiled (with extension)')
|
||||
parser.add_argument('--tile_size', type=str, required=True, help='Tile size')
|
||||
|
||||
#Parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
# Define functions
|
||||
def tile_img(input_path: str,
|
||||
output_path: str,
|
||||
file_name: str,
|
||||
tile_size):
|
||||
'''
|
||||
Tiles/chips images into a user defined size using the tile_size parameter
|
||||
|
||||
Inputs:
|
||||
input_path - Path to the folder containing the source image
|
||||
output_path - Path to the folder where the image tiles are saved
|
||||
file_name - Input file name to be tiled (with extension)
|
||||
tile_size - Tile size
|
||||
|
||||
Output:
|
||||
All image chips saved into the user specified directory
|
||||
|
||||
'''
|
||||
|
||||
gdal.UseExceptions()
|
||||
|
||||
print("Getting tile size")
|
||||
|
||||
tile_size = int(tile_size)
|
||||
|
||||
print(f"Tile size retrieved - {tile_size}")
|
||||
|
||||
try:
|
||||
print("Getting image")
|
||||
img = Image.open(str(Path(input_path) / file_name))
|
||||
print("Image Retrieved")
|
||||
|
||||
print("Determining Tile width")
|
||||
n_tile_width = list(range(0,math.floor(img.size[0]/tile_size)))
|
||||
print(f"Tile width {n_tile_width}")
|
||||
print("Determining Tile height")
|
||||
n_tile_height = list(range(0,math.floor(img.size[1]/tile_size)))
|
||||
print(f"Tile height {n_tile_height}")
|
||||
tile_combinations = [(a,b) for a in n_tile_width for b in n_tile_height]
|
||||
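# Worked example (hypothetical image): for a 1500 x 1200 pixel image and tile_size
# 512, n_tile_width is [0, 1] and n_tile_height is [0, 1], giving 4 full tiles;
# any partial strip at the right/bottom edge is dropped by the floor() above.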
|
||||
print("Processing tiles")
|
||||
for tile_tuple in tile_combinations:
|
||||
print("Getting starting coordinates")
|
||||
x_start_point = tile_tuple[0]*tile_size
|
||||
y_start_point = tile_tuple[1]*tile_size
|
||||
print(f"Got Starting Coordinates - {x_start_point},{y_start_point}")
|
||||
|
||||
print("Cropping Tile")
|
||||
crop_box = (x_start_point, y_start_point, x_start_point+tile_size, y_start_point+tile_size)
|
||||
tile_crop = img.crop(crop_box)
|
||||
print("Tile Cropped")
|
||||
|
||||
print("Getting tile name")
|
||||
img_name = os.path.basename(file_name)
|
||||
tile_name = img_name.rsplit('.',1)
|
||||
tile_name = '.'.join([tile_name[0],'tile',str(tile_tuple[0]),str(tile_tuple[1]),tile_name[1]])
|
||||
print(f"Retreived Tile name - {tile_name}")
|
||||
|
||||
print(f"Saving Tile - {tile_name}")
|
||||
tile_crop.save(str(Path(output_path) / tile_name))
|
||||
print(f"Saved Tile - {tile_name}")
|
||||
except UnidentifiedImageError:
|
||||
print("File is not an image, copying to destination directory")
|
||||
sourcePath = str(Path(input_path) / file_name)
|
||||
destinationPath = str(Path(output_path) / file_name)
|
||||
|
||||
print(f"Copying file from {sourcePath} to {destinationPath}")
|
||||
shutil.copyfile(sourcePath,destinationPath)
|
||||
print(f"Copied file from {sourcePath} to {destinationPath}")
|
||||
|
||||
|
||||
def process_img_folder(args):
|
||||
'''
|
||||
Function to process all the images in a given source directory
|
||||
|
||||
Input:
|
||||
args - command line Arguments passed to the file
|
||||
|
||||
Output:
|
||||
Nothing returned. Processed images placed in the output directory
|
||||
|
||||
'''
|
||||
for img_name in os.listdir(args.path_to_input_img):
|
||||
|
||||
print('Processing',str(img_name))
|
||||
|
||||
tile_img(args.path_to_input_img, args.path_to_output, img_name, args.tile_size)
|
||||
|
||||
print(f"{img_name} finished processing")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
logger = logging.getLogger("image_convert")
|
||||
|
||||
# mount the storage account
|
||||
mssparkutils.fs.unmount(f'/{args.storage_container}')
|
||||
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net',
|
||||
f'/{args.storage_container}',
|
||||
{"accountKey": args.storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
input_path = f'/synfs/{jobId}/{args.storage_container}/{args.src_folder_name}'
|
||||
output_path = f'/synfs/{jobId}/{args.storage_container}/tiles'
|
||||
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
|
||||
print("Starting Tiling Process")
|
||||
|
||||
# create a placeholder file
|
||||
mssparkutils.fs.put(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/tiles/__processing__.txt', 'started tiling ...', True)
|
||||
|
||||
try:
|
||||
tile_img(input_path, output_path, args.file_name, args.tile_size)
|
||||
mssparkutils.fs.rm(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/tiles/__processing__.txt', True)
|
||||
except:
|
||||
mssparkutils.fs.append(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/tiles/__processing__.txt', 'tiling errored out', True)
|
||||
raise
|
||||
|
||||
print("Tiling Process Completed")
|
|
@ -0,0 +1,80 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import shutil
|
||||
import argparse, sys
|
||||
from osgeo import gdal
|
||||
import logging
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
dst_folder_name = 'warp'
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run warp function')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account where the input data resides')
|
||||
parser.add_argument('--storage_account_key', required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--src_folder_name', default=None, required=True, help='Folder containing the source files to warp')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def warp(
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
file_name: str):
|
||||
|
||||
gdal.UseExceptions()
|
||||
|
||||
# specify options and run Warp
|
||||
kwargs = {'format': 'GTiff', 'dstSRS': '+proj=lcc +datum=WGS84 +lat_1=25 +lat_2=60 +lat_0=42.5 +lon_0=-100 +x_0=0 +y_0=0 +units=m +no_defs', 'srcSRS': '+proj=longlat +datum=WGS84 +no_defs'}
|
||||
ds = gdal.Warp(f'{output_path}/output_warp.tif', f'{input_path}/{file_name}', **kwargs)
|
||||
|
||||
# close file and flush to disk
|
||||
ds = None
|
||||
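# Roughly equivalent gdalwarp invocation (illustrative, using the same SRS strings):
#   gdalwarp -s_srs "+proj=longlat +datum=WGS84 +no_defs" \
#            -t_srs "+proj=lcc +datum=WGS84 +lat_1=25 +lat_2=60 +lat_0=42.5 +lon_0=-100 +x_0=0 +y_0=0 +units=m +no_defs" \
#            input.tif output_warp.tif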
|
||||
if __name__ == "__main__":
|
||||
|
||||
# enable logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
|
||||
)
|
||||
logger = logging.getLogger("image_convert")
|
||||
|
||||
# unmount any previously mounted storage account
|
||||
mssparkutils.fs.unmount(f'/{args.storage_container}')
|
||||
|
||||
# mount the storage account containing data required for this transform
|
||||
mssparkutils.fs.mount(
|
||||
f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net',
|
||||
f'/{args.storage_container}',
|
||||
{"accountKey": args.storage_account_key}
|
||||
)
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
input_path = f'/synfs/{jobId}/{args.storage_container}/{args.src_folder_name}'
|
||||
output_path = f'/synfs/{jobId}/{args.storage_container}/{dst_folder_name}'
|
||||
|
||||
logger.debug(f"input data directory {input_path}")
|
||||
logger.debug(f"output data directory {output_path}")
|
||||
|
||||
# create a temporary placeholder file so that the destination folder path exists
|
||||
mssparkutils.fs.put(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', 'started warp ...', True)
|
||||
|
||||
try:
|
||||
files = mssparkutils.fs.ls(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{args.src_folder_name}')
|
||||
|
||||
for file in files:
|
||||
if not file.isDir and file.name.endswith('.tif'):
|
||||
warp(input_path, output_path, file.name)
|
||||
|
||||
# clean up the temporary placeholder on successful run
|
||||
mssparkutils.fs.rm(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', True)
|
||||
except:
|
||||
# record the failure in the temporary placeholder on a failed run
|
||||
mssparkutils.fs.append(f'abfss://{args.storage_container}@{args.storage_account_name}.dfs.core.windows.net/{dst_folder_name}/__processing__.txt', 'warp errored out', True)
|
||||
raise
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import shutil
|
||||
import argparse, sys
|
||||
import geopandas
|
||||
from osgeo import gdal
|
||||
from notebookutils import mssparkutils
|
||||
|
||||
# collect args
|
||||
parser = argparse.ArgumentParser(description='Arguments required to run the vector feature extraction')
|
||||
parser.add_argument('--storage_account_name', type=str, required=True, help='Name of the storage account where the input data resides')
|
||||
parser.add_argument('--storage_account_key', default=None, required=True, help='Key to the storage account where the input data resides')
|
||||
parser.add_argument('--storage_account_src_container', type=str, required=True, help='Container under which the input data resides')
|
||||
parser.add_argument('--storage_account_dst_container', default=None, required=True, help='Container where the output data will be saved')
|
||||
parser.add_argument('--file_name', type=str, required=True, help='Input file name to be processed (with extension)')
|
||||
|
||||
# parse Args
|
||||
args = parser.parse_args()
|
||||
|
||||
def extract_features_from_gpkg(storage_account_name: str, storage_account_key: str, storage_account_src_container: str, src_storage_folder: str, storage_account_dst_container: str, dst_storage_folder: str, file_name: str):
|
||||
|
||||
gdal.UseExceptions()
|
||||
|
||||
# unmount any previously mounted storage container to this path
|
||||
mssparkutils.fs.unmount("/aoi")
|
||||
|
||||
print(f"abfss://{storage_account_dst_container}@{storage_account_name}.dfs.core.windows.net")
|
||||
|
||||
# mount the storage container containing data required for this transform
|
||||
mssparkutils.fs.mount(
|
||||
f"abfss://{storage_account_dst_container}@{storage_account_name}.dfs.core.windows.net",
|
||||
"/aoi",
|
||||
{"accountKey": storage_account_key}
|
||||
)
|
||||
|
||||
# set Storage Account Information for source TIF data
|
||||
gdal.SetConfigOption('AZURE_STORAGE_ACCOUNT', storage_account_name)
|
||||
gdal.SetConfigOption('AZURE_STORAGE_ACCESS_KEY', storage_account_key)
|
||||
|
||||
# specify options and run warp
|
||||
kwargs = {'format': 'GTiff', 'dstSRS': '+proj=lcc +datum=WGS84 +lat_1=25 +lat_2=60 +lat_0=42.5 +lon_0=-100 +x_0=0 +y_0=0 +units=m +no_defs', 'srcSRS': '+proj=longlat +datum=WGS84 +no_defs'}
|
||||
ds = gdal.Warp('output_warp.tif', f'/vsiadls/{storage_account_src_container}/{src_storage_folder}/{file_name}', **kwargs)
|
||||
|
||||
# close file and flush to disk
|
||||
ds = None
|
||||
|
||||
jobId = mssparkutils.env.getJobId()
|
||||
|
||||
# copy the output file from local host to the storage account container
|
||||
# that is mounted to this host
|
||||
shutil.copy("output_warp.tif", f"/synfs/{jobId}/aoi/output_warp.tif")
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
extract_features_from_gpkg(args)
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "__synapse_pool_name__",
|
||||
"location": "__location__",
|
||||
"properties": {
|
||||
"autoPause": {
|
||||
"enabled": true,
|
||||
"delayInMinutes": 15
|
||||
},
|
||||
"autoScale": {
|
||||
"enabled": true,
|
||||
"maxNodeCount": 4,
|
||||
"minNodeCount": 3
|
||||
},
|
||||
"nodeCount": 0,
|
||||
"nodeSize": "Medium",
|
||||
"nodeSizeFamily": "MemoryOptimized",
|
||||
"sparkVersion": "3.1",
|
||||
"libraryRequirements": {
|
||||
"content": "name: aoidemo\r\nchannels:\r\n - conda-forge\r\n - defaults\r\ndependencies:\r\n - gdal=3.3.0\r\n - pip>=20.1.1\r\n - azure-storage-file-datalake\r\n - libgdal\r\n - shapely\r\n - pyproj\r\n - pip:\r\n - rasterio\r\n - geopandas",
|
||||
"filename": "environment.yml",
|
||||
"time": "2022-02-22T00:52:46.8995063Z"
|
||||
},
|
||||
"isComputeIsolationEnabled": false,
|
||||
"sparkConfigProperties": {
|
||||
"configurationType": "File",
|
||||
"filename": "config.txt",
|
||||
"content": "spark.storage.synapse.linkedServiceName \"AOI Geospatial v2\"\rfs.azure.account.oauth.provider.type com.microsoft.azure.synapse.tokenlibrary.LinkedServiceBasedTokenProvider",
|
||||
"time": "2022-02-22T00:52:46.8995063Z"
|
||||
},
|
||||
"sessionLevelPackagesEnabled": true,
|
||||
"annotations": []
|
||||
}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
source(allowSchemaDrift: true,
|
||||
validateSchema: false,
|
||||
ignoreNoFilesFound: false,
|
||||
documentForm: 'arrayOfDocuments') ~> source
|
||||
source sink(allowSchemaDrift: true,
|
||||
validateSchema: false,
|
||||
skipDuplicateMapInputs: true,
|
||||
skipDuplicateMapOutputs: true,
|
||||
store: 'cache',
|
||||
format: 'inline',
|
||||
output: true,
|
||||
saveOrder: 1) ~> sink
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"name": "ReadSpecDocumentFlow",
|
||||
"properties": {
|
||||
"type": "MappingDataFlow",
|
||||
"typeProperties": {
|
||||
"sources": [
|
||||
{
|
||||
"dataset": {
|
||||
"referenceName": "spec_doc_specification",
|
||||
"type": "DatasetReference"
|
||||
},
|
||||
"name": "source"
|
||||
}
|
||||
],
|
||||
"sinks": [
|
||||
{
|
||||
"name": "sink"
|
||||
}
|
||||
],
|
||||
"transformations": [],
|
||||
"scriptLines": [
|
||||
"source(allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" ignoreNoFilesFound: false,",
|
||||
" documentForm: 'arrayOfDocuments') ~> source",
|
||||
"source sink(allowSchemaDrift: true,",
|
||||
" validateSchema: false,",
|
||||
" skipDuplicateMapInputs: true,",
|
||||
" skipDuplicateMapOutputs: true,",
|
||||
" store: 'cache',",
|
||||
" format: 'inline',",
|
||||
" output: true,",
|
||||
" saveOrder: 1) ~> sink"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "gld",
|
||||
"properties": {
|
||||
"linkedServiceName": {
|
||||
"referenceName": "AOI Geospatial v2",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"parameters": {
|
||||
"DestinationFolderPath": {
|
||||
"type": "string"
|
||||
},
|
||||
"DestinationContainerName": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"annotations": [],
|
||||
"type": "Binary",
|
||||
"typeProperties": {
|
||||
"location": {
|
||||
"type": "AzureBlobFSLocation",
|
||||
"folderPath": {
|
||||
"value": "@dataset().DestinationFolderPath",
|
||||
"type": "Expression"
|
||||
},
|
||||
"fileSystem": {
|
||||
"value": "@dataset().DestinationContainerName",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/datasets"
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"name": "gls",
|
||||
"properties": {
|
||||
"linkedServiceName": {
|
||||
"referenceName": "AOI Geospatial v2 FS",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"annotations": [],
|
||||
"type": "Binary",
|
||||
"typeProperties": {
|
||||
"location": {
|
||||
"type": "AzureFileStorageLocation"
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/datasets"
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "rawtifs",
|
||||
"properties": {
|
||||
"linkedServiceName": {
|
||||
"referenceName": "AOI Data Storage Account v2",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"parameters": {
|
||||
"containername": {
|
||||
"type": "string"
|
||||
},
|
||||
"folderpath": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"annotations": [],
|
||||
"type": "Binary",
|
||||
"typeProperties": {
|
||||
"location": {
|
||||
"type": "AzureBlobStorageLocation",
|
||||
"folderPath": {
|
||||
"value": "@dataset().folderpath",
|
||||
"type": "Expression"
|
||||
},
|
||||
"container": {
|
||||
"value": "@dataset().containername",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/datasets"
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
{
|
||||
"name": "spec_doc_specification",
|
||||
"properties": {
|
||||
"linkedServiceName": {
|
||||
"referenceName": "AOI Data Storage Account v2",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"parameters": {
|
||||
"filename": {
|
||||
"type": "string"
|
||||
},
|
||||
"folderpath": {
|
||||
"type": "string"
|
||||
},
|
||||
"containername": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"annotations": [],
|
||||
"type": "Json",
|
||||
"typeProperties": {
|
||||
"location": {
|
||||
"type": "AzureBlobStorageLocation",
|
||||
"fileName": {
|
||||
"value": "@dataset().filename",
|
||||
"type": "Expression"
|
||||
},
|
||||
"folderPath": {
|
||||
"value": "@dataset().folderpath",
|
||||
"type": "Expression"
|
||||
},
|
||||
"container": {
|
||||
"value": "@dataset().containername",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema": {}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/datasets"
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"name": "AOI Batch Storage",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureBlobStorage",
|
||||
"typeProperties": {
|
||||
"connectionString": "DefaultEndpointsProtocol=https;AccountName=__batch_storage_account__;EndpointSuffix=core.windows.net;",
|
||||
"accountKey": {
|
||||
"type": "AzureKeyVaultSecret",
|
||||
"store": {
|
||||
"referenceName": "AOI Pipeline Key Vault",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"secretName": "PackageStorageAccountKey"
|
||||
}
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices"
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
{
|
||||
"name": "AOI Batch",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureBatch",
|
||||
"typeProperties": {
|
||||
"batchUri": "https://__batch_account__.__location__.batch.azure.com",
|
||||
"poolName": "data-cpu-pool",
|
||||
"accountName": "__batch_account__",
|
||||
"linkedServiceName": {
|
||||
"referenceName": "AOI Batch Storage",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"accessKey": {
|
||||
"type": "AzureKeyVaultSecret",
|
||||
"store": {
|
||||
"referenceName": "AOI Pipeline Key Vault",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"secretName": "GeospatialBatchAccountKey"
|
||||
}
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices"
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"name": "AOI Data Storage Account v2",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureBlobStorage",
|
||||
"typeProperties": {
|
||||
"connectionString": "DefaultEndpointsProtocol=https;AccountName=__raw_data_storage_account__;EndpointSuffix=core.windows.net;",
|
||||
"accountKey": {
|
||||
"type": "AzureKeyVaultSecret",
|
||||
"store": {
|
||||
"referenceName": "AOI Pipeline Key Vault",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"secretName": "GeospatialStorageAccountKey"
|
||||
}
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices"
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "AOI Geospatial v2 FS",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureFileStorage",
|
||||
"typeProperties": {
|
||||
"connectionString": "DefaultEndpointsProtocol=https;AccountName=__raw_data_storage_account__;EndpointSuffix=core.windows.net;",
|
||||
"accountKey": {
|
||||
"type": "AzureKeyVaultSecret",
|
||||
"store": {
|
||||
"referenceName": "AOI Pipeline Key Vault",
|
||||
"type": "LinkedServiceReference"
|
||||
},
|
||||
"secretName": "GeospatialStorageAccountKey"
|
||||
},
|
||||
"fileShare": "volume-a"
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
}
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices"
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "AOI Geospatial v2",
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureBlobFS",
|
||||
"typeProperties": {
|
||||
"url": "https://__raw_data_storage_account__.dfs.core.windows.net"
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"name": "AOI Pipeline Key Vault",
|
||||
"type": "Microsoft.Synapse/workspaces/linkedservices",
|
||||
"properties": {
|
||||
"annotations": [],
|
||||
"type": "AzureKeyVault",
|
||||
"typeProperties": {
|
||||
"baseUrl": "https://__linked_key_vault__.vault.azure.net/"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,377 @@
|
|||
{
|
||||
"name": "Custom Vision Model Transforms v2",
|
||||
"properties": {
|
||||
"activities": [
|
||||
{
|
||||
"name": "GetFilesToMosaic",
|
||||
"type": "GetMetadata",
|
||||
"dependsOn": [],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"dataset": {
|
||||
"referenceName": "rawtifs",
|
||||
"type": "DatasetReference",
|
||||
"parameters": {
|
||||
"containername": {
|
||||
"value": "@pipeline().parameters.Prefix",
|
||||
"type": "Expression"
|
||||
},
|
||||
"folderpath": "raw"
|
||||
}
|
||||
},
|
||||
"fieldList": [
|
||||
"childItems"
|
||||
],
|
||||
"storeSettings": {
|
||||
"type": "AzureBlobStorageReadSettings",
|
||||
"recursive": true,
|
||||
"enablePartitionDiscovery": false
|
||||
},
|
||||
"formatSettings": {
|
||||
"type": "BinaryReadSettings"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Crop",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "More than one GeoTiff",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Crop",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_crop/src/crop.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--storage_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder_name",
|
||||
"@variables('CropSourceFolder')",
|
||||
"--config_file_name",
|
||||
"config-aoi.json"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Convert",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Crop",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Convert",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_convert/src/convert.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--storage_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder_name",
|
||||
"crop",
|
||||
"--config_file_name",
|
||||
"config-img-convert-png.json"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Tiling",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Convert",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Tiling",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_tiling/src/tiling.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--storage_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder_name",
|
||||
"convert",
|
||||
"--file_name",
|
||||
"output.png",
|
||||
"--tile_size",
|
||||
"512",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "More than one GeoTiff",
|
||||
"type": "IfCondition",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "For Each File to Mosaic",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"expression": {
|
||||
"value": "@greater(length(activity('GetFilesToMosaic').output.childItems),1)",
|
||||
"type": "Expression"
|
||||
},
|
||||
"ifFalseActivities": [
|
||||
{
|
||||
"name": "Set Crop Source Folder to raw",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "CropSourceFolder",
|
||||
"value": "raw"
|
||||
}
|
||||
}
|
||||
],
|
||||
"ifTrueActivities": [
|
||||
{
|
||||
"name": "Mosaic",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Mosaic",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_mosaic/src/mosaic.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--storage_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder_name",
|
||||
"raw"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Set Crop Source Folder to mosaic",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Mosaic",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "CropSourceFolder",
|
||||
"value": "mosaic"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "For Each File to Mosaic",
|
||||
"type": "ForEach",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "GetFilesToMosaic",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"items": {
|
||||
"value": "@activity('GetFilesToMosaic').output.childItems",
|
||||
"type": "Expression"
|
||||
},
|
||||
"isSequential": true,
|
||||
"activities": [
|
||||
{
|
||||
"name": "Set Mosaic File Names",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Store Temp Mosaic File Names",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "MosaicFileNames",
|
||||
"value": {
|
||||
"value": "@concat(variables('TempMosaicFileNames'), if(equals(variables('TempMosaicFileNames'), ''),'',','), item().name)",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Store Temp Mosaic File Names",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "TempMosaicFileNames",
|
||||
"value": {
|
||||
"value": "@variables('MosaicFileNames')",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Prefix": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"StorageAccountName": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"StorageAccountKey": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
}
|
||||
},
|
||||
"variables": {
|
||||
"FunctionCompleted": {
|
||||
"type": "String",
|
||||
"defaultValue": "None"
|
||||
},
|
||||
"FunctionError": {
|
||||
"type": "String"
|
||||
},
|
||||
"MosaicFileNames": {
|
||||
"type": "String"
|
||||
},
|
||||
"TempMosaicFileNames": {
|
||||
"type": "String"
|
||||
},
|
||||
"CropSourceFolder": {
|
||||
"type": "String"
|
||||
}
|
||||
},
|
||||
"annotations": [],
|
||||
"lastPublishTime": "2022-03-06T06:06:58Z"
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/pipelines"
|
||||
}
|
|
@ -0,0 +1,581 @@
|
|||
{
|
||||
"name": "Custom Vision Object Detection v2",
|
||||
"properties": {
|
||||
"activities": [
|
||||
{
|
||||
"name": "Wait for Custom Vision",
|
||||
"type": "Until",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"expression": {
|
||||
"value": "@not(equals(string(variables('FunctionCompleted')), 'running'))",
|
||||
"type": "Expression"
|
||||
},
|
||||
"activities": [
|
||||
{
|
||||
"name": "Wait for Custom Vision Check",
|
||||
"type": "Wait",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Set FunctionCompleted Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"waitTimeInSeconds": 30
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Check Status Custom Vision",
|
||||
"type": "WebActivity",
|
||||
"dependsOn": [],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"url": {
|
||||
"value": "@concat('https://',pipeline().parameters.BatchName,'.',pipeline().parameters.BatchLocation,'.batch.azure.com/jobs/',pipeline().parameters.JobName,'/tasks/aoi-cv-task-', pipeline().RunId, '?api-version=2022-01-01.15.0')",
|
||||
"type": "Expression"
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
},
|
||||
"method": "GET",
|
||||
"authentication": {
|
||||
"type": "MSI",
|
||||
"resource": "https://batch.core.windows.net/"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Set FunctionCompleted Custom Vision",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Check Status Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "FunctionCompleted",
|
||||
"value": {
|
||||
"value": "@activity('Check Status Custom Vision').output['state']",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Set FunctionError",
|
||||
"type": "SetVariable",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Check Status Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Failed"
|
||||
]
|
||||
}
|
||||
],
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"variableName": "FunctionError",
|
||||
"value": {
|
||||
"value": "@activity('Check Status Custom Vision').output['executionInfo']['failureInfo']",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"timeout": "7.00:00:00"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Custom Vision",
|
||||
"type": "WebActivity",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Copy Config file",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
},
|
||||
{
|
||||
"activity": "Copy Xml From Convert Transform",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"url": {
|
||||
"value": "@concat('https://',pipeline().parameters.BatchName,'.',pipeline().parameters.BatchLocation,'.batch.azure.com/jobs/',pipeline().parameters.JobName,'/tasks?api-version=2020-03-01.11.0')",
|
||||
"type": "Expression"
|
||||
},
|
||||
"connectVia": {
|
||||
"referenceName": "AutoResolveIntegrationRuntime",
|
||||
"type": "IntegrationRuntimeReference"
|
||||
},
|
||||
"method": "POST",
|
||||
"headers": {
|
||||
"Content-type": "application/json; odata=minimalmetadata; charset=utf-8"
|
||||
},
|
||||
"body": {
|
||||
"value": "@json(concat('{\n \"id\": \"aoi-cv-task-', pipeline().RunId, '\",\n \"commandLine\": \"\",\n \"containerSettings\": {\n \"imageName\": \"', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['algImageName'], '\",\n \"containerRunOptions\": \"--rm --workdir / -v /mnt/batch/tasks/fsmounts/S/', pipeline().parameters.Prefix, ':', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['mountedDirectory'], '\"\n },\n \"environmentSettings\": [\n {\n \"name\": \"APP_INPUT_DIR\",\n \"value\": \"/mnt/batch/tasks/fsmounts/S/', pipeline().parameters.Prefix,'/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['submissionDirectory'],'\"\n },\n {\n \"name\": \"APP_OUTPUT_DIR\",\n \"value\": \"/mnt/batch/tasks/fsmounts/S/', pipeline().parameters.Prefix,'/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['resultsDirectory'],'\"\n },\n {\n \"name\": \"APP_CONFIG_DIR\",\n \"value\": \"/mnt/batch/tasks/fsmounts/S/', pipeline().parameters.Prefix,'/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['contextFileName'],'\"\n }\n ]\n}'))",
|
||||
"type": "Expression"
|
||||
},
|
||||
"authentication": {
|
||||
"type": "MSI",
|
||||
"resource": "https://batch.core.windows.net/"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Pool Geolocation",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Copy Xml",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
},
|
||||
{
|
||||
"activity": "Copy Json",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Pool Geolocation",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/pool_geolocation/src/main.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--storage_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder_name",
|
||||
"detections",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--config_file_name",
|
||||
"config-pool-geolocation.json"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Copy Tiles",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Read Spec Document",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Copy noop",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/copy_noop/src/main.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--src_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder",
|
||||
"tiles",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--dst_fileshare",
|
||||
"volume-a",
|
||||
"--dst_folder",
|
||||
"@concat(pipeline().parameters.Prefix,'/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['submissionDirectory'])",
|
||||
"--folders_to_create",
|
||||
"@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['submissionDirectory'])",
|
||||
"--folders_to_create",
|
||||
"@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['resultsDirectory'])",
|
||||
"--folders_to_create",
|
||||
"@concat(pipeline().parameters.Prefix,'/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['logsDirectory'])"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Copy Config file",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Copy Tiles",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Copy noop",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/copy_noop/src/main.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--src_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder",
|
||||
"@concat('config/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['contextFileName'])",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--dst_fileshare",
|
||||
"volume-a",
|
||||
"--dst_folder",
|
||||
"@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['contextFileName'])"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Copy Json",
|
||||
"type": "Copy",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Wait for Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"source": {
|
||||
"type": "BinarySource",
|
||||
"storeSettings": {
|
||||
"type": "AzureFileStorageReadSettings",
|
||||
"recursive": true,
|
||||
"wildcardFolderPath": {
|
||||
"value": "@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['resultsDirectory'], '/json')",
|
||||
"type": "Expression"
|
||||
},
|
||||
"wildcardFileName": "*.json",
|
||||
"deleteFilesAfterCompletion": false
|
||||
},
|
||||
"formatSettings": {
|
||||
"type": "BinaryReadSettings"
|
||||
}
|
||||
},
|
||||
"sink": {
|
||||
"type": "BinarySink",
|
||||
"storeSettings": {
|
||||
"type": "AzureBlobFSWriteSettings"
|
||||
}
|
||||
},
|
||||
"enableStaging": false
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"referenceName": "gls",
|
||||
"type": "DatasetReference"
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"referenceName": "gld",
|
||||
"type": "DatasetReference",
|
||||
"parameters": {
|
||||
"DestinationFolderPath": "detections",
|
||||
"DestinationContainerName": {
|
||||
"value": "@pipeline().parameters.Prefix",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Copy Xml",
|
||||
"type": "Copy",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Wait for Custom Vision",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"source": {
|
||||
"type": "BinarySource",
|
||||
"storeSettings": {
|
||||
"type": "AzureFileStorageReadSettings",
|
||||
"recursive": true,
|
||||
"wildcardFolderPath": {
|
||||
"value": "@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['resultsDirectory'], '/other')",
|
||||
"type": "Expression"
|
||||
},
|
||||
"wildcardFileName": "*.xml",
|
||||
"deleteFilesAfterCompletion": false
|
||||
},
|
||||
"formatSettings": {
|
||||
"type": "BinaryReadSettings"
|
||||
}
|
||||
},
|
||||
"sink": {
|
||||
"type": "BinarySink",
|
||||
"storeSettings": {
|
||||
"type": "AzureBlobFSWriteSettings"
|
||||
}
|
||||
},
|
||||
"enableStaging": false
|
||||
},
|
||||
"inputs": [
|
||||
{
|
||||
"referenceName": "gls",
|
||||
"type": "DatasetReference"
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"referenceName": "gld",
|
||||
"type": "DatasetReference",
|
||||
"parameters": {
|
||||
"DestinationFolderPath": "detections",
|
||||
"DestinationContainerName": {
|
||||
"value": "@pipeline().parameters.Prefix",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Read Spec Document",
|
||||
"type": "ExecuteDataFlow",
|
||||
"dependsOn": [],
|
||||
"policy": {
|
||||
"timeout": "1.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"dataflow": {
|
||||
"referenceName": "ReadSpecDocumentFlow",
|
||||
"type": "DataFlowReference",
|
||||
"datasetParameters": {
|
||||
"source": {
|
||||
"filename": "custom_vision_object_detection.json",
|
||||
"folderpath": "config",
|
||||
"containername": {
|
||||
"value": "@pipeline().parameters.Prefix",
|
||||
"type": "Expression"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"compute": {
|
||||
"coreCount": 8,
|
||||
"computeType": "General"
|
||||
},
|
||||
"traceLevel": "None",
|
||||
"cacheSinks": {
|
||||
"firstRowOnly": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Copy Xml From Convert Transform",
|
||||
"type": "SparkJob",
|
||||
"dependsOn": [
|
||||
{
|
||||
"activity": "Copy Tiles",
|
||||
"dependencyConditions": [
|
||||
"Succeeded"
|
||||
]
|
||||
}
|
||||
],
|
||||
"policy": {
|
||||
"timeout": "7.00:00:00",
|
||||
"retry": 0,
|
||||
"retryIntervalInSeconds": 30,
|
||||
"secureOutput": false,
|
||||
"secureInput": false
|
||||
},
|
||||
"userProperties": [],
|
||||
"typeProperties": {
|
||||
"sparkJob": {
|
||||
"referenceName": "Copy noop",
|
||||
"type": "SparkJobDefinitionReference"
|
||||
},
|
||||
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/copy_noop/src/main.py",
|
||||
"args": [
|
||||
"--storage_account_name",
|
||||
"@pipeline().parameters.StorageAccountName",
|
||||
"--src_container",
|
||||
"@pipeline().parameters.Prefix",
|
||||
"--src_folder",
|
||||
"convert/output.png.aux.xml",
|
||||
"--storage_account_key",
|
||||
"@pipeline().parameters.StorageAccountKey",
|
||||
"--dst_fileshare",
|
||||
"volume-a",
|
||||
"--dst_folder",
|
||||
"@concat(pipeline().parameters.Prefix, '/', activity('Read Spec Document').output['runStatus'].output.sink.value[0]['submissionDirectory'], '/output.png.aux.xml')"
|
||||
],
|
||||
"targetBigDataPool": {
|
||||
"referenceName": "__synapse_pool_name__",
|
||||
"type": "BigDataPoolReference"
|
||||
},
|
||||
"executorSize": "Medium",
|
||||
"conf": {
|
||||
"spark.dynamicAllocation.minExecutors": 2,
|
||||
"spark.dynamicAllocation.maxExecutors": 3
|
||||
},
|
||||
"driverSize": "Medium",
|
||||
"numExecutors": 2
|
||||
}
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Prefix": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"BatchName": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"JobName": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"BatchLocation": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"StorageAccountName": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
},
|
||||
"StorageAccountKey": {
|
||||
"type": "string",
|
||||
"defaultValue": ""
|
||||
}
|
||||
},
|
||||
"variables": {
|
||||
"FunctionCompleted": {
|
||||
"type": "String"
|
||||
},
|
||||
"FunctionError": {
|
||||
"type": "String"
|
||||
}
|
||||
},
|
||||
"annotations": [],
|
||||
"lastPublishTime": "2022-03-06T05:52:44Z"
|
||||
},
|
||||
"type": "Microsoft.Synapse/workspaces/pipelines"
|
||||
}
|
|
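The SparkJob activities above ("Copy Config file" and "Copy Xml From Convert Transform") both invoke `copy_noop/src/main.py` with the argument list shown in their `args` arrays, staging a single blob into the `volume-a` file share. The script itself is not included in this excerpt; what follows is a minimal sketch of such a blob-to-file-share copy, assuming the `azure-storage-blob` and `azure-storage-file-share` SDKs. Only the argument names come from the pipeline definition; the implementation is illustrative, not the repository's actual code.

```python
# Hypothetical sketch of a copy_noop-style job (not the script shipped in this commit).
# It accepts the same flags the pipeline passes and copies one blob from a
# container into an Azure Files share so downstream compute can mount it.
import argparse

from azure.core.exceptions import ResourceExistsError
from azure.storage.blob import BlobServiceClient
from azure.storage.fileshare import ShareClient


def main() -> None:
    parser = argparse.ArgumentParser(description="Copy a blob into an Azure file share")
    parser.add_argument("--storage_account_name", required=True)
    parser.add_argument("--storage_account_key", required=True)
    parser.add_argument("--src_container", required=True)
    parser.add_argument("--src_folder", required=True, help="blob path inside the container")
    parser.add_argument("--dst_fileshare", required=True)
    parser.add_argument("--dst_folder", required=True, help="target path inside the file share")
    args = parser.parse_args()

    # Download the source blob into memory.
    blob_service = BlobServiceClient(
        account_url=f"https://{args.storage_account_name}.blob.core.windows.net",
        credential=args.storage_account_key,
    )
    data = (
        blob_service.get_blob_client(args.src_container, args.src_folder)
        .download_blob()
        .readall()
    )

    # Upload it to the file share, creating parent directories as needed.
    share = ShareClient(
        account_url=f"https://{args.storage_account_name}.file.core.windows.net",
        share_name=args.dst_fileshare,
        credential=args.storage_account_key,
    )
    parts = args.dst_folder.strip("/").split("/")
    current = ""
    for directory in parts[:-1]:
        current = f"{current}/{directory}" if current else directory
        try:
            share.get_directory_client(current).create_directory()
        except ResourceExistsError:
            pass  # directory already present
    share.get_file_client("/".join(parts)).upload_file(data)


if __name__ == "__main__":
    main()
```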
@@ -0,0 +1,114 @@
{
"name": "E2E Custom Vision Model Flow",
"properties": {
"activities": [
{
"name": "Transforms",
"type": "ExecutePipeline",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "Custom Vision Model Transforms v2",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"Prefix": {
"value": "@pipeline().parameters.Prefix",
"type": "Expression"
},
"StorageAccountName": {
"value": "@pipeline().parameters.StorageAccountName",
"type": "Expression"
},
"StorageAccountKey": {
"value": "@pipeline().parameters.StorageAccountKey",
"type": "Expression"
}
}
}
},
{
"name": "Custom Vision Object Detection",
"type": "ExecutePipeline",
"dependsOn": [
{
"activity": "Transforms",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"pipeline": {
"referenceName": "Custom Vision Object Detection v2",
"type": "PipelineReference"
},
"waitOnCompletion": true,
"parameters": {
"Prefix": {
"value": "@pipeline().parameters.Prefix",
"type": "Expression"
},
"BatchName": {
"value": "@pipeline().parameters.BatchAccountName",
"type": "Expression"
},
"JobName": {
"value": "@pipeline().parameters.BatchJobName",
"type": "Expression"
},
"BatchLocation": {
"value": "@pipeline().parameters.BatchLocation",
"type": "Expression"
},
"StorageAccountName": {
"value": "@pipeline().parameters.StorageAccountName",
"type": "Expression"
},
"StorageAccountKey": {
"value": "@pipeline().parameters.StorageAccountKey",
"type": "Expression"
}
}
}
}
],
"parameters": {
"Prefix": {
"type": "string",
"defaultValue": ""
},
"StorageAccountName": {
"type": "string",
"defaultValue": ""
},
"StorageAccountKey": {
"type": "string",
"defaultValue": ""
},
"BatchAccountName": {
"type": "string",
"defaultValue": ""
},
"BatchJobName": {
"type": "string",
"defaultValue": ""
},
"BatchLocation": {
"type": "string",
"defaultValue": ""
},
"BatchLocation": {
"type": "string",
"defaultValue": ""
}
},
"variables": {
"Storage_Account_Conn_String": {
"type": "String"
}
},
"annotations": [],
"lastPublishTime": "2022-03-06T05:42:39Z"
},
"type": "Microsoft.Synapse/workspaces/pipelines"
}
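The `E2E Custom Vision Model Flow` pipeline above only orchestrates the two child pipelines through `ExecutePipeline` activities, forwarding its own parameters to them. As a usage sketch (not part of this commit), a run could be started from Python with the `azure-identity` and `azure-synapse-artifacts` packages; the workspace endpoint and parameter values below are placeholders.

```python
# Hypothetical sketch: trigger a run of the orchestrating pipeline.
# The endpoint and all parameter values are placeholders, not values from this commit.
from azure.identity import DefaultAzureCredential
from azure.synapse.artifacts import ArtifactsClient

client = ArtifactsClient(
    endpoint="https://<workspace-name>.dev.azuresynapse.net",
    credential=DefaultAzureCredential(),
)

run = client.pipeline.create_pipeline_run(
    "E2E Custom Vision Model Flow",
    parameters={
        "Prefix": "<batch-prefix>",
        "StorageAccountName": "<storage-account>",
        "StorageAccountKey": "<storage-key>",
        "BatchAccountName": "<batch-account>",
        "BatchJobName": "<batch-job>",
        "BatchLocation": "<batch-region>",
    },
)
print("Started pipeline run:", run.run_id)
```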
@@ -0,0 +1 @@
{"publishBranch":"workspace_test"}
|
|
@@ -0,0 +1,29 @@
{
"name": "Convert",
"properties": {
"targetBigDataPool": {
"referenceName": "__synapse_pool_name__",
"type": "BigDataPoolReference"
},
"requiredSparkVersion": "3.1",
"language": "python",
"jobProperties": {
"name": "Convert",
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_convert/src/convert.py",
"conf": {
"spark.dynamicAllocation.enabled": "false",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "2",
"spark.autotune.trackingId": "72aef2fd-aaae-40ed-8a09-7b2e87353ace"
},
"args": [],
"jars": [],
"files": [],
"driverMemory": "56g",
"driverCores": 8,
"executorMemory": "56g",
"executorCores": 8,
"numExecutors": 2
}
}
}
@@ -0,0 +1,29 @@
{
"name": "Copy noop",
"properties": {
"targetBigDataPool": {
"referenceName": "__synapse_pool_name__",
"type": "BigDataPoolReference"
},
"requiredSparkVersion": "3.1",
"language": "python",
"jobProperties": {
"name": "Copy noop",
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/copy_noop/src/mosaic.py",
"conf": {
"spark.dynamicAllocation.enabled": "false",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "2",
"spark.autotune.trackingId": "01767b3a-cede-4abf-8b79-52cb6d0ff80d"
},
"args": [],
"jars": [],
"files": [],
"driverMemory": "56g",
"driverCores": 8,
"executorMemory": "56g",
"executorCores": 8,
"numExecutors": 2
}
}
}
@@ -0,0 +1,31 @@
{
"name": "Crop",
"properties": {
"targetBigDataPool": {
"referenceName": "__synapse_pool_name__",
"type": "BigDataPoolReference"
},
"requiredSparkVersion": "3.1",
"language": "python",
"jobProperties": {
"name": "Crop",
"file": "abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_crop/src/crop.py",
"conf": {
"spark.dynamicAllocation.enabled": "false",
"spark.dynamicAllocation.minExecutors": "1",
"spark.dynamicAllocation.maxExecutors": "2",
"spark.autotune.trackingId": "f4cbbafe-9d98-476f-9bd4-e5bfc7bad06c"
},
"args": [],
"jars": [],
"files": [
"abfss://spark-jobs@__synapse_storage_account__.dfs.core.windows.net/raster_crop/src/utils.py"
],
"driverMemory": "56g",
"driverCores": 8,
"executorMemory": "56g",
"executorCores": 8,
"numExecutors": 2
}
}
}
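The pipelines and Spark job definitions above reference the `__synapse_pool_name__` and `__synapse_storage_account__` tokens rather than concrete resource names, so the JSON must be rendered with real values before it is imported into a workspace. How this repository performs that substitution is not shown in this excerpt; the snippet below is one minimal way to do it, with the folder path and replacement values as placeholders.

```python
# Hypothetical sketch: replace the deployment tokens used throughout these JSON
# artifacts before importing them into a Synapse workspace. The folder path and
# replacement values are placeholders, not taken from this commit.
from pathlib import Path

TOKENS = {
    "__synapse_pool_name__": "<your-spark-pool>",
    "__synapse_storage_account__": "<your-storage-account>",
}


def render(path: Path) -> None:
    text = path.read_text(encoding="utf-8")
    for token, value in TOKENS.items():
        text = text.replace(token, value)
    path.write_text(text, encoding="utf-8")


# Example: render every pipeline and Spark job definition under an artifacts folder.
for json_file in Path("artifacts").rglob("*.json"):
    render(json_file)
```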
Some files were not shown because too many files changed in this diff.