This commit is contained in:
xpillons 2021-01-28 23:27:34 +00:00
Родитель e41d6b1f8f
Коммит 7ccc0731f8
14 изменённых файлов: 67 добавлений и 96 удалений

Просмотреть файл

8
.github/workflows/dev.yml поставляемый
Просмотреть файл

@ -23,8 +23,8 @@ env:
TF_CLI_ARGS: '-no-color'
TF_CLI_ARGS_destroy: '-auto-approve -refresh=false'
TF_CLI_ARGS_apply: '-auto-approve'
TF_TEMPLATE_CONFIGURATION: '.github/workflows/deployhpc.yml.tpl'
TF_CONFIGURATION: 'deployhpc.yml'
TF_TEMPLATE_CONFIGURATION: '.github/workflows/config.yml.tpl'
TF_CONFIGURATION: 'config.yml'
ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }}
@ -61,7 +61,7 @@ jobs:
echo "VERSION_TAG=${VERSION_TAG}" >> $GITHUB_ENV
UUID="$(cat /proc/sys/kernel/random/uuid | tr -d '\n-' | tr '[:upper:]' '[:lower:]' | cut -c 1-6)"
RESOURCE_GROUP="deployhpc_$UUID"
RESOURCE_GROUP="azhop_$UUID"
#echo "RESOURCE_GROUP=$RESOURCE_GROUP" >> $GITHUB_ENV
echo "::set-output name=RESOURCE_GROUP::$RESOURCE_GROUP"
@ -78,7 +78,7 @@ jobs:
with:
name: artifact
path: |
deployhpc.yml
azhop.yml
playbooks/inventory
playbooks/group_vars/*
packer/options.json

8
.github/workflows/main.yml поставляемый
Просмотреть файл

@ -23,8 +23,8 @@ env:
TF_CLI_ARGS: '-no-color'
TF_CLI_ARGS_destroy: '-auto-approve -refresh=false'
TF_CLI_ARGS_apply: '-auto-approve'
TF_TEMPLATE_CONFIGURATION: '.github/workflows/deployhpc.yml.tpl'
TF_CONFIGURATION: 'deployhpc.yml'
TF_TEMPLATE_CONFIGURATION: '.github/workflows/config.yml.tpl'
TF_CONFIGURATION: 'config.yml'
ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }}
@ -61,7 +61,7 @@ jobs:
echo "VERSION_TAG=${VERSION_TAG}" >> $GITHUB_ENV
UUID="$(cat /proc/sys/kernel/random/uuid | tr -d '\n-' | tr '[:upper:]' '[:lower:]' | cut -c 1-6)"
RESOURCE_GROUP="deployhpc_$UUID"
RESOURCE_GROUP="azhop_$UUID"
#echo "RESOURCE_GROUP=$RESOURCE_GROUP" >> $GITHUB_ENV
echo "::set-output name=RESOURCE_GROUP::$RESOURCE_GROUP"
@ -78,7 +78,7 @@ jobs:
with:
name: artifact
path: |
deployhpc.yml
azhop.yml
playbooks/inventory
playbooks/group_vars/*
packer/options.json

107
README.md
Просмотреть файл

@ -1,8 +1,19 @@
# DeployHPC, your deployment to be HPC-Ready!
# Azure HPC On demand Platform, your deployment to be HPC-Ready!
DeployHPC provides the end-2-end deployment mechanism for a base HPC infrastructure on Azure. Industry standard tools like Terraform, Ansible and Packer will be used.
Azure HPC On demand Platform provides the end-to-end deployment mechanism for a base HPC infrastructure on Azure. Industry standard tools like Terraform, Ansible and Packer are used to provision and configure this environment, which contains:
- An Open OnDemand portal for all user access, remote shell access, remote visualization access, job submission, file access and more,
- An Active Directory for user authentication and domain control,
- A PBS Job Scheduler,
- CycleCloud 8.1 to handle autoscaling of PBS nodes through PBS integration,
- A Jumpbox to provide admin access,
- Azure NetApp Files for home directory and data storage,
- CVMFS over blobs mounted to access the application library,
- A Grafana dashboard to monitor your cluster
## HPC Rover - Setup the toolchain
# Toolchain setup
The toolchain can be set up either from a docker container or locally. See below for instructions regarding the installation.
## HPC Rover - Setup the toolchain from a container
The `HPC Rover` is a docker container acting as a sandbox toolchain development environment to avoid impacting the local machine configuration. It is the same container whether you are using Windows, Linux or macOS; you only need Visual Studio Code.
@ -24,13 +35,36 @@ Install
* Visual Studio Code version 1.41+ - [link](https://code.visualstudio.com/Download)
* Install Visual Studio Code Extension - Remote Development - [link](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack)
## Setup on Ubuntu (e.g. WSL2)
```
# install terraform
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
sudo apt-get update && sudo apt-get install terraform
# install ansible
sudo apt-get install ansible
# These are needed for AD
ansible-galaxy collection install ansible.windows
ansible-galaxy collection install community.windows
# These are needed for OpenOnDemand
ansible-galaxy collection install ansible.posix
ansible-galaxy collection install community.general
# install python packages
sudo apt-get install python3-pip
pip3 install pypsrp
pip3 install pysocks
```
## Deploying
```
# Login to Azure
az login
# Use the deployhpc.tpl.yml as a template to create the deployhpc.yml file.
# Use the config.tpl.yml file as a template to create the config.yml file.
# Build the whole infrastructure
./build.sh -f ./tf -a apply
@ -60,12 +94,10 @@ grep ondemand_fqdn playbooks/group_vars/all.yml
# From the OnDemand portal, select the menu "Clusters/_my_cluster Shell Access" to open a shell window
# Submit a simple test job
```
qsub -l select=1:slot_type=hb60rs -- bash -c "sleep 60"
qstat
```
# Delete all
./build.sh -f ./tf -a destroy
@ -120,64 +152,3 @@ trademarks or logos is subject to and must follow
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos are subject to those third-party's policies.
## Old documentation below => to be deleted
The installation steps consist of:
- prerequisites
Resource group, Virtual network etc.
- base infrastructure
Active Directory, CycleCloud, Scheduler, OpenOndemand and Home-storage
## Pre-requisites
You need the following installed to launch:
* Terraform
* Ansible with the following collections:
- community.windows
- ansible.windows
- ansible.posix
* Python3 with the following packages:
- pypsrp
- pysocks
## Setup on Ubuntu (e.g. WSL2)
```
# install terraform
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
sudo apt-get update && sudo apt-get install terraform
# install ansible
sudo apt-get install ansible
# These are needed for AD
ansible-galaxy collection install ansible.windows
ansible-galaxy collection install community.windows
# These are needed for OpenOnDemand
ansible-galaxy collection install ansible.posix
ansible-galaxy collection install community.general
# install python packages
sudo apt-get install python3-pip
pip3 install pypsrp
pip3 install pysocks
```
## TODO: Users
* Create home directory
* SSH config to StrictHostKeyChecking false
* Create ssh key
* Copy public key to authorized keys

Просмотреть файл

@ -2,7 +2,7 @@
LOCATION=${1:-westeurope}
RESOURCE_GROUP_NAME=tfstate-$LOCATION
CONTAINER_NAME=tfstate
TFSTATE_FILE=deploy$RANDOM
TFSTATE_FILE=azhop$RANDOM
# Create resource group
az group create --name $RESOURCE_GROUP_NAME --location $LOCATION -o table

Просмотреть файл

@ -1,6 +1,6 @@
---
location: westeurope
resource_group: deployhpc
resource_group: azhop
homefs_size_tb: 4
homedir_mountpoint: /anfhome
admin_user: hpcadmin

Просмотреть файл

@ -5,7 +5,7 @@
[cluster OpenPBS-headless]
FormLayout = selectionpanel
Category = DeployHPC
Category = Azure HPC OnDemand Platform
Autoscale = true

Просмотреть файл

@ -41,7 +41,7 @@ resource "azurerm_windows_virtual_machine" "ad" {
resource "azurerm_key_vault_secret" "admin_password" {
name = format("%s-password", local.admin_username)
value = random_password.password.result
key_vault_id = azurerm_key_vault.deployhpc.id
key_vault_id = azurerm_key_vault.azhop.id
lifecycle {
ignore_changes = [

Просмотреть файл

@ -1,11 +1,11 @@
resource "azurerm_netapp_account" "deployhpc" {
resource "azurerm_netapp_account" "azhop" {
name = "hpcanf-${random_string.resource_postfix.result}"
resource_group_name = azurerm_resource_group.rg.name
location = azurerm_resource_group.rg.location
}
resource "azurerm_netapp_pool" "anfpool" {
name = "anfpool-${random_string.resource_postfix.result}"
account_name = azurerm_netapp_account.deployhpc.name
account_name = azurerm_netapp_account.azhop.name
location = azurerm_resource_group.rg.location
resource_group_name = azurerm_resource_group.rg.name
service_level = "Standard"
@ -19,7 +19,7 @@ resource "azurerm_netapp_volume" "home" {
name = "anfhome"
location = azurerm_resource_group.rg.location
resource_group_name = azurerm_resource_group.rg.name
account_name = azurerm_netapp_account.deployhpc.name
account_name = azurerm_netapp_account.azhop.name
pool_name = azurerm_netapp_pool.anfpool.name
volume_path = "home-${random_string.resource_postfix.result}"
service_level = "Standard"

Просмотреть файл

@ -1,6 +1,6 @@
data "azurerm_client_config" "current" {}
resource "azurerm_key_vault" "deployhpc" {
resource "azurerm_key_vault" "azhop" {
name = format("%s%s", "kv", random_string.resource_postfix.result)
location = azurerm_resource_group.rg.location
resource_group_name = azurerm_resource_group.rg.name

Просмотреть файл

@ -63,8 +63,8 @@ resource "local_file" "public_key" {
# Storage account used for
# - CycleCloud projects
# - Terraform states
resource "azurerm_storage_account" "deployhpc" {
name = "deployhpc${random_string.resource_postfix.result}"
resource "azurerm_storage_account" "azhop" {
name = "azhop${random_string.resource_postfix.result}"
resource_group_name = azurerm_resource_group.rg.name
location = azurerm_resource_group.rg.location
account_tier = "Standard"

Просмотреть файл

@ -1,4 +1,4 @@
resource "azurerm_virtual_network" "deployhpc" {
resource "azurerm_virtual_network" "azhop" {
name = "hpcvnet"
resource_group_name = azurerm_resource_group.rg.name
location = azurerm_resource_group.rg.location
@ -6,19 +6,19 @@ resource "azurerm_virtual_network" "deployhpc" {
}
resource "azurerm_subnet" "frontend" {
name = "frontend"
virtual_network_name = azurerm_virtual_network.deployhpc.name
virtual_network_name = azurerm_virtual_network.azhop.name
resource_group_name = azurerm_resource_group.rg.name
address_prefixes = ["10.0.0.0/24"]
}
resource "azurerm_subnet" "admin" {
name = "admin"
virtual_network_name = azurerm_virtual_network.deployhpc.name
virtual_network_name = azurerm_virtual_network.azhop.name
resource_group_name = azurerm_resource_group.rg.name
address_prefixes = ["10.0.1.0/24"]
}
resource "azurerm_subnet" "netapp" {
name = "netapp"
virtual_network_name = azurerm_virtual_network.deployhpc.name
virtual_network_name = azurerm_virtual_network.azhop.name
resource_group_name = azurerm_resource_group.rg.name
address_prefixes = ["10.0.2.0/24"]
delegation {
@ -32,13 +32,13 @@ resource "azurerm_subnet" "netapp" {
}
# resource "azurerm_subnet" "bastion" {
# name = "AzureBastionSubnet"
# virtual_network_name = azurerm_virtual_network.deployhpc.name
# virtual_network_name = azurerm_virtual_network.azhop.name
# resource_group_name = azurerm_resource_group.rg.name
# address_prefixes = ["10.0.3.0/24"]
#}
resource "azurerm_subnet" "compute" {
name = "compute"
virtual_network_name = azurerm_virtual_network.deployhpc.name
virtual_network_name = azurerm_virtual_network.azhop.name
resource_group_name = azurerm_resource_group.rg.name
address_prefixes = ["10.0.16.0/20"]
}

Просмотреть файл

@ -20,7 +20,7 @@ resource "local_file" "global_variables" {
{
admin_username = local.admin_username
ssh_public_key = tls_private_key.internal.public_key_openssh
cc_storage = azurerm_storage_account.deployhpc.name
cc_storage = azurerm_storage_account.azhop.name
region = local.location
resource_group = local.resource_group
config_file = local.configuration_file
@ -30,7 +30,7 @@ resource "local_file" "global_variables" {
anf-home-path = azurerm_netapp_volume.home.volume_path
ondemand-fqdn = azurerm_public_ip.ondemand-pip.fqdn
subscription_id = data.azurerm_subscription.primary.subscription_id
key_vault = azurerm_key_vault.deployhpc.name
key_vault = azurerm_key_vault.azhop.name
}
)
filename = "${local.playbook_root_dir}/group_vars/all.yml"
@ -51,7 +51,7 @@ resource "local_file" "connect_script" {
resource "local_file" "get_secret_script" {
sensitive_content = templatefile("${local.playbooks_template_dir}/get_secret.tmpl",
{
key_vault = azurerm_key_vault.deployhpc.name
key_vault = azurerm_key_vault.azhop.name
admin_user = local.admin_username
}
)

Просмотреть файл

@ -2,7 +2,7 @@ locals {
packer_root_dir = "${path.root}/../packer"
playbook_root_dir = "${path.root}/../playbooks"
playbooks_template_dir = "${path.root}/templates"
configuration_file="${path.root}/../deployhpc.yml"
configuration_file="${path.root}/../config.yml"
configuration_yml=yamldecode(file(local.configuration_file))
location = local.configuration_yml["location"]