зеркало из https://github.com/Azure/az-hop.git
rename deployhpc to azhop
This commit is contained in:
Родитель
e41d6b1f8f
Коммит
7ccc0731f8
|
@ -23,8 +23,8 @@ env:
|
|||
TF_CLI_ARGS: '-no-color'
|
||||
TF_CLI_ARGS_destroy: '-auto-approve -refresh=false'
|
||||
TF_CLI_ARGS_apply: '-auto-approve'
|
||||
TF_TEMPLATE_CONFIGURATION: '.github/workflows/deployhpc.yml.tpl'
|
||||
TF_CONFIGURATION: 'deployhpc.yml'
|
||||
TF_TEMPLATE_CONFIGURATION: '.github/workflows/config.yml.tpl'
|
||||
TF_CONFIGURATION: 'config.yml'
|
||||
ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
|
||||
ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
|
||||
ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }}
|
||||
|
@ -61,7 +61,7 @@ jobs:
|
|||
echo "VERSION_TAG=${VERSION_TAG}" >> $GITHUB_ENV
|
||||
|
||||
UUID="$(cat /proc/sys/kernel/random/uuid | tr -d '\n-' | tr '[:upper:]' '[:lower:]' | cut -c 1-6)"
|
||||
RESOURCE_GROUP="deployhpc_$UUID"
|
||||
RESOURCE_GROUP="azhop_$UUID"
|
||||
#echo "RESOURCE_GROUP=$RESOURCE_GROUP" >> $GITHUB_ENV
|
||||
echo "::set-output name=RESOURCE_GROUP::$RESOURCE_GROUP"
|
||||
|
||||
|
@ -78,7 +78,7 @@ jobs:
|
|||
with:
|
||||
name: artifact
|
||||
path: |
|
||||
deployhpc.yml
|
||||
azhop.yml
|
||||
playbooks/inventory
|
||||
playbooks/group_vars/*
|
||||
packer/options.json
|
||||
|
|
|
@ -23,8 +23,8 @@ env:
|
|||
TF_CLI_ARGS: '-no-color'
|
||||
TF_CLI_ARGS_destroy: '-auto-approve -refresh=false'
|
||||
TF_CLI_ARGS_apply: '-auto-approve'
|
||||
TF_TEMPLATE_CONFIGURATION: '.github/workflows/deployhpc.yml.tpl'
|
||||
TF_CONFIGURATION: 'deployhpc.yml'
|
||||
TF_TEMPLATE_CONFIGURATION: '.github/workflows/config.yml.tpl'
|
||||
TF_CONFIGURATION: 'config.yml'
|
||||
ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
|
||||
ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
|
||||
ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }}
|
||||
|
@ -61,7 +61,7 @@ jobs:
|
|||
echo "VERSION_TAG=${VERSION_TAG}" >> $GITHUB_ENV
|
||||
|
||||
UUID="$(cat /proc/sys/kernel/random/uuid | tr -d '\n-' | tr '[:upper:]' '[:lower:]' | cut -c 1-6)"
|
||||
RESOURCE_GROUP="deployhpc_$UUID"
|
||||
RESOURCE_GROUP="azhop_$UUID"
|
||||
#echo "RESOURCE_GROUP=$RESOURCE_GROUP" >> $GITHUB_ENV
|
||||
echo "::set-output name=RESOURCE_GROUP::$RESOURCE_GROUP"
|
||||
|
||||
|
@ -78,7 +78,7 @@ jobs:
|
|||
with:
|
||||
name: artifact
|
||||
path: |
|
||||
deployhpc.yml
|
||||
azhop.yml
|
||||
playbooks/inventory
|
||||
playbooks/group_vars/*
|
||||
packer/options.json
|
||||
|
|
107
README.md
107
README.md
|
@ -1,8 +1,19 @@
|
|||
# DeployHPC, your deployment to be HPC-Ready!
|
||||
# Azure HPC On demand Platform, your deployment to be HPC-Ready!
|
||||
|
||||
DeployHPC provides the end-2-end deployment mechanism for a base HPC infrastructure on Azure. Industry standard tools like Terraform, Ansible and Packer will be used.
|
||||
Azure HPC On demand Platform provides the end-to-end deployment mechanism for a base HPC infrastructure on Azure. Industry standard tools like Terraform, Ansible and Packer are used to provision and configure this environment, which contains:
|
||||
- An Open OnDemand Portal for all user access, remote shell access, remote visualization access, job submission, file access and more,
|
||||
- An Active Directory for user authentication and domain control,
|
||||
- A PBS Job Scheduler,
|
||||
- CycleCloud 8.1 to handle autoscaling of PBS nodes through PBS integration,
|
||||
- A Jumpbox to provide admin access,
|
||||
- Azure NetApp Files for home directory and data storage,
|
||||
- CVMFS over blobs mounted to access the application library,
|
||||
- A Grafana dashboard to monitor your cluster
|
||||
|
||||
## HPC Rover - Setup the toolchain
|
||||
# Toolchain setup
|
||||
The toolchain can be set up either from a docker container or locally. See below for instructions regarding the installation.
|
||||
|
||||
## HPC Rover - Setup the toolchain from a container
|
||||
|
||||
The `HPC Rover` is a docker container acting as a sandbox toolchain development environment to avoid impacting the local machine configuration. It is the same container whether you are using Windows, Linux or macOS; you only need Visual Studio Code.
|
||||
|
||||
|
@ -24,13 +35,36 @@ Install
|
|||
* Visual Studio Code version 1.41+ - [link](https://code.visualstudio.com/Download)
|
||||
* Install Visual Studio Code Extension - Remote Development - [link](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack)
|
||||
|
||||
## Setup on Ubuntu (e.g. WSL2)
|
||||
|
||||
```
|
||||
# install terraform
|
||||
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
|
||||
sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
|
||||
sudo apt-get update && sudo apt-get install terraform
|
||||
|
||||
# install ansible
|
||||
sudo apt-get install ansible
|
||||
# These are needed for AD
|
||||
ansible-galaxy collection install ansible.windows
|
||||
ansible-galaxy collection install community.windows
|
||||
# These are needed for OpenOnDemand
|
||||
ansible-galaxy collection install ansible.posix
|
||||
ansible-galaxy collection install community.general
|
||||
|
||||
# install python packages
|
||||
sudo apt-get install python3-pip
|
||||
pip3 install pypsrp
|
||||
pip3 install pysocks
|
||||
```
|
||||
|
||||
## Deploying
|
||||
|
||||
```
|
||||
# Login to Azure
|
||||
az login
|
||||
|
||||
# Use the deployhpc.tpl.yml as a template to create the deployhpc.yml file.
|
||||
# Use the config.tpl.yml file as a template to create the config.yml file.
|
||||
|
||||
# Build the whole infrastructure
|
||||
./build.sh -f ./tf -a apply
|
||||
|
@ -60,12 +94,10 @@ grep ondemand_fqdn playbooks/group_vars/all.yml
|
|||
# From the OnDemand portal, select the menu "Clusters/_my_cluster Shell Access" to open a shell window
|
||||
# Submit a simple test job
|
||||
|
||||
```
|
||||
|
||||
qsub -l select=1:slot_type=hb60rs -- bash -c "sleep 60"
|
||||
qstat
|
||||
|
||||
```
|
||||
|
||||
# Delete all
|
||||
./build.sh -f ./tf -a destroy
|
||||
|
||||
|
@ -120,64 +152,3 @@ trademarks or logos is subject to and must follow
|
|||
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
|
||||
Any use of third-party trademarks or logos are subject to those third-party's policies.
|
||||
|
||||
|
||||
|
||||
|
||||
## Old documentation below => to be deleted
|
||||
|
||||
|
||||
|
||||
The installation steps consist of:
|
||||
- prerequirements
|
||||
Resource group, Virtual network etc.
|
||||
- base infrastructure
|
||||
Active Directory, CycleCloud, Scheduler, OpenOndemand and Home-storage
|
||||
|
||||
|
||||
## Pre-requisites
|
||||
|
||||
You need the following installed to launch:
|
||||
|
||||
* Terraform
|
||||
* Ansible with the following collections:
|
||||
- community.windows
|
||||
- ansible.windows
|
||||
- ansible.posix
|
||||
* Python3 with the following packages:
|
||||
- pypsrp
|
||||
- pysocks
|
||||
|
||||
|
||||
## Setup on Ubuntu (e.g. WSL2)
|
||||
|
||||
```
|
||||
# install terraform
|
||||
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo apt-key add -
|
||||
sudo apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
|
||||
sudo apt-get update && sudo apt-get install terraform
|
||||
|
||||
# install ansible
|
||||
sudo apt-get install ansible
|
||||
# These are needed for AD
|
||||
ansible-galaxy collection install ansible.windows
|
||||
ansible-galaxy collection install community.windows
|
||||
# These are needed for OpenOnDemand
|
||||
ansible-galaxy collection install ansible.posix
|
||||
ansible-galaxy collection install community.general
|
||||
|
||||
# install python packages
|
||||
sudo apt-get install python3-pip
|
||||
pip3 install pypsrp
|
||||
pip3 install pysocks
|
||||
```
|
||||
|
||||
|
||||
|
||||
## TODO: Users
|
||||
|
||||
* Create home directory
|
||||
* SSH config to set StrictHostKeyChecking to false
|
||||
* Create ssh key
|
||||
* Copy public key to authorized keys
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
LOCATION=${1:-westeurope}
|
||||
RESOURCE_GROUP_NAME=tfstate-$LOCATION
|
||||
CONTAINER_NAME=tfstate
|
||||
TFSTATE_FILE=deploy$RANDOM
|
||||
TFSTATE_FILE=azhop$RANDOM
|
||||
|
||||
# Create resource group
|
||||
az group create --name $RESOURCE_GROUP_NAME --location $LOCATION -o table
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
location: westeurope
|
||||
resource_group: deployhpc
|
||||
resource_group: azhop
|
||||
homefs_size_tb: 4
|
||||
homedir_mountpoint: /anfhome
|
||||
admin_user: hpcadmin
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
[cluster OpenPBS-headless]
|
||||
FormLayout = selectionpanel
|
||||
Category = DeployHPC
|
||||
Category = Azure HPC OnDemand Platform
|
||||
|
||||
Autoscale = true
|
||||
|
||||
|
|
2
tf/ad.tf
2
tf/ad.tf
|
@ -41,7 +41,7 @@ resource "azurerm_windows_virtual_machine" "ad" {
|
|||
resource "azurerm_key_vault_secret" "admin_password" {
|
||||
name = format("%s-password", local.admin_username)
|
||||
value = random_password.password.result
|
||||
key_vault_id = azurerm_key_vault.deployhpc.id
|
||||
key_vault_id = azurerm_key_vault.azhop.id
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
resource "azurerm_netapp_account" "deployhpc" {
|
||||
resource "azurerm_netapp_account" "azhop" {
|
||||
name = "hpcanf-${random_string.resource_postfix.result}"
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
location = azurerm_resource_group.rg.location
|
||||
}
|
||||
resource "azurerm_netapp_pool" "anfpool" {
|
||||
name = "anfpool-${random_string.resource_postfix.result}"
|
||||
account_name = azurerm_netapp_account.deployhpc.name
|
||||
account_name = azurerm_netapp_account.azhop.name
|
||||
location = azurerm_resource_group.rg.location
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
service_level = "Standard"
|
||||
|
@ -19,7 +19,7 @@ resource "azurerm_netapp_volume" "home" {
|
|||
name = "anfhome"
|
||||
location = azurerm_resource_group.rg.location
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
account_name = azurerm_netapp_account.deployhpc.name
|
||||
account_name = azurerm_netapp_account.azhop.name
|
||||
pool_name = azurerm_netapp_pool.anfpool.name
|
||||
volume_path = "home-${random_string.resource_postfix.result}"
|
||||
service_level = "Standard"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
data "azurerm_client_config" "current" {}
|
||||
|
||||
resource "azurerm_key_vault" "deployhpc" {
|
||||
resource "azurerm_key_vault" "azhop" {
|
||||
name = format("%s%s", "kv", random_string.resource_postfix.result)
|
||||
location = azurerm_resource_group.rg.location
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
|
|
|
@ -63,8 +63,8 @@ resource "local_file" "public_key" {
|
|||
# Storage account used for
|
||||
# - CycleCloud projects
|
||||
# - Terraform states
|
||||
resource "azurerm_storage_account" "deployhpc" {
|
||||
name = "deployhpc${random_string.resource_postfix.result}"
|
||||
resource "azurerm_storage_account" "azhop" {
|
||||
name = "azhop${random_string.resource_postfix.result}"
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
location = azurerm_resource_group.rg.location
|
||||
account_tier = "Standard"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
resource "azurerm_virtual_network" "deployhpc" {
|
||||
resource "azurerm_virtual_network" "azhop" {
|
||||
name = "hpcvnet"
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
location = azurerm_resource_group.rg.location
|
||||
|
@ -6,19 +6,19 @@ resource "azurerm_virtual_network" "deployhpc" {
|
|||
}
|
||||
resource "azurerm_subnet" "frontend" {
|
||||
name = "frontend"
|
||||
virtual_network_name = azurerm_virtual_network.deployhpc.name
|
||||
virtual_network_name = azurerm_virtual_network.azhop.name
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
address_prefixes = ["10.0.0.0/24"]
|
||||
}
|
||||
resource "azurerm_subnet" "admin" {
|
||||
name = "admin"
|
||||
virtual_network_name = azurerm_virtual_network.deployhpc.name
|
||||
virtual_network_name = azurerm_virtual_network.azhop.name
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
address_prefixes = ["10.0.1.0/24"]
|
||||
}
|
||||
resource "azurerm_subnet" "netapp" {
|
||||
name = "netapp"
|
||||
virtual_network_name = azurerm_virtual_network.deployhpc.name
|
||||
virtual_network_name = azurerm_virtual_network.azhop.name
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
address_prefixes = ["10.0.2.0/24"]
|
||||
delegation {
|
||||
|
@ -32,13 +32,13 @@ resource "azurerm_subnet" "netapp" {
|
|||
}
|
||||
# resource "azurerm_subnet" "bastion" {
|
||||
# name = "AzureBastionSubnet"
|
||||
# virtual_network_name = azurerm_virtual_network.deployhpc.name
|
||||
# virtual_network_name = azurerm_virtual_network.azhop.name
|
||||
# resource_group_name = azurerm_resource_group.rg.name
|
||||
# address_prefixes = ["10.0.3.0/24"]
|
||||
#}
|
||||
resource "azurerm_subnet" "compute" {
|
||||
name = "compute"
|
||||
virtual_network_name = azurerm_virtual_network.deployhpc.name
|
||||
virtual_network_name = azurerm_virtual_network.azhop.name
|
||||
resource_group_name = azurerm_resource_group.rg.name
|
||||
address_prefixes = ["10.0.16.0/20"]
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ resource "local_file" "global_variables" {
|
|||
{
|
||||
admin_username = local.admin_username
|
||||
ssh_public_key = tls_private_key.internal.public_key_openssh
|
||||
cc_storage = azurerm_storage_account.deployhpc.name
|
||||
cc_storage = azurerm_storage_account.azhop.name
|
||||
region = local.location
|
||||
resource_group = local.resource_group
|
||||
config_file = local.configuration_file
|
||||
|
@ -30,7 +30,7 @@ resource "local_file" "global_variables" {
|
|||
anf-home-path = azurerm_netapp_volume.home.volume_path
|
||||
ondemand-fqdn = azurerm_public_ip.ondemand-pip.fqdn
|
||||
subscription_id = data.azurerm_subscription.primary.subscription_id
|
||||
key_vault = azurerm_key_vault.deployhpc.name
|
||||
key_vault = azurerm_key_vault.azhop.name
|
||||
}
|
||||
)
|
||||
filename = "${local.playbook_root_dir}/group_vars/all.yml"
|
||||
|
@ -51,7 +51,7 @@ resource "local_file" "connect_script" {
|
|||
resource "local_file" "get_secret_script" {
|
||||
sensitive_content = templatefile("${local.playbooks_template_dir}/get_secret.tmpl",
|
||||
{
|
||||
key_vault = azurerm_key_vault.deployhpc.name
|
||||
key_vault = azurerm_key_vault.azhop.name
|
||||
admin_user = local.admin_username
|
||||
}
|
||||
)
|
||||
|
|
|
@ -2,7 +2,7 @@ locals {
|
|||
packer_root_dir = "${path.root}/../packer"
|
||||
playbook_root_dir = "${path.root}/../playbooks"
|
||||
playbooks_template_dir = "${path.root}/templates"
|
||||
configuration_file="${path.root}/../deployhpc.yml"
|
||||
configuration_file="${path.root}/../config.yml"
|
||||
configuration_yml=yamldecode(file(local.configuration_file))
|
||||
|
||||
location = local.configuration_yml["location"]
|
||||
|
|
Загрузка…
Ссылка в новой задаче