From 1fb38f12bc9bc1bcaf782984349affd3fd6e708e Mon Sep 17 00:00:00 2001 From: JonShelley Date: Tue, 3 Dec 2019 21:29:32 +0000 Subject: [PATCH 1/2] Added an example for a peer network cluster setup --- .../simple_hpc_pbs_peer_network/config.json | 118 ++++++++++++++++++ .../simple_hpc_pbs_peer_network/readme.md | 65 ++++++++++ 2 files changed, 183 insertions(+) create mode 100644 examples/simple_hpc_pbs_peer_network/config.json create mode 100644 examples/simple_hpc_pbs_peer_network/readme.md diff --git a/examples/simple_hpc_pbs_peer_network/config.json b/examples/simple_hpc_pbs_peer_network/config.json new file mode 100644 index 00000000..84a19550 --- /dev/null +++ b/examples/simple_hpc_pbs_peer_network/config.json @@ -0,0 +1,118 @@ +{ + "location": "variables.location", + "resource_group": "variables.resource_group", + "install_from": "headnode", + "admin_user": "hpcadmin", + "variables": { + "hpc_image": "OpenLogic:CentOS-HPC:7.6:latest", + "location": "southcentralus", + "resource_group": "", + "vm_type": "Standard_HB60rs", + "compute_instances": 2, + "vnet_resource_group": "variables.resource_group", + "peer_network_resource_group": "", + "peer_vnet_name": "" + }, + "vnet": { + "resource_group": "variables.vnet_resource_group", + "name": "hpcvnet", + "address_prefix": "10.2.0.0/20", + "subnets": { + "compute": "10.2.4.0/22" + }, + "peer": { + "Network1": { + "resource_group": "variables.peer_network_resource_group", + "vnet_name": "variables.peer_vnet_name" + } + } + }, + "resources": { + "headnode": { + "type": "vm", + "vm_type": "variables.vm_type", + "public_ip": true, + "image": "variables.hpc_image", + "subnet": "compute", + "tags": [ + "cndefault", + "nfsserver", + "pbsserver", + "loginnode", + "localuser", + "disable-selinux" + ] + }, + "compute": { + "type": "vmss", + "vm_type": "variables.vm_type", + "instances": "variables.compute_instances", + "image": "variables.hpc_image", + "subnet": "compute", + "tags": [ + "nfsclient", + "pbsclient", + 
"cndefault", + "localuser", + "disable-selinux" + ] + } + }, + "install": [ + { + "script": "disable-selinux.sh", + "tag": "disable-selinux", + "sudo": true + }, + { + "script": "cndefault.sh", + "tag": "cndefault", + "sudo": true + }, + { + "script": "nfsserver.sh", + "tag": "nfsserver", + "sudo": true + }, + { + "script": "nfsclient.sh", + "args": [ + "$( Note: You can still update variables even if they are already set. For example, in the command below we change the region to `westus2` and the SKU to `Standard_HC44rs`: + +``` +azhpc-init -c $azhpc_dir/examples/simple_hpc_pbs_peer_network -d simple_hpc_pbs_peer_network -v location=westus2,vm_type=Standard_HC44rs,resource_group=azhpc-cluster,peer_network_resource_group=some_name,peer_vnet_name=some_name +``` + +## Create the cluster + +``` +cd simple_hpc_pbs_peer_network +azhpc-build +``` + +Allow ~10 minutes for deployment. You are able to view the status VMs being deployed by running `azhpc-status` in another terminal. + +## Log in the cluster + +Connect to the headnode and check PBS and NFS + +``` +$ azhpc-connect -u hpcuser headnode +Fri Jun 28 09:18:04 UTC 2019 : logging in to headnode (via headnode6cfe86.westus2.cloudapp.azure.com) +[hpcuser@headnode ~]$ pbsnodes -avS +vnode state OS hardware host queue mem ncpus nmics ngpus comment +--------------- --------------- -------- -------- --------------- ---------- -------- ------- ------- ------- --------- +compuc407000003 free -- -- 10.2.4.8 -- 224gb 60 0 0 -- +compuc407000002 free -- -- 10.2.4.7 -- 224gb 60 0 0 -- +[hpcuser@headnode ~]$ sudo exportfs -v +/share/apps (sync,wdelay,hide,no_subtree_check,sec=sys,rw,secure,root_squash,no_all_squash) +/share/data (sync,wdelay,hide,no_subtree_check,sec=sys,rw,secure,root_squash,no_all_squash) +/share/home (sync,wdelay,hide,no_subtree_check,sec=sys,rw,secure,root_squash,no_all_squash) +/mnt/resource/scratch + (sync,wdelay,hide,no_subtree_check,sec=sys,rw,secure,root_squash,no_all_squash) +[hpcuser@headnode ~]$ +``` + 
+To check the state of the cluster, run the following commands: + +``` +azhpc-connect -u hpcuser headnode +qstat -Q +pbsnodes -avS +df -h +``` + From a2a33c29356c1a4e32134e3fd914d166ab68cd5e Mon Sep 17 00:00:00 2001 From: JonShelley Date: Wed, 4 Dec 2019 17:52:13 +0000 Subject: [PATCH 2/2] updated config file based on pull request feedback --- examples/simple_hpc_pbs_peer_network/config.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/simple_hpc_pbs_peer_network/config.json b/examples/simple_hpc_pbs_peer_network/config.json index 84a19550..311101a8 100644 --- a/examples/simple_hpc_pbs_peer_network/config.json +++ b/examples/simple_hpc_pbs_peer_network/config.json @@ -4,14 +4,15 @@ "install_from": "headnode", "admin_user": "hpcadmin", "variables": { - "hpc_image": "OpenLogic:CentOS-HPC:7.6:latest", + "hpc_image": "OpenLogic:CentOS-HPC:7.7:latest", "location": "southcentralus", "resource_group": "", - "vm_type": "Standard_HB60rs", + "vm_type": "Standard_D8_v3", + "compute_vm_type": "Standard_HB60rs", "compute_instances": 2, "vnet_resource_group": "variables.resource_group", - "peer_network_resource_group": "", - "peer_vnet_name": "" + "peer_network_resource_group": "", + "peer_vnet_name": "" }, "vnet": { "resource_group": "variables.vnet_resource_group", @@ -45,7 +46,7 @@ }, "compute": { "type": "vmss", - "vm_type": "variables.vm_type", + "vm_type": "variables.compute_vm_type", "instances": "variables.compute_instances", "image": "variables.hpc_image", "subnet": "compute",