Hadoop: support multiple data disks (#416)
* Remove apiVersion; use Mike's RAID code. * Add numberDataDisks to parameters.json. * Resource deployments need an API version. * Worker data disk names. * Naming bug. * -_- * Type. * Limits were not set correctly.
This commit is contained in:
Родитель
1f5577c2ef
Коммит
35c70483ae
|
@ -16,7 +16,7 @@
|
|||
operations.
|
||||
</description>
|
||||
<name>io.file.buffer.size</name>
|
||||
<value>131072</value>
|
||||
<value>32768</value>
|
||||
</property>
|
||||
<property>
|
||||
<description>
|
||||
|
|
|
@ -102,6 +102,15 @@
|
|||
"description": "Size of the attached data disks in GB."
|
||||
}
|
||||
},
|
||||
"numberDataDisks" : {
|
||||
"type": "int",
|
||||
"defaultValue": 1,
|
||||
"minValue": 1,
|
||||
"maxValue": 32,
|
||||
"metadata": {
|
||||
"description": "The number of data disks to attach to each VM."
|
||||
}
|
||||
},
|
||||
"batchSize": {
|
||||
"type": "int",
|
||||
"defaultValue": 10,
|
||||
|
@ -144,7 +153,6 @@
|
|||
"comments": "Storage account which holds all containers and logs.",
|
||||
"type": "Microsoft.Storage/storageAccounts",
|
||||
"name": "[toLower(variables('storageAccountName'))]",
|
||||
"apiVersion": "2016-01-01",
|
||||
"location": "[variables('location')]",
|
||||
"properties": {
|
||||
"accountType": "[parameters('storageAccountType')]"
|
||||
|
@ -152,7 +160,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "Virtual network in which the nodes will reside.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/virtualNetworks",
|
||||
"name": "[variables('virtualNetworkName')]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -174,7 +181,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "The public IP addresses for NAT.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/publicIPAddresses",
|
||||
"name": "[variables('publicIPAddressName')]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -188,7 +194,6 @@
|
|||
{
|
||||
"type": "Microsoft.Compute/availabilitySets",
|
||||
"name": "hadoop",
|
||||
"apiVersion": "2016-03-30",
|
||||
"location": "[resourceGroup().location]",
|
||||
"properties": {
|
||||
"platformFaultDomainCount": 1,
|
||||
|
@ -197,7 +202,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "Network security group for the network.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/networkSecurityGroups",
|
||||
"name": "[variables('networkSecurityGroupName')]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -261,7 +265,6 @@
|
|||
{
|
||||
"name": "[variables('loadBalancerName')]",
|
||||
"type": "Microsoft.Network/loadBalancers",
|
||||
"apiVersion": "2015-06-15",
|
||||
"location": "[variables('location')]",
|
||||
"properties": {
|
||||
"frontendIPConfigurations": [
|
||||
|
@ -335,9 +338,9 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"apiVersion": "2016-02-01",
|
||||
"name": "workerNodeTemplate",
|
||||
"type": "Microsoft.Resources/deployments",
|
||||
"apiVersion": "2015-01-01",
|
||||
"dependsOn": [
|
||||
"[variables('storageAccountName')]",
|
||||
"[variables('loadBalancerName')]"
|
||||
|
@ -364,6 +367,9 @@
|
|||
"numberWorkerNodes": {
|
||||
"value": "[parameters('numberWorkerNodes')]"
|
||||
},
|
||||
"numberDataDisks" : {
|
||||
"value": "[parameters('numberDataDisks')]"
|
||||
},
|
||||
"dataDiskSize": {
|
||||
"value": "[parameters('dataDiskSize')]"
|
||||
},
|
||||
|
@ -392,9 +398,9 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"apiVersion": "2016-02-01",
|
||||
"name": "masterNodeTemplate",
|
||||
"type": "Microsoft.Resources/deployments",
|
||||
"apiVersion": "2015-01-01",
|
||||
"dependsOn": [
|
||||
"[variables('storageAccountName')]",
|
||||
"[variables('loadBalancerName')]"
|
||||
|
@ -446,9 +452,9 @@
|
|||
}
|
||||
},
|
||||
{
|
||||
"apiVersion": "2016-02-01",
|
||||
"name": "jumpBoxTemplate",
|
||||
"type": "Microsoft.Resources/deployments",
|
||||
"apiVersion": "2015-01-01",
|
||||
"dependsOn": [
|
||||
"masterNodeTemplate",
|
||||
"workerNodeTemplate"
|
||||
|
|
|
@ -56,7 +56,6 @@
|
|||
"resources": [
|
||||
{
|
||||
"comments": "Network interfaces for jumpbox.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/networkInterfaces",
|
||||
"name": "[variables('nicName')]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -89,7 +88,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "Jump box node.",
|
||||
"apiVersion": "2016-03-30",
|
||||
"type": "Microsoft.Compute/virtualMachines",
|
||||
"name": "[variables('vmName')]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -145,7 +143,6 @@
|
|||
{
|
||||
"name": "[concat(variables('vmName'),'/jumpboxCustomScript')]",
|
||||
"type": "Microsoft.Compute/virtualMachines/extensions",
|
||||
"apiVersion": "2016-03-30",
|
||||
"location": "[resourceGroup().location]",
|
||||
"dependsOn": [
|
||||
"[resourceId(resourceGroup().name, 'Microsoft.Compute/virtualMachines',variables('vmName'))]"
|
||||
|
|
|
@ -62,7 +62,6 @@
|
|||
"resources": [
|
||||
{
|
||||
"comments": "Network interfaces for each master node. This has a public and private IP address.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/networkInterfaces",
|
||||
"name": "[concat(variables('nicName'),variables('masters')[copyIndex()])]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -99,7 +98,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "These are the master nodes.",
|
||||
"apiVersion": "2016-03-30",
|
||||
"type": "Microsoft.Compute/virtualMachines",
|
||||
"name": "[concat(variables('vmName'),variables('masters')[copyIndex()])]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -171,7 +169,6 @@
|
|||
{
|
||||
"name": "[concat(parameters('clusterName'),variables('masters')[copyIndex()],'/masterCustomScript')]",
|
||||
"type": "Microsoft.Compute/virtualMachines/extensions",
|
||||
"apiVersion": "2016-03-30",
|
||||
"location": "[resourceGroup().location]",
|
||||
"dependsOn": [
|
||||
"masterNodes"
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
"numberWorkerNodes": {
|
||||
"value": 4
|
||||
},
|
||||
"numberDataDisks": {
|
||||
"value": 1
|
||||
},
|
||||
"dataDiskSize": {
|
||||
"value": 512
|
||||
},
|
||||
|
|
|
@ -121,86 +121,97 @@ preinstall () {
|
|||
|
||||
attach_disks () {
|
||||
|
||||
BLACKLIST="/dev/sdz|/dev/sdy"
|
||||
|
||||
scan_for_new_disks() {
    # Prints, on a single line, the /dev/sd* whole-disk devices that look
    # unpartitioned. Each name is followed by one space.
    #
    # Candidates are the /dev/sd* entries that do not match ${BLACKLIST}
    # and do not end in a digit (entries ending in a digit are partitions).
    local candidate
    local unpartitioned=""

    for candidate in $(ls -1 /dev/sd*|egrep -v "${BLACKLIST}"|egrep -v "[0-9]$"); do
        # A missing "<disk>1" block device is taken to mean that the disk
        # has not been partitioned yet.
        [ -b ${candidate}1 ] || unpartitioned+="${candidate} "
    done

    echo "${unpartitioned}"
}
|
||||
|
||||
is_partitioned() {
    # Reports whether the given disk already carries at least one partition.
    #
    # Arguments:
    #   $1 - path of the whole-disk device, e.g. /dev/sdc
    # Returns:
    #   0 when `sfdisk -l` lists a non-empty partition on the disk,
    #   non-zero otherwise (callers partition the disk in that case).
    #
    # The previous version passed the captured sfdisk output to grep as a
    # FILE NAME, so grep always failed, and its exit status was inverted
    # relative to the function name.
    OUTPUT=$(sfdisk -l "${1}" 2>&1)

    # Some sfdisk versions state this explicitly for a blank disk.
    if grep -q "No partitions found" <<< "${OUTPUT}"; then
        return 1
    fi

    # Otherwise look for a partition row: first column is the disk name
    # followed by an optional "p" and a partition number. Rows whose last
    # column is "Empty" are unused table slots, not partitions.
    awk -v dev="${1}" '
        index($1, dev) == 1 &&
        substr($1, length(dev) + 1) ~ /^p?[0-9]+$/ &&
        $NF != "Empty" { found = 1 }
        END { exit !found }
    ' <<< "${OUTPUT}"
}
|
||||
|
||||
do_partition() {
    # Creates a single primary partition spanning the whole disk and marks
    # it with type "fd".
    #
    # Arguments:
    #   $1 - path of the whole-disk device to partition
    # Exits the shell with status 2 if fdisk reports a failure.
    DISK=${1}

    # Scripted fdisk dialogue: new (n) primary (p) partition number 1,
    # two empty answers to accept the default first and last sector,
    # then set the type (t) to fd and write (w) the table.
    fdisk "${DISK}" > /dev/null 2>&1 <<'FDISK_INPUT'
n
p
1


t
fd
w
FDISK_INPUT

    # With the here-document there is no pipeline, so $? is fdisk's own
    # exit status.
    if [ $? -ne 0 ]; then
        echo "An error occurred partitioning ${DISK}" >&2
        echo "I cannot continue" >&2
        exit 2
    fi
}
|
||||
|
||||
#
|
||||
# Locate the datadisk
|
||||
#
|
||||
DISKS=($(scan_for_new_disks))
|
||||
PARTLIST=""
|
||||
NUM_DISKS=0
|
||||
|
||||
Log "Everything under /dev\n$(ls /dev)"
|
||||
echo "Disks are ${DISKS[@]}"
|
||||
|
||||
# List all disks.
|
||||
Log "lsblk: \n$(lsblk)"
|
||||
|
||||
local DISKS=`lsblk -d | grep "disk" | awk -F ' ' '{print $1}'`
|
||||
Log "DISKS=$DISKS"
|
||||
|
||||
# List all partitions.
|
||||
local PARTS=`lsblk | grep part`
|
||||
Log "PARTS=$PARTS"
|
||||
|
||||
# Get the disk without any partitions.
|
||||
local DD=`for d in $DISKS; do echo $PARTS | grep -vo $d && echo $d; done`
|
||||
Log "DD=$DD"
|
||||
|
||||
#
|
||||
# Format/Create partitions
|
||||
#
|
||||
|
||||
Log "Creating label"
|
||||
local n=0
|
||||
until [ $n -ge 5 ];
|
||||
for DISK in "${DISKS[@]}";
|
||||
do
|
||||
parted /dev/$DD mklabel gpt && break
|
||||
n=$[$n + 1]
|
||||
Log "Label creation failures $n"
|
||||
sleep 10
|
||||
NUM_DISKS=$((NUM_DISKS + 1))
|
||||
echo "Working on ${DISK}"
|
||||
is_partitioned ${DISK}
|
||||
if [ ${?} -ne 0 ];
|
||||
then
|
||||
echo "${DISK} is not partitioned, partitioning"
|
||||
do_partition ${DISK}
|
||||
fi
|
||||
PARTITION=$(fdisk -l ${DISK}|grep -A 1 Device|tail -n 1|awk '{print $1}')
|
||||
MOUNTPOINT=${PARTITION}
|
||||
PARTLIST="${PARTLIST} ${PARTITION}"
|
||||
echo "Next mount point appears to be ${MOUNTPOINT}"
|
||||
echo "Partition ${PARTITION}"
|
||||
read UUID UNUSED < <(blkid -u filesystem ${PARTITION}|awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
|
||||
echo -e "\t${UUID}\t${MOUNTPOINT}\t${UNUSED}"
|
||||
done
|
||||
|
||||
Log "Creating partition"
|
||||
n=0
|
||||
until [ $n -ge 5 ];
|
||||
do
|
||||
parted -a opt /dev/$DD mkpart primary ext4 0% 100% && break
|
||||
n=$[$n + 1]
|
||||
Log "Partition creation failures $n"
|
||||
sleep 10
|
||||
done
|
||||
echo "PARTLIST = ${PARTLIST}"
|
||||
|
||||
# write file-system lazily for performance reasons.
|
||||
n=0
|
||||
until [ $n -ge 5 ];
|
||||
do
|
||||
mkfs.ext4 -L datapartition /dev/${DD}1 -F && break
|
||||
n=$[$n + 1]
|
||||
Log "FS creation failures $n"
|
||||
sleep 10
|
||||
done
|
||||
echo "Create software raid array"
|
||||
mkdir $MOUNT
|
||||
mdadm --create /dev/md127 --level 0 --raid-devices $NUM_DISKS ${PARTLIST##*( )}
|
||||
mkfs -t ext4 /dev/md127
|
||||
echo "/dev/md127 $MOUNT ext4 defaults,nofail 0 2" >> /etc/fstab
|
||||
mount -a
|
||||
mount
|
||||
|
||||
# Create mount point
|
||||
mkdir $MOUNT -p
|
||||
df -BG
|
||||
|
||||
#
|
||||
# Add to FSTAB
|
||||
#
|
||||
|
||||
# Get the UUID
|
||||
blkid -s none
|
||||
local UUID=`blkid -s UUID -o value /dev/${DD}1`
|
||||
local LINE=""
|
||||
|
||||
if [ -z "$UUID" ]; then
|
||||
# Fall back to disk
|
||||
LINE="$/dev/${DD}1\t$MOUNT\text4\tnoatime,nodiratime,nodev,noexec,nosuid\t1 2"
|
||||
else
|
||||
# Use UUID
|
||||
LINE="UUID=$UUID\t$MOUNT\text4\tnoatime,nodiratime,nodev,noexec,nosuid\t1 2"
|
||||
fi
|
||||
|
||||
Log "Adding '$LINE' to FSTAB"
|
||||
echo -e "$LINE" >> /etc/fstab
|
||||
|
||||
# mount
|
||||
mount $MOUNT
|
||||
check_error $? "Could not mount $DD to $MOUNT"
|
||||
chmod -R 0777 $MOUNT
|
||||
}
|
||||
|
||||
############################################################
|
||||
|
@ -413,10 +424,10 @@ setup_node () {
|
|||
|
||||
}
|
||||
|
||||
echo -e 'soft nofile 38768' >> /etc/security/limits.conf
|
||||
echo -e 'hard nofile 38768' >> /etc/security/limits.conf
|
||||
echo -e 'soft nproc 38768' >> /etc/security/limits.conf
|
||||
echo -e 'hard nproc 38768' >> /etc/security/limits.conf
|
||||
echo -e '* soft nofile 38768' >> /etc/security/limits.conf
|
||||
echo -e '* hard nofile 38768' >> /etc/security/limits.conf
|
||||
echo -e '* soft nproc 38768' >> /etc/security/limits.conf
|
||||
echo -e '* hard nproc 38768' >> /etc/security/limits.conf
|
||||
|
||||
echo -e '
|
||||
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
"dataDiskSize": {
|
||||
"type": "int"
|
||||
},
|
||||
"numberDataDisks": {
|
||||
"type": "int"
|
||||
},
|
||||
"storageAccountName": {
|
||||
"type": "string"
|
||||
},
|
||||
|
@ -41,8 +44,8 @@
|
|||
"minValue": 1,
|
||||
"maxValue": 40
|
||||
},
|
||||
"replication" : {
|
||||
"type" : "int"
|
||||
"replication": {
|
||||
"type": "int"
|
||||
}
|
||||
},
|
||||
"variables": {
|
||||
|
@ -60,7 +63,6 @@
|
|||
"resources": [
|
||||
{
|
||||
"comments": "Network interfaces for each worker node. These will only be private IP addresses.",
|
||||
"apiVersion": "2015-06-15",
|
||||
"type": "Microsoft.Network/networkInterfaces",
|
||||
"name": "[concat(variables('nicName'),'Worker',copyIndex())]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -87,7 +89,6 @@
|
|||
},
|
||||
{
|
||||
"comments": "Worker nodes.",
|
||||
"apiVersion": "2016-03-30",
|
||||
"type": "Microsoft.Compute/virtualMachines",
|
||||
"name": "[concat(variables('defaultName'),'Worker',copyIndex())]",
|
||||
"location": "[variables('location')]",
|
||||
|
@ -130,16 +131,20 @@
|
|||
"caching": "ReadWrite",
|
||||
"createOption": "FromImage"
|
||||
},
|
||||
"dataDisks": [
|
||||
"copy": [
|
||||
{
|
||||
"name": "[concat('workerdatadisk',copyIndex())]",
|
||||
"diskSizeGB": "[parameters('dataDiskSize')]",
|
||||
"vhd": {
|
||||
"uri": "[concat(concat(reference(resourceId(resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2015-06-15').primaryEndpoints['blob'], concat('worker',copyIndex(),'/')),'data.vhd')]"
|
||||
},
|
||||
"lun": 0,
|
||||
"caching": "ReadWrite",
|
||||
"createOption": "Empty"
|
||||
"name": "dataDisks",
|
||||
"count": "[parameters('numberDataDisks')]",
|
||||
"input": {
|
||||
"name": "[concat('workerdatadisk_',copyIndex('workerNodes'),'_',copyIndex('dataDisks'))]",
|
||||
"diskSizeGB": "[parameters('dataDiskSize')]",
|
||||
"vhd": {
|
||||
"uri": "[concat(concat(reference(resourceId(resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2015-06-15').primaryEndpoints['blob'], concat('worker',copyIndex('workerNodes'),'/')),'data-',copyIndex('datadisks'),'.vhd')]"
|
||||
},
|
||||
"lun": "[copyIndex('dataDisks')]",
|
||||
"caching": "ReadWrite",
|
||||
"createOption": "Empty"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -169,7 +174,6 @@
|
|||
"name": "workerHadoopSetup",
|
||||
"count": "[parameters('numberWorkerNodes')]"
|
||||
},
|
||||
"apiVersion": "2016-03-30",
|
||||
"tags": {
|
||||
"displayName": "hadoopSetup"
|
||||
},
|
||||
|
|
Загрузка…
Ссылка в новой задаче