Hadoop Support multiple data disks (#416)

* Remove apiVersion, use mikes RAID code.

* Add numberDataDisks to parameters.json

* resource deployments need api version

* Worker DataDisk names

* Naming bug

* -_-

* Typo

* limits were not set correctly.
This commit is contained in:
Jeffrey Robinson 2019-02-06 15:09:51 -08:00 committed by Krishna Nithin
Parent 1f5577c2ef
Commit 35c70483ae
7 changed files with 121 additions and 103 deletions

View file

@ -16,7 +16,7 @@
operations.
</description>
<name>io.file.buffer.size</name>
<value>131072</value>
<value>32768</value>
</property>
<property>
<description>

View file

@ -102,6 +102,15 @@
"description": "Size of the attached data disks in GB."
}
},
"numberDataDisks" : {
"type": "int",
"defaultValue": 1,
"minValue": 1,
"maxValue": 32,
"metadata": {
"description": "The number of data disks to attach to each VM."
}
},
"batchSize": {
"type": "int",
"defaultValue": 10,
@ -144,7 +153,6 @@
"comments": "Storage account which holds all containers and logs.",
"type": "Microsoft.Storage/storageAccounts",
"name": "[toLower(variables('storageAccountName'))]",
"apiVersion": "2016-01-01",
"location": "[variables('location')]",
"properties": {
"accountType": "[parameters('storageAccountType')]"
@ -152,7 +160,6 @@
},
{
"comments": "Virtual network in which the nodes will reside.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/virtualNetworks",
"name": "[variables('virtualNetworkName')]",
"location": "[variables('location')]",
@ -174,7 +181,6 @@
},
{
"comments": "The public IP addresses for NAT.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/publicIPAddresses",
"name": "[variables('publicIPAddressName')]",
"location": "[variables('location')]",
@ -188,7 +194,6 @@
{
"type": "Microsoft.Compute/availabilitySets",
"name": "hadoop",
"apiVersion": "2016-03-30",
"location": "[resourceGroup().location]",
"properties": {
"platformFaultDomainCount": 1,
@ -197,7 +202,6 @@
},
{
"comments": "Network security group for the network.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/networkSecurityGroups",
"name": "[variables('networkSecurityGroupName')]",
"location": "[variables('location')]",
@ -261,7 +265,6 @@
{
"name": "[variables('loadBalancerName')]",
"type": "Microsoft.Network/loadBalancers",
"apiVersion": "2015-06-15",
"location": "[variables('location')]",
"properties": {
"frontendIPConfigurations": [
@ -335,9 +338,9 @@
]
},
{
"apiVersion": "2016-02-01",
"name": "workerNodeTemplate",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2015-01-01",
"dependsOn": [
"[variables('storageAccountName')]",
"[variables('loadBalancerName')]"
@ -364,6 +367,9 @@
"numberWorkerNodes": {
"value": "[parameters('numberWorkerNodes')]"
},
"numberDataDisks" : {
"value": "[parameters('numberDataDisks')]"
},
"dataDiskSize": {
"value": "[parameters('dataDiskSize')]"
},
@ -392,9 +398,9 @@
}
},
{
"apiVersion": "2016-02-01",
"name": "masterNodeTemplate",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2015-01-01",
"dependsOn": [
"[variables('storageAccountName')]",
"[variables('loadBalancerName')]"
@ -446,9 +452,9 @@
}
},
{
"apiVersion": "2016-02-01",
"name": "jumpBoxTemplate",
"type": "Microsoft.Resources/deployments",
"apiVersion": "2015-01-01",
"dependsOn": [
"masterNodeTemplate",
"workerNodeTemplate"

View file

@ -56,7 +56,6 @@
"resources": [
{
"comments": "Network interfaces for jumpbox.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/networkInterfaces",
"name": "[variables('nicName')]",
"location": "[variables('location')]",
@ -89,7 +88,6 @@
},
{
"comments": "Jump box node.",
"apiVersion": "2016-03-30",
"type": "Microsoft.Compute/virtualMachines",
"name": "[variables('vmName')]",
"location": "[variables('location')]",
@ -145,7 +143,6 @@
{
"name": "[concat(variables('vmName'),'/jumpboxCustomScript')]",
"type": "Microsoft.Compute/virtualMachines/extensions",
"apiVersion": "2016-03-30",
"location": "[resourceGroup().location]",
"dependsOn": [
"[resourceId(resourceGroup().name, 'Microsoft.Compute/virtualMachines',variables('vmName'))]"

View file

@ -62,7 +62,6 @@
"resources": [
{
"comments": "Network interfaces for each master node. This has a public and private IP address.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/networkInterfaces",
"name": "[concat(variables('nicName'),variables('masters')[copyIndex()])]",
"location": "[variables('location')]",
@ -99,7 +98,6 @@
},
{
"comments": "These are the master nodes.",
"apiVersion": "2016-03-30",
"type": "Microsoft.Compute/virtualMachines",
"name": "[concat(variables('vmName'),variables('masters')[copyIndex()])]",
"location": "[variables('location')]",
@ -171,7 +169,6 @@
{
"name": "[concat(parameters('clusterName'),variables('masters')[copyIndex()],'/masterCustomScript')]",
"type": "Microsoft.Compute/virtualMachines/extensions",
"apiVersion": "2016-03-30",
"location": "[resourceGroup().location]",
"dependsOn": [
"masterNodes"

View file

@ -20,6 +20,9 @@
"numberWorkerNodes": {
"value": 4
},
"numberDataDisks": {
"value": 1
},
"dataDiskSize": {
"value": 512
},

View file

@ -121,86 +121,97 @@ preinstall () {
attach_disks () {
BLACKLIST="/dev/sdz|/dev/sdy"
scan_for_new_disks() {
# Looks for unpartitioned disks
declare -a RET
DEVS=($(ls -1 /dev/sd*|egrep -v "${BLACKLIST}"|egrep -v "[0-9]$"))
for DEV in "${DEVS[@]}";
do
# Check each device if there is a "1" partition. If not,
# "assume" it is not partitioned.
if [ ! -b ${DEV}1 ];
then
RET+="${DEV} "
fi
done
echo "${RET}"
}
is_partitioned() {
# Checks if there is a valid partition table on the
# specified disk
OUTPUT=$(sfdisk -l ${1} 2>&1)
grep "No partitions found" "${OUTPUT}" >/dev/null 2>&1
return "${?}"
}
do_partition() {
# This function creates one (1) primary partition on the
# disk, using all available space
DISK=${1}
echo "n
p
1
t
fd
w"| fdisk "${DISK}" > /dev/null 2>&1
#
# Use the bash-specific $PIPESTATUS to ensure we get the correct exit code
# from fdisk and not from echo
if [ ${PIPESTATUS[1]} -ne 0 ];
then
echo "An error occurred partitioning ${DISK}" >&2
echo "I cannot continue" >&2
exit 2
fi
}
#
# Locate the datadisk
#
DISKS=($(scan_for_new_disks))
PARTLIST=""
NUM_DISKS=0
Log "Everything under /dev\n$(ls /dev)"
echo "Disks are ${DISKS[@]}"
# List all disks.
Log "lsblk: \n$(lsblk)"
local DISKS=`lsblk -d | grep "disk" | awk -F ' ' '{print $1}'`
Log "DISKS=$DISKS"
# List all partitions.
local PARTS=`lsblk | grep part`
Log "PARTS=$PARTS"
# Get the disk without any partitions.
local DD=`for d in $DISKS; do echo $PARTS | grep -vo $d && echo $d; done`
Log "DD=$DD"
#
# Format/Create partitions
#
Log "Creating label"
local n=0
until [ $n -ge 5 ];
for DISK in "${DISKS[@]}";
do
parted /dev/$DD mklabel gpt && break
n=$[$n + 1]
Log "Label creation failures $n"
sleep 10
NUM_DISKS=$((NUM_DISKS + 1))
echo "Working on ${DISK}"
is_partitioned ${DISK}
if [ ${?} -ne 0 ];
then
echo "${DISK} is not partitioned, partitioning"
do_partition ${DISK}
fi
PARTITION=$(fdisk -l ${DISK}|grep -A 1 Device|tail -n 1|awk '{print $1}')
MOUNTPOINT=${PARTITION}
PARTLIST="${PARTLIST} ${PARTITION}"
echo "Next mount point appears to be ${MOUNTPOINT}"
echo "Partition ${PARTITION}"
read UUID UNUSED < <(blkid -u filesystem ${PARTITION}|awk -F "[= ]" '{print $3" "$5}'|tr -d "\"")
echo -e "\t${UUID}\t${MOUNTPOINT}\t${UNUSED}"
done
Log "Creating partition"
n=0
until [ $n -ge 5 ];
do
parted -a opt /dev/$DD mkpart primary ext4 0% 100% && break
n=$[$n + 1]
Log "Partition creation failures $n"
sleep 10
done
echo "PARTLIST = ${PARTLIST}"
# write file-system lazily for performance reasons.
n=0
until [ $n -ge 5 ];
do
mkfs.ext4 -L datapartition /dev/${DD}1 -F && break
n=$[$n + 1]
Log "FS creation failures $n"
sleep 10
done
echo "Create software raid array"
mkdir $MOUNT
mdadm --create /dev/md127 --level 0 --raid-devices $NUM_DISKS ${PARTLIST##*( )}
mkfs -t ext4 /dev/md127
echo "/dev/md127 $MOUNT ext4 defaults,nofail 0 2" >> /etc/fstab
mount -a
mount
# Create mount point
mkdir $MOUNT -p
df -BG
#
# Add to FSTAB
#
# Get the UUID
blkid -s none
local UUID=`blkid -s UUID -o value /dev/${DD}1`
local LINE=""
if [ -z "$UUID" ]; then
# Fall back to disk
LINE="$/dev/${DD}1\t$MOUNT\text4\tnoatime,nodiratime,nodev,noexec,nosuid\t1 2"
else
# Use UUID
LINE="UUID=$UUID\t$MOUNT\text4\tnoatime,nodiratime,nodev,noexec,nosuid\t1 2"
fi
Log "Adding '$LINE' to FSTAB"
echo -e "$LINE" >> /etc/fstab
# mount
mount $MOUNT
check_error $? "Could not mount $DD to $MOUNT"
chmod -R 0777 $MOUNT
}
############################################################
@ -413,10 +424,10 @@ setup_node () {
}
echo -e 'soft nofile 38768' >> /etc/security/limits.conf
echo -e 'hard nofile 38768' >> /etc/security/limits.conf
echo -e 'soft nproc 38768' >> /etc/security/limits.conf
echo -e 'hard nproc 38768' >> /etc/security/limits.conf
echo -e '* soft nofile 38768' >> /etc/security/limits.conf
echo -e '* hard nofile 38768' >> /etc/security/limits.conf
echo -e '* soft nproc 38768' >> /etc/security/limits.conf
echo -e '* hard nproc 38768' >> /etc/security/limits.conf
echo -e '
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then

View file

@ -21,6 +21,9 @@
"dataDiskSize": {
"type": "int"
},
"numberDataDisks": {
"type": "int"
},
"storageAccountName": {
"type": "string"
},
@ -41,8 +44,8 @@
"minValue": 1,
"maxValue": 40
},
"replication" : {
"type" : "int"
"replication": {
"type": "int"
}
},
"variables": {
@ -60,7 +63,6 @@
"resources": [
{
"comments": "Network interfaces for each worker node. These will only be private IP addresses.",
"apiVersion": "2015-06-15",
"type": "Microsoft.Network/networkInterfaces",
"name": "[concat(variables('nicName'),'Worker',copyIndex())]",
"location": "[variables('location')]",
@ -87,7 +89,6 @@
},
{
"comments": "Worker nodes.",
"apiVersion": "2016-03-30",
"type": "Microsoft.Compute/virtualMachines",
"name": "[concat(variables('defaultName'),'Worker',copyIndex())]",
"location": "[variables('location')]",
@ -130,16 +131,20 @@
"caching": "ReadWrite",
"createOption": "FromImage"
},
"dataDisks": [
"copy": [
{
"name": "[concat('workerdatadisk',copyIndex())]",
"diskSizeGB": "[parameters('dataDiskSize')]",
"vhd": {
"uri": "[concat(concat(reference(resourceId(resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2015-06-15').primaryEndpoints['blob'], concat('worker',copyIndex(),'/')),'data.vhd')]"
},
"lun": 0,
"caching": "ReadWrite",
"createOption": "Empty"
"name": "dataDisks",
"count": "[parameters('numberDataDisks')]",
"input": {
"name": "[concat('workerdatadisk_',copyIndex('workerNodes'),'_',copyIndex('dataDisks'))]",
"diskSizeGB": "[parameters('dataDiskSize')]",
"vhd": {
"uri": "[concat(concat(reference(resourceId(resourceGroup().name, 'Microsoft.Storage/storageAccounts', parameters('storageAccountName')), '2015-06-15').primaryEndpoints['blob'], concat('worker',copyIndex('workerNodes'),'/')),'data-',copyIndex('datadisks'),'.vhd')]"
},
"lun": "[copyIndex('dataDisks')]",
"caching": "ReadWrite",
"createOption": "Empty"
}
}
]
},
@ -169,7 +174,6 @@
"name": "workerHadoopSetup",
"count": "[parameters('numberWorkerNodes')]"
},
"apiVersion": "2016-03-30",
"tags": {
"displayName": "hadoopSetup"
},