зеркало из https://github.com/Azure/AgentBaker.git
cleanup: remove unused self-contained content (#5060)
Co-authored-by: Cameron Meissner <cameissner@microsoft.com>
This commit is contained in:
Родитель
f9bcb7af2c
Коммит
71c8d8a7b0
|
@ -25,7 +25,7 @@ else
|
|||
echo "shellcheck installed"
|
||||
fi
|
||||
|
||||
filesToCheck=$(find . -type f -name "*.sh" -not -path './parts/linux/cloud-init/artifacts/*' -not -path './pkg/agent/testdata/*' -not -path './vendor/*' -not -path './hack/tools/vendor/*' -not -path './.git/*' -not -path './self-contained/*' -not -path './hack/tools/bin/shellspecsrc/*')
|
||||
filesToCheck=$(find . -type f -name "*.sh" -not -path './parts/linux/cloud-init/artifacts/*' -not -path './pkg/agent/testdata/*' -not -path './vendor/*' -not -path './hack/tools/vendor/*' -not -path './.git/*' -not -path './hack/tools/bin/shellspecsrc/*')
|
||||
|
||||
# also shell-check generated test data
|
||||
generatedTestData=$(find ./pkg/agent/testdata -type f -name "*.sh" )
|
||||
|
|
|
@ -1,156 +0,0 @@
|
|||
PROVISION_OUTPUT="/var/log/azure/cluster-provision-cse-output.log";
|
||||
echo $(date),$(hostname) > ${PROVISION_OUTPUT};
|
||||
{{if ShouldEnableCustomData}}
|
||||
cloud-init status --wait > /dev/null 2>&1;
|
||||
[ $? -ne 0 ] && echo 'cloud-init failed' >> ${PROVISION_OUTPUT} && exit 1;
|
||||
echo "cloud-init succeeded" >> ${PROVISION_OUTPUT};
|
||||
{{end}}
|
||||
{{if IsAKSCustomCloud}}
|
||||
REPO_DEPOT_ENDPOINT="{{AKSCustomCloudRepoDepotEndpoint}}"
|
||||
{{GetInitAKSCustomCloudFilepath}} >> /var/log/azure/cluster-provision.log 2>&1;
|
||||
{{end}}
|
||||
ADMINUSER={{GetParameter "linuxAdminUsername"}}
|
||||
MOBY_VERSION={{GetParameter "mobyVersion"}}
|
||||
TENANT_ID={{GetVariable "tenantID"}}
|
||||
KUBERNETES_VERSION={{GetParameter "kubernetesVersion"}}
|
||||
HYPERKUBE_URL={{GetParameter "kubernetesHyperkubeSpec"}}
|
||||
KUBE_BINARY_URL={{GetParameter "kubeBinaryURL"}}
|
||||
CUSTOM_KUBE_BINARY_URL={{GetParameter "customKubeBinaryURL"}}
|
||||
PRIVATE_KUBE_BINARY_URL="{{GetLinuxPrivatePackageURL}}"
|
||||
KUBEPROXY_URL={{GetParameter "kubeProxySpec"}}
|
||||
APISERVER_PUBLIC_KEY={{GetParameter "apiServerCertificate"}}
|
||||
SUBSCRIPTION_ID={{GetVariable "subscriptionId"}}
|
||||
RESOURCE_GROUP={{GetVariable "resourceGroup"}}
|
||||
LOCATION={{GetVariable "location"}}
|
||||
VM_TYPE={{GetVariable "vmType"}}
|
||||
SUBNET={{GetVariable "subnetName"}}
|
||||
NETWORK_SECURITY_GROUP={{GetVariable "nsgName"}}
|
||||
VIRTUAL_NETWORK={{GetVariable "virtualNetworkName"}}
|
||||
VIRTUAL_NETWORK_RESOURCE_GROUP={{GetVariable "virtualNetworkResourceGroupName"}}
|
||||
ROUTE_TABLE={{GetVariable "routeTableName"}}
|
||||
PRIMARY_AVAILABILITY_SET={{GetVariable "primaryAvailabilitySetName"}}
|
||||
PRIMARY_SCALE_SET={{GetVariable "primaryScaleSetName"}}
|
||||
SERVICE_PRINCIPAL_CLIENT_ID={{GetParameter "servicePrincipalClientId"}}
|
||||
NETWORK_PLUGIN={{GetParameter "networkPlugin"}}
|
||||
NETWORK_POLICY={{GetParameter "networkPolicy"}}
|
||||
VNET_CNI_PLUGINS_URL={{GetParameter "vnetCniLinuxPluginsURL"}}
|
||||
CLOUDPROVIDER_BACKOFF={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoff"}}
|
||||
CLOUDPROVIDER_BACKOFF_MODE={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoffMode"}}
|
||||
CLOUDPROVIDER_BACKOFF_RETRIES={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoffRetries"}}
|
||||
CLOUDPROVIDER_BACKOFF_EXPONENT={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoffExponent"}}
|
||||
CLOUDPROVIDER_BACKOFF_DURATION={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoffDuration"}}
|
||||
CLOUDPROVIDER_BACKOFF_JITTER={{GetParameterProperty "cloudproviderConfig" "cloudProviderBackoffJitter"}}
|
||||
CLOUDPROVIDER_RATELIMIT={{GetParameterProperty "cloudproviderConfig" "cloudProviderRateLimit"}}
|
||||
CLOUDPROVIDER_RATELIMIT_QPS={{GetParameterProperty "cloudproviderConfig" "cloudProviderRateLimitQPS"}}
|
||||
CLOUDPROVIDER_RATELIMIT_QPS_WRITE={{GetParameterProperty "cloudproviderConfig" "cloudProviderRateLimitQPSWrite"}}
|
||||
CLOUDPROVIDER_RATELIMIT_BUCKET={{GetParameterProperty "cloudproviderConfig" "cloudProviderRateLimitBucket"}}
|
||||
CLOUDPROVIDER_RATELIMIT_BUCKET_WRITE={{GetParameterProperty "cloudproviderConfig" "cloudProviderRateLimitBucketWrite"}}
|
||||
LOAD_BALANCER_DISABLE_OUTBOUND_SNAT={{GetParameterProperty "cloudproviderConfig" "cloudProviderDisableOutboundSNAT"}}
|
||||
USE_MANAGED_IDENTITY_EXTENSION={{GetVariable "useManagedIdentityExtension"}}
|
||||
USE_INSTANCE_METADATA={{GetVariable "useInstanceMetadata"}}
|
||||
LOAD_BALANCER_SKU={{GetVariable "loadBalancerSku"}}
|
||||
EXCLUDE_MASTER_FROM_STANDARD_LB={{GetVariable "excludeMasterFromStandardLB"}}
|
||||
MAXIMUM_LOADBALANCER_RULE_COUNT={{GetVariable "maximumLoadBalancerRuleCount"}}
|
||||
CONTAINER_RUNTIME={{GetParameter "containerRuntime"}}
|
||||
CLI_TOOL={{GetParameter "cliTool"}}
|
||||
CONTAINERD_DOWNLOAD_URL_BASE={{GetParameter "containerdDownloadURLBase"}}
|
||||
NETWORK_MODE={{GetParameter "networkMode"}}
|
||||
KUBE_BINARY_URL={{GetParameter "kubeBinaryURL"}}
|
||||
USER_ASSIGNED_IDENTITY_ID={{GetVariable "userAssignedIdentityID"}}
|
||||
API_SERVER_NAME={{GetKubernetesEndpoint}}
|
||||
IS_VHD={{GetVariable "isVHD"}}
|
||||
GPU_NODE={{GetVariable "gpuNode"}}
|
||||
SGX_NODE={{GetVariable "sgxNode"}}
|
||||
MIG_NODE={{GetVariable "migNode"}}
|
||||
CONFIG_GPU_DRIVER_IF_NEEDED={{GetVariable "configGPUDriverIfNeeded"}}
|
||||
ENABLE_GPU_DEVICE_PLUGIN_IF_NEEDED={{GetVariable "enableGPUDevicePluginIfNeeded"}}
|
||||
TELEPORTD_PLUGIN_DOWNLOAD_URL={{GetParameter "teleportdPluginURL"}}
|
||||
CONTAINERD_VERSION={{GetParameter "containerdVersion"}}
|
||||
CONTAINERD_PACKAGE_URL={{GetParameter "containerdPackageURL"}}
|
||||
RUNC_VERSION={{GetParameter "runcVersion"}}
|
||||
RUNC_PACKAGE_URL={{GetParameter "runcPackageURL"}}
|
||||
ENABLE_HOSTS_CONFIG_AGENT="{{EnableHostsConfigAgent}}"
|
||||
DISABLE_SSH="{{ShouldDisableSSH}}"
|
||||
NEEDS_CONTAINERD="{{NeedsContainerd}}"
|
||||
TELEPORT_ENABLED="{{TeleportEnabled}}"
|
||||
SHOULD_CONFIGURE_HTTP_PROXY="{{ShouldConfigureHTTPProxy}}"
|
||||
SHOULD_CONFIGURE_HTTP_PROXY_CA="{{ShouldConfigureHTTPProxyCA}}"
|
||||
HTTP_PROXY_TRUSTED_CA="{{GetHTTPProxyCA}}"
|
||||
SHOULD_CONFIGURE_CUSTOM_CA_TRUST="{{ShouldConfigureCustomCATrust}}"
|
||||
CUSTOM_CA_TRUST_COUNT="{{len GetCustomCATrustConfigCerts}}"
|
||||
{{range $i, $cert := GetCustomCATrustConfigCerts}}
|
||||
CUSTOM_CA_CERT_{{$i}}="{{$cert}}"
|
||||
{{end}}
|
||||
IS_KRUSTLET="{{IsKrustlet}}"
|
||||
GPU_NEEDS_FABRIC_MANAGER="{{GPUNeedsFabricManager}}"
|
||||
#NEEDS_DOCKER_LOGIN="{{and IsDockerContainerRuntime HasPrivateAzureRegistryServer}}" This field is no longer required for the new contract since Docker is out of support and its value depends on Container Runtime = Docker
|
||||
IPV6_DUAL_STACK_ENABLED="{{IsIPv6DualStackFeatureEnabled}}"
|
||||
OUTBOUND_COMMAND="{{GetOutboundCommand}}"
|
||||
ENABLE_UNATTENDED_UPGRADES="{{EnableUnattendedUpgrade}}"
|
||||
ENSURE_NO_DUPE_PROMISCUOUS_BRIDGE="{{ and NeedsContainerd IsKubenet (not HasCalicoNetworkPolicy) }}"
|
||||
SHOULD_CONFIG_SWAP_FILE="{{ShouldConfigSwapFile}}"
|
||||
SHOULD_CONFIG_TRANSPARENT_HUGE_PAGE="{{ShouldConfigTransparentHugePage}}"
|
||||
SHOULD_CONFIG_CONTAINERD_ULIMITS="{{ShouldConfigContainerdUlimits}}"
|
||||
CONTAINERD_ULIMITS="{{GetContainerdUlimitString}}"
|
||||
{{/* both CLOUD and ENVIRONMENT have special values when IsAKSCustomCloud == true */}}
|
||||
{{/* CLOUD uses AzureStackCloud and seems to be used by kubelet, k8s cloud provider */}}
|
||||
{{/* target environment seems to go to ARM SDK config */}}
|
||||
{{/* not sure why separate/inconsistent? */}}
|
||||
{{/* see GetCustomEnvironmentJSON for more weirdness. */}}
|
||||
TARGET_CLOUD="{{- if IsAKSCustomCloud -}} AzureStackCloud {{- else -}} {{GetTargetEnvironment}} {{- end -}}"
|
||||
TARGET_ENVIRONMENT="{{GetTargetEnvironment}}"
|
||||
CUSTOM_ENV_JSON="{{GetBase64EncodedEnvironmentJSON}}"
|
||||
IS_CUSTOM_CLOUD="{{IsAKSCustomCloud}}"
|
||||
CSE_HELPERS_FILEPATH="{{GetCSEHelpersScriptFilepath}}"
|
||||
CSE_DISTRO_HELPERS_FILEPATH="{{GetCSEHelpersScriptDistroFilepath}}"
|
||||
CSE_INSTALL_FILEPATH="{{GetCSEInstallScriptFilepath}}"
|
||||
CSE_DISTRO_INSTALL_FILEPATH="{{GetCSEInstallScriptDistroFilepath}}"
|
||||
CSE_CONFIG_FILEPATH="{{GetCSEConfigScriptFilepath}}"
|
||||
AZURE_PRIVATE_REGISTRY_SERVER="{{GetPrivateAzureRegistryServer}}"
|
||||
HAS_CUSTOM_SEARCH_DOMAIN="{{HasCustomSearchDomain}}"
|
||||
CUSTOM_SEARCH_DOMAIN_FILEPATH="{{GetCustomSearchDomainsCSEScriptFilepath}}"
|
||||
HTTP_PROXY_URLS="{{GetHTTPProxy}}"
|
||||
HTTPS_PROXY_URLS="{{GetHTTPSProxy}}"
|
||||
NO_PROXY_URLS="{{GetNoProxy}}"
|
||||
PROXY_VARS="{{GetProxyVariables}}"
|
||||
ENABLE_TLS_BOOTSTRAPPING="{{EnableTLSBootstrapping}}"
|
||||
ENABLE_SECURE_TLS_BOOTSTRAPPING="{{EnableSecureTLSBootstrapping}}"
|
||||
DHCPV6_SERVICE_FILEPATH="{{GetDHCPv6ServiceCSEScriptFilepath}}"
|
||||
DHCPV6_CONFIG_FILEPATH="{{GetDHCPv6ConfigCSEScriptFilepath}}"
|
||||
THP_ENABLED="{{GetTransparentHugePageEnabled}}"
|
||||
THP_DEFRAG="{{GetTransparentHugePageDefrag}}"
|
||||
SERVICE_PRINCIPAL_FILE_CONTENT="{{GetServicePrincipalSecret}}"
|
||||
KUBELET_CLIENT_CONTENT="{{GetKubeletClientKey}}"
|
||||
KUBELET_CLIENT_CERT_CONTENT="{{GetKubeletClientCert}}"
|
||||
KUBELET_CONFIG_FILE_ENABLED="{{IsKubeletConfigFileEnabled}}"
|
||||
KUBELET_CONFIG_FILE_CONTENT="{{GetKubeletConfigFileContentBase64}}"
|
||||
SWAP_FILE_SIZE_MB="{{GetSwapFileSizeMB}}"
|
||||
GPU_DRIVER_VERSION="{{GPUDriverVersion}}"
|
||||
GPU_INSTANCE_PROFILE="{{GetGPUInstanceProfile}}"
|
||||
CUSTOM_SEARCH_DOMAIN_NAME="{{GetSearchDomainName}}"
|
||||
CUSTOM_SEARCH_REALM_USER="{{GetSearchDomainRealmUser}}"
|
||||
CUSTOM_SEARCH_REALM_PASSWORD="{{GetSearchDomainRealmPassword}}"
|
||||
MESSAGE_OF_THE_DAY="{{GetMessageOfTheDay}}"
|
||||
HAS_KUBELET_DISK_TYPE="{{HasKubeletDiskType}}"
|
||||
NEEDS_CGROUPV2="{{IsCgroupV2}}"
|
||||
TLS_BOOTSTRAP_TOKEN="{{GetTLSBootstrapTokenForKubeConfig}}"
|
||||
KUBELET_FLAGS="{{GetKubeletConfigKeyVals}}"
|
||||
NETWORK_POLICY="{{GetParameter "networkPolicy"}}"
|
||||
{{- if not (IsKubernetesVersionGe "1.17.0")}}
|
||||
KUBELET_IMAGE="{{GetHyperkubeImageReference}}"
|
||||
{{end}}
|
||||
{{if IsKubernetesVersionGe "1.16.0"}}
|
||||
KUBELET_NODE_LABELS="{{GetAgentKubernetesLabels . }}"
|
||||
{{else}}
|
||||
KUBELET_NODE_LABELS="{{GetAgentKubernetesLabelsDeprecated . }}"
|
||||
{{end}}
|
||||
AZURE_ENVIRONMENT_FILEPATH="{{- if IsAKSCustomCloud}}/etc/kubernetes/{{GetTargetEnvironment}}.json{{end}}"
|
||||
KUBE_CA_CRT="{{GetParameter "caCertificate"}}"
|
||||
KUBENET_TEMPLATE="{{GetKubenetTemplate}}"
|
||||
CONTAINERD_CONFIG_CONTENT="{{GetContainerdConfigContent}}"
|
||||
CONTAINERD_CONFIG_NO_GPU_CONTENT="{{GetContainerdConfigNoGPUContent}}"
|
||||
IS_KATA="{{IsKata}}"
|
||||
ARTIFACT_STREAMING_ENABLED="{{IsArtifactStreamingEnabled}}"
|
||||
SYSCTL_CONTENT="{{GetSysctlContent}}"
|
||||
PRIVATE_EGRESS_PROXY_ADDRESS="{{GetPrivateEgressProxyAddress}}"
|
||||
/usr/bin/nohup /bin/bash -c "/bin/bash /opt/azure/containers/provision_start.sh"
|
|
@ -1,697 +0,0 @@
|
|||
#!/bin/bash
|
||||
NODE_INDEX=$(hostname | tail -c 2)
|
||||
NODE_NAME=$(hostname)
|
||||
|
||||
configureAdminUser(){
|
||||
chage -E -1 -I -1 -m 0 -M 99999 "${ADMINUSER}"
|
||||
chage -l "${ADMINUSER}"
|
||||
}
|
||||
|
||||
configPrivateClusterHosts() {
|
||||
mkdir -p /etc/systemd/system/reconcile-private-hosts.service.d/
|
||||
touch /etc/systemd/system/reconcile-private-hosts.service.d/10-fqdn.conf
|
||||
tee /etc/systemd/system/reconcile-private-hosts.service.d/10-fqdn.conf > /dev/null <<EOF
|
||||
[Service]
|
||||
Environment="KUBE_API_SERVER_NAME=${API_SERVER_NAME}"
|
||||
EOF
|
||||
systemctlEnableAndStart reconcile-private-hosts || exit $ERR_SYSTEMCTL_START_FAIL
|
||||
}
|
||||
configureTransparentHugePage() {
|
||||
ETC_SYSFS_CONF="/etc/sysfs.conf"
|
||||
if [[ "${THP_ENABLED}" != "" ]]; then
|
||||
echo "${THP_ENABLED}" > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
echo "kernel/mm/transparent_hugepage/enabled=${THP_ENABLED}" >> ${ETC_SYSFS_CONF}
|
||||
fi
|
||||
if [[ "${THP_DEFRAG}" != "" ]]; then
|
||||
echo "${THP_DEFRAG}" > /sys/kernel/mm/transparent_hugepage/defrag
|
||||
echo "kernel/mm/transparent_hugepage/defrag=${THP_DEFRAG}" >> ${ETC_SYSFS_CONF}
|
||||
fi
|
||||
}
|
||||
|
||||
configureSwapFile() {
|
||||
# https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/troubleshoot-device-names-problems#identify-disk-luns
|
||||
swap_size_kb=$(expr ${SWAP_FILE_SIZE_MB} \* 1000)
|
||||
swap_location=""
|
||||
|
||||
# Attempt to use the resource disk
|
||||
if [[ -L /dev/disk/azure/resource-part1 ]]; then
|
||||
resource_disk_path=$(findmnt -nr -o target -S $(readlink -f /dev/disk/azure/resource-part1))
|
||||
disk_free_kb=$(df ${resource_disk_path} | sed 1d | awk '{print $4}')
|
||||
if [[ ${disk_free_kb} -gt ${swap_size_kb} ]]; then
|
||||
echo "Will use resource disk for swap file"
|
||||
swap_location=${resource_disk_path}/swapfile
|
||||
else
|
||||
echo "Insufficient disk space on resource disk to create swap file: request ${swap_size_kb} free ${disk_free_kb}, attempting to fall back to OS disk..."
|
||||
fi
|
||||
fi
|
||||
|
||||
# If we couldn't use the resource disk, attempt to use the OS disk
|
||||
if [[ -z "${swap_location}" ]]; then
|
||||
# Directly check size on the root directory since we can't rely on 'root-part1' always being the correct label
|
||||
os_device=$(readlink -f /dev/disk/azure/root)
|
||||
disk_free_kb=$(df -P / | sed 1d | awk '{print $4}')
|
||||
if [[ ${disk_free_kb} -gt ${swap_size_kb} ]]; then
|
||||
echo "Will use OS disk for swap file"
|
||||
swap_location=/swapfile
|
||||
else
|
||||
echo "Insufficient disk space on OS device ${os_device} to create swap file: request ${swap_size_kb} free ${disk_free_kb}"
|
||||
exit $ERR_SWAP_CREATE_INSUFFICIENT_DISK_SPACE
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Swap file will be saved to: ${swap_location}"
|
||||
retrycmd_if_failure 24 5 25 fallocate -l ${swap_size_kb}K ${swap_location} || exit $ERR_SWAP_CREATE_FAIL
|
||||
chmod 600 ${swap_location}
|
||||
retrycmd_if_failure 24 5 25 mkswap ${swap_location} || exit $ERR_SWAP_CREATE_FAIL
|
||||
retrycmd_if_failure 24 5 25 swapon ${swap_location} || exit $ERR_SWAP_CREATE_FAIL
|
||||
retrycmd_if_failure 24 5 25 swapon --show | grep ${swap_location} || exit $ERR_SWAP_CREATE_FAIL
|
||||
echo "${swap_location} none swap sw 0 0" >> /etc/fstab
|
||||
}
|
||||
|
||||
configureEtcEnvironment() {
|
||||
mkdir -p /etc/systemd/system.conf.d/
|
||||
touch /etc/systemd/system.conf.d/proxy.conf
|
||||
chmod 0644 /etc/systemd/system.conf.d/proxy.conf
|
||||
|
||||
mkdir -p /etc/apt/apt.conf.d
|
||||
touch /etc/apt/apt.conf.d/95proxy
|
||||
chmod 0644 /etc/apt/apt.conf.d/95proxy
|
||||
|
||||
# TODO(ace): this pains me but quick and dirty refactor
|
||||
echo "[Manager]" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
if [ "${HTTP_PROXY_URLS}" != "" ]; then
|
||||
echo "HTTP_PROXY=${HTTP_PROXY_URLS}" >> /etc/environment
|
||||
echo "http_proxy=${HTTP_PROXY_URLS}" >> /etc/environment
|
||||
echo "Acquire::http::proxy \"${HTTP_PROXY_URLS}\";" >> /etc/apt/apt.conf.d/95proxy
|
||||
echo "DefaultEnvironment=\"HTTP_PROXY=${HTTP_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
echo "DefaultEnvironment=\"http_proxy=${HTTP_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
fi
|
||||
if [ "${HTTPS_PROXY_URLS}" != "" ]; then
|
||||
echo "HTTPS_PROXY=${HTTPS_PROXY_URLS}" >> /etc/environment
|
||||
echo "https_proxy=${HTTPS_PROXY_URLS}" >> /etc/environment
|
||||
echo "Acquire::https::proxy \"${HTTPS_PROXY_URLS}\";" >> /etc/apt/apt.conf.d/95proxy
|
||||
echo "DefaultEnvironment=\"HTTPS_PROXY=${HTTPS_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
echo "DefaultEnvironment=\"https_proxy=${HTTPS_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
fi
|
||||
if [ "${NO_PROXY_URLS}" != "" ]; then
|
||||
echo "NO_PROXY=${NO_PROXY_URLS}" >> /etc/environment
|
||||
echo "no_proxy=${NO_PROXY_URLS}" >> /etc/environment
|
||||
echo "DefaultEnvironment=\"NO_PROXY=${NO_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
echo "DefaultEnvironment=\"no_proxy=${NO_PROXY_URLS}\"" >> /etc/systemd/system.conf.d/proxy.conf
|
||||
fi
|
||||
|
||||
# for kubelet to pick up the proxy
|
||||
mkdir -p "/etc/systemd/system/kubelet.service.d"
|
||||
tee "/etc/systemd/system/kubelet.service.d/10-httpproxy.conf" > /dev/null <<'EOF'
|
||||
[Service]
|
||||
EnvironmentFile=/etc/environment
|
||||
EOF
|
||||
}
|
||||
|
||||
configureHTTPProxyCA() {
|
||||
if isMarinerOrAzureLinux "$OS"; then
|
||||
cert_dest="/usr/share/pki/ca-trust-source/anchors"
|
||||
update_cmd="update-ca-trust"
|
||||
else
|
||||
cert_dest="/usr/local/share/ca-certificates"
|
||||
update_cmd="update-ca-certificates"
|
||||
fi
|
||||
echo "${HTTP_PROXY_TRUSTED_CA}" | base64 -d > "${cert_dest}/proxyCA.crt" || exit $ERR_UPDATE_CA_CERTS
|
||||
$update_cmd || exit $ERR_UPDATE_CA_CERTS
|
||||
}
|
||||
|
||||
configureCustomCaCertificate() {
|
||||
mkdir -p /opt/certs
|
||||
for i in $(seq 0 $((${CUSTOM_CA_TRUST_COUNT} - 1))); do
|
||||
# directly referring to the variable as "${CUSTOM_CA_CERT_${i}}"
|
||||
# causes bad substitution errors in bash
|
||||
# dynamically declare and use `!` to add a layer of indirection
|
||||
declare varname=CUSTOM_CA_CERT_${i}
|
||||
echo "${!varname}" | base64 -d > /opt/certs/00000000000000cert${i}.crt
|
||||
done
|
||||
# This will block until the service is considered active.
|
||||
# Update_certs.service is a oneshot type of unit that
|
||||
# is considered active when the ExecStart= command terminates with a zero status code.
|
||||
systemctl restart update_certs.service || exit $ERR_UPDATE_CA_CERTS
|
||||
# after new certs are added to trust store, containerd will not pick them up properly before restart.
|
||||
# aim here is to have this working straight away for a freshly provisioned node
|
||||
# so we force a restart after the certs are updated
|
||||
# custom CA daemonset copies certs passed by the user to the node, what then triggers update_certs.path unit
|
||||
# path unit then triggers the script that copies over cert files to correct location on the node and updates the trust store
|
||||
# as a part of this flow we could restart containerd everytime a new cert is added to the trust store using custom CA
|
||||
systemctl restart containerd
|
||||
}
|
||||
|
||||
configureContainerdUlimits() {
|
||||
CONTAINERD_ULIMIT_DROP_IN_FILE_PATH="/etc/systemd/system/containerd.service.d/set_ulimits.conf"
|
||||
touch "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}"
|
||||
chmod 0600 "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}"
|
||||
tee "${CONTAINERD_ULIMIT_DROP_IN_FILE_PATH}" > /dev/null <<EOF
|
||||
$(echo "$CONTAINERD_ULIMITS" | tr ' ' '\n')
|
||||
EOF
|
||||
|
||||
systemctl daemon-reload
|
||||
systemctl restart containerd
|
||||
}
|
||||
|
||||
|
||||
configureKubeletServerCert() {
|
||||
KUBELET_SERVER_PRIVATE_KEY_PATH="/etc/kubernetes/certs/kubeletserver.key"
|
||||
KUBELET_SERVER_CERT_PATH="/etc/kubernetes/certs/kubeletserver.crt"
|
||||
|
||||
openssl genrsa -out $KUBELET_SERVER_PRIVATE_KEY_PATH 2048
|
||||
openssl req -new -x509 -days 7300 -key $KUBELET_SERVER_PRIVATE_KEY_PATH -out $KUBELET_SERVER_CERT_PATH -subj "/CN=${NODE_NAME}" -addext "subjectAltName=DNS:${NODE_NAME}"
|
||||
}
|
||||
|
||||
configureK8s() {
|
||||
APISERVER_PUBLIC_KEY_PATH="/etc/kubernetes/certs/apiserver.crt"
|
||||
touch "${APISERVER_PUBLIC_KEY_PATH}"
|
||||
chmod 0644 "${APISERVER_PUBLIC_KEY_PATH}"
|
||||
chown root:root "${APISERVER_PUBLIC_KEY_PATH}"
|
||||
|
||||
AZURE_JSON_PATH="/etc/kubernetes/azure.json"
|
||||
touch "${AZURE_JSON_PATH}"
|
||||
chmod 0600 "${AZURE_JSON_PATH}"
|
||||
chown root:root "${AZURE_JSON_PATH}"
|
||||
|
||||
mkdir -p "/etc/kubernetes/certs"
|
||||
|
||||
set +x
|
||||
if [ -n "${KUBELET_CLIENT_CONTENT}" ]; then
|
||||
echo "${KUBELET_CLIENT_CONTENT}" | base64 -d > /etc/kubernetes/certs/client.key
|
||||
fi
|
||||
if [ -n "${KUBELET_CLIENT_CERT_CONTENT}" ]; then
|
||||
echo "${KUBELET_CLIENT_CERT_CONTENT}" | base64 -d > /etc/kubernetes/certs/client.crt
|
||||
fi
|
||||
if [ -n "${SERVICE_PRINCIPAL_FILE_CONTENT}" ]; then
|
||||
echo "${SERVICE_PRINCIPAL_FILE_CONTENT}" | base64 -d > /etc/kubernetes/sp.txt
|
||||
fi
|
||||
|
||||
echo "${APISERVER_PUBLIC_KEY}" | base64 --decode > "${APISERVER_PUBLIC_KEY_PATH}"
|
||||
# Perform the required JSON escaping
|
||||
SP_FILE="/etc/kubernetes/sp.txt"
|
||||
SERVICE_PRINCIPAL_CLIENT_SECRET="$(cat "$SP_FILE")"
|
||||
SERVICE_PRINCIPAL_CLIENT_SECRET=${SERVICE_PRINCIPAL_CLIENT_SECRET//\\/\\\\}
|
||||
SERVICE_PRINCIPAL_CLIENT_SECRET=${SERVICE_PRINCIPAL_CLIENT_SECRET//\"/\\\"}
|
||||
rm "$SP_FILE" # unneeded after reading from disk.
|
||||
cat << EOF > "${AZURE_JSON_PATH}"
|
||||
{
|
||||
"cloud": "${TARGET_CLOUD}",
|
||||
"tenantId": "${TENANT_ID}",
|
||||
"subscriptionId": "${SUBSCRIPTION_ID}",
|
||||
"aadClientId": "${SERVICE_PRINCIPAL_CLIENT_ID}",
|
||||
"aadClientSecret": "${SERVICE_PRINCIPAL_CLIENT_SECRET}",
|
||||
"resourceGroup": "${RESOURCE_GROUP}",
|
||||
"location": "${LOCATION}",
|
||||
"vmType": "${VM_TYPE}",
|
||||
"subnetName": "${SUBNET}",
|
||||
"securityGroupName": "${NETWORK_SECURITY_GROUP}",
|
||||
"vnetName": "${VIRTUAL_NETWORK}",
|
||||
"vnetResourceGroup": "${VIRTUAL_NETWORK_RESOURCE_GROUP}",
|
||||
"routeTableName": "${ROUTE_TABLE}",
|
||||
"primaryAvailabilitySetName": "${PRIMARY_AVAILABILITY_SET}",
|
||||
"primaryScaleSetName": "${PRIMARY_SCALE_SET}",
|
||||
"cloudProviderBackoffMode": "${CLOUDPROVIDER_BACKOFF_MODE}",
|
||||
"cloudProviderBackoff": ${CLOUDPROVIDER_BACKOFF},
|
||||
"cloudProviderBackoffRetries": ${CLOUDPROVIDER_BACKOFF_RETRIES},
|
||||
"cloudProviderBackoffExponent": ${CLOUDPROVIDER_BACKOFF_EXPONENT},
|
||||
"cloudProviderBackoffDuration": ${CLOUDPROVIDER_BACKOFF_DURATION},
|
||||
"cloudProviderBackoffJitter": ${CLOUDPROVIDER_BACKOFF_JITTER},
|
||||
"cloudProviderRateLimit": ${CLOUDPROVIDER_RATELIMIT},
|
||||
"cloudProviderRateLimitQPS": ${CLOUDPROVIDER_RATELIMIT_QPS},
|
||||
"cloudProviderRateLimitBucket": ${CLOUDPROVIDER_RATELIMIT_BUCKET},
|
||||
"cloudProviderRateLimitQPSWrite": ${CLOUDPROVIDER_RATELIMIT_QPS_WRITE},
|
||||
"cloudProviderRateLimitBucketWrite": ${CLOUDPROVIDER_RATELIMIT_BUCKET_WRITE},
|
||||
"useManagedIdentityExtension": ${USE_MANAGED_IDENTITY_EXTENSION},
|
||||
"userAssignedIdentityID": "${USER_ASSIGNED_IDENTITY_ID}",
|
||||
"useInstanceMetadata": ${USE_INSTANCE_METADATA},
|
||||
"loadBalancerSku": "${LOAD_BALANCER_SKU}",
|
||||
"disableOutboundSNAT": ${LOAD_BALANCER_DISABLE_OUTBOUND_SNAT},
|
||||
"excludeMasterFromStandardLB": ${EXCLUDE_MASTER_FROM_STANDARD_LB},
|
||||
"providerVaultName": "${KMS_PROVIDER_VAULT_NAME}",
|
||||
"maximumLoadBalancerRuleCount": ${MAXIMUM_LOADBALANCER_RULE_COUNT},
|
||||
"providerKeyName": "k8s",
|
||||
"providerKeyVersion": ""
|
||||
}
|
||||
EOF
|
||||
set -x
|
||||
if [[ "${CLOUDPROVIDER_BACKOFF_MODE}" = "v2" ]]; then
|
||||
sed -i "/cloudProviderBackoffExponent/d" /etc/kubernetes/azure.json
|
||||
sed -i "/cloudProviderBackoffJitter/d" /etc/kubernetes/azure.json
|
||||
fi
|
||||
|
||||
configureKubeletServerCert
|
||||
if [ "${IS_CUSTOM_CLOUD}" == "true" ]; then
|
||||
set +x
|
||||
AKS_CUSTOM_CLOUD_JSON_PATH="/etc/kubernetes/${TARGET_ENVIRONMENT}.json"
|
||||
touch "${AKS_CUSTOM_CLOUD_JSON_PATH}"
|
||||
chmod 0600 "${AKS_CUSTOM_CLOUD_JSON_PATH}"
|
||||
chown root:root "${AKS_CUSTOM_CLOUD_JSON_PATH}"
|
||||
|
||||
echo "${CUSTOM_ENV_JSON}" | base64 -d > "${AKS_CUSTOM_CLOUD_JSON_PATH}"
|
||||
set -x
|
||||
fi
|
||||
|
||||
if [ "${KUBELET_CONFIG_FILE_ENABLED}" == "true" ]; then
|
||||
set +x
|
||||
KUBELET_CONFIG_JSON_PATH="/etc/default/kubeletconfig.json"
|
||||
touch "${KUBELET_CONFIG_JSON_PATH}"
|
||||
chmod 0600 "${KUBELET_CONFIG_JSON_PATH}"
|
||||
chown root:root "${KUBELET_CONFIG_JSON_PATH}"
|
||||
echo "${KUBELET_CONFIG_FILE_CONTENT}" | base64 -d > "${KUBELET_CONFIG_JSON_PATH}"
|
||||
set -x
|
||||
KUBELET_CONFIG_DROP_IN="/etc/systemd/system/kubelet.service.d/10-componentconfig.conf"
|
||||
touch "${KUBELET_CONFIG_DROP_IN}"
|
||||
chmod 0600 "${KUBELET_CONFIG_DROP_IN}"
|
||||
tee "${KUBELET_CONFIG_DROP_IN}" > /dev/null <<EOF
|
||||
[Service]
|
||||
Environment="KUBELET_CONFIG_FILE_FLAGS=--config /etc/default/kubeletconfig.json"
|
||||
EOF
|
||||
fi
|
||||
}
|
||||
|
||||
configureCNI() {
|
||||
# needed for the iptables rules to work on bridges
|
||||
retrycmd_if_failure 120 5 25 modprobe br_netfilter || exit $ERR_MODPROBE_FAIL
|
||||
echo -n "br_netfilter" > /etc/modules-load.d/br_netfilter.conf
|
||||
configureCNIIPTables
|
||||
}
|
||||
|
||||
configureCNIIPTables() {
|
||||
if [[ "${NETWORK_PLUGIN}" = "azure" ]]; then
|
||||
mv $CNI_BIN_DIR/10-azure.conflist $CNI_CONFIG_DIR/
|
||||
chmod 600 $CNI_CONFIG_DIR/10-azure.conflist
|
||||
if [[ "${NETWORK_POLICY}" == "calico" ]]; then
|
||||
sed -i 's#"mode":"bridge"#"mode":"transparent"#g' $CNI_CONFIG_DIR/10-azure.conflist
|
||||
elif [[ "${NETWORK_POLICY}" == "" || "${NETWORK_POLICY}" == "none" ]] && [[ "${NETWORK_MODE}" == "transparent" ]]; then
|
||||
sed -i 's#"mode":"bridge"#"mode":"transparent"#g' $CNI_CONFIG_DIR/10-azure.conflist
|
||||
fi
|
||||
/sbin/ebtables -t nat --list
|
||||
fi
|
||||
}
|
||||
|
||||
disableSystemdResolved() {
|
||||
ls -ltr /etc/resolv.conf
|
||||
cat /etc/resolv.conf
|
||||
UBUNTU_RELEASE=$(lsb_release -r -s)
|
||||
if [[ "${UBUNTU_RELEASE}" == "18.04" || "${UBUNTU_RELEASE}" == "20.04" || "${UBUNTU_RELEASE}" == "22.04" ]]; then
|
||||
echo "Ingorings systemd-resolved query service but using its resolv.conf file"
|
||||
echo "This is the simplest approach to workaround resolved issues without completely uninstall it"
|
||||
[ -f /run/systemd/resolve/resolv.conf ] && sudo ln -sf /run/systemd/resolve/resolv.conf /etc/resolv.conf
|
||||
ls -ltr /etc/resolv.conf
|
||||
cat /etc/resolv.conf
|
||||
fi
|
||||
}
|
||||
|
||||
ensureContainerd() {
|
||||
if [ "${TELEPORT_ENABLED}" == "true" ]; then
|
||||
ensureTeleportd
|
||||
fi
|
||||
mkdir -p "/etc/systemd/system/containerd.service.d"
|
||||
tee "/etc/systemd/system/containerd.service.d/exec_start.conf" > /dev/null <<EOF
|
||||
[Service]
|
||||
ExecStartPost=/sbin/iptables -P FORWARD ACCEPT
|
||||
EOF
|
||||
|
||||
if [ "${ARTIFACT_STREAMING_ENABLED}" == "true" ]; then
|
||||
logs_to_events "AKS.CSE.ensureContainerd.ensureArtifactStreaming" ensureArtifactStreaming || exit $ERR_ARTIFACT_STREAMING_INSTALL
|
||||
fi
|
||||
|
||||
mkdir -p /etc/containerd
|
||||
if [[ "${GPU_NODE}" = true ]] && [[ "${skip_nvidia_driver_install}" == "true" ]]; then
|
||||
echo "Generating non-GPU containerd config for GPU node due to VM tags"
|
||||
echo "${CONTAINERD_CONFIG_NO_GPU_CONTENT}" | base64 -d > /etc/containerd/config.toml || exit $ERR_FILE_WATCH_TIMEOUT
|
||||
else
|
||||
echo "Generating containerd config..."
|
||||
echo "${CONTAINERD_CONFIG_CONTENT}" | base64 -d > /etc/containerd/config.toml || exit $ERR_FILE_WATCH_TIMEOUT
|
||||
fi
|
||||
|
||||
tee "/etc/sysctl.d/99-force-bridge-forward.conf" > /dev/null <<EOF
|
||||
net.ipv4.ip_forward = 1
|
||||
net.ipv4.conf.all.forwarding = 1
|
||||
net.ipv6.conf.all.forwarding = 1
|
||||
net.bridge.bridge-nf-call-iptables = 1
|
||||
EOF
|
||||
retrycmd_if_failure 120 5 25 sysctl --system || exit $ERR_SYSCTL_RELOAD
|
||||
systemctl is-active --quiet docker && (systemctl_disable 20 30 120 docker || exit $ERR_SYSTEMD_DOCKER_STOP_FAIL)
|
||||
systemctlEnableAndStart containerd || exit $ERR_SYSTEMCTL_START_FAIL
|
||||
}
|
||||
|
||||
ensureNoDupOnPromiscuBridge() {
|
||||
systemctlEnableAndStart ensure-no-dup || exit $ERR_SYSTEMCTL_START_FAIL
|
||||
}
|
||||
|
||||
ensureTeleportd() {
|
||||
systemctlEnableAndStart teleportd || exit $ERR_SYSTEMCTL_START_FAIL
|
||||
}
|
||||
|
||||
ensureArtifactStreaming() {
|
||||
systemctl enable acr-mirror.service
|
||||
systemctl start acr-mirror.service
|
||||
sudo /opt/acr/tools/overlaybd/install.sh
|
||||
sudo /opt/acr/tools/overlaybd/enable-http-auth.sh
|
||||
modprobe target_core_user
|
||||
curl -X PUT 'localhost:8578/config?ns=_default&enable_suffix=azurecr.io&stream_format=overlaybd' -O
|
||||
systemctl enable /opt/overlaybd/overlaybd-tcmu.service
|
||||
systemctl enable /opt/overlaybd/snapshotter/overlaybd-snapshotter.service
|
||||
systemctl start overlaybd-tcmu
|
||||
systemctl start overlaybd-snapshotter
|
||||
systemctl start acr-nodemon
|
||||
}
|
||||
|
||||
ensureDocker() {
|
||||
DOCKER_SERVICE_EXEC_START_FILE=/etc/systemd/system/docker.service.d/exec_start.conf
|
||||
usermod -aG docker ${ADMINUSER}
|
||||
DOCKER_MOUNT_FLAGS_SYSTEMD_FILE=/etc/systemd/system/docker.service.d/clear_mount_propagation_flags.conf
|
||||
DOCKER_JSON_FILE=/etc/docker/daemon.json
|
||||
for i in $(seq 1 1200); do
|
||||
if [ -s $DOCKER_JSON_FILE ]; then
|
||||
jq '.' < $DOCKER_JSON_FILE && break
|
||||
fi
|
||||
if [ $i -eq 1200 ]; then
|
||||
exit $ERR_FILE_WATCH_TIMEOUT
|
||||
else
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
systemctl is-active --quiet containerd && (systemctl_disable 20 30 120 containerd || exit $ERR_SYSTEMD_CONTAINERD_STOP_FAIL)
|
||||
systemctlEnableAndStart docker || exit $ERR_DOCKER_START_FAIL
|
||||
|
||||
}
|
||||
|
||||
ensureDHCPv6() {
|
||||
systemctlEnableAndStart dhcpv6 || exit $ERR_SYSTEMCTL_START_FAIL
|
||||
retrycmd_if_failure 120 5 25 modprobe ip6_tables || exit $ERR_MODPROBE_FAIL
|
||||
}
|
||||
|
||||
ensureKubelet() {
|
||||
KUBELET_DEFAULT_FILE=/etc/default/kubelet
|
||||
mkdir -p /etc/default
|
||||
|
||||
# In k8s >= 1.29 kubelet no longer sets node internalIP when using external cloud provider
|
||||
# https://github.com/kubernetes/kubernetes/pull/121028
|
||||
# This regresses node startup performance in Azure CNI Overlay and Podsubnet clusters, which require the node to be
|
||||
# assigned an internal IP before configuring pod networking.
|
||||
# To improve node startup performance, explicitly set `--node-ip` to the IP returned from IMDS so kubelet sets
|
||||
# the internal IP when it registers the node.
|
||||
# If this fails, skip setting --node-ip, which is safe because cloud-node-manager will assign it later anyway.
|
||||
if semverCompare ${KUBERNETES_VERSION:-"0.0.0"} "1.29.0"; then
|
||||
logs_to_events "AKS.CSE.ensureKubelet.setKubeletNodeIPFlag" setKubeletNodeIPFlag
|
||||
fi
|
||||
|
||||
echo "KUBELET_FLAGS=${KUBELET_FLAGS}" > "${KUBELET_DEFAULT_FILE}"
|
||||
echo "KUBELET_REGISTER_SCHEDULABLE=true" >> "${KUBELET_DEFAULT_FILE}"
|
||||
echo "NETWORK_POLICY=${NETWORK_POLICY}" >> "${KUBELET_DEFAULT_FILE}"
|
||||
echo "KUBELET_IMAGE=${KUBELET_IMAGE}" >> "${KUBELET_DEFAULT_FILE}"
|
||||
echo "KUBELET_NODE_LABELS=${KUBELET_NODE_LABELS}" >> "${KUBELET_DEFAULT_FILE}"
|
||||
if [ -n "${AZURE_ENVIRONMENT_FILEPATH}" ]; then
|
||||
echo "AZURE_ENVIRONMENT_FILEPATH=${AZURE_ENVIRONMENT_FILEPATH}" >> "${KUBELET_DEFAULT_FILE}"
|
||||
fi
|
||||
|
||||
KUBE_CA_FILE="/etc/kubernetes/certs/ca.crt"
|
||||
mkdir -p "$(dirname "${KUBE_CA_FILE}")"
|
||||
echo "${KUBE_CA_CRT}" | base64 -d > "${KUBE_CA_FILE}"
|
||||
chmod 0600 "${KUBE_CA_FILE}"
|
||||
|
||||
if [ "${ENABLE_TLS_BOOTSTRAPPING}" == "true" ]; then
|
||||
KUBELET_TLS_DROP_IN="/etc/systemd/system/kubelet.service.d/10-tlsbootstrap.conf"
|
||||
mkdir -p "$(dirname "${KUBELET_TLS_DROP_IN}")"
|
||||
touch "${KUBELET_TLS_DROP_IN}"
|
||||
chmod 0600 "${KUBELET_TLS_DROP_IN}"
|
||||
tee "${KUBELET_TLS_DROP_IN}" > /dev/null <<EOF
|
||||
[Service]
|
||||
Environment="KUBELET_TLS_BOOTSTRAP_FLAGS=--kubeconfig /var/lib/kubelet/kubeconfig --bootstrap-kubeconfig /var/lib/kubelet/bootstrap-kubeconfig"
|
||||
EOF
|
||||
BOOTSTRAP_KUBECONFIG_FILE=/var/lib/kubelet/bootstrap-kubeconfig
|
||||
mkdir -p "$(dirname "${BOOTSTRAP_KUBECONFIG_FILE}")"
|
||||
touch "${BOOTSTRAP_KUBECONFIG_FILE}"
|
||||
chmod 0644 "${BOOTSTRAP_KUBECONFIG_FILE}"
|
||||
tee "${BOOTSTRAP_KUBECONFIG_FILE}" > /dev/null <<EOF
|
||||
apiVersion: v1
|
||||
kind: Config
|
||||
clusters:
|
||||
- name: localcluster
|
||||
cluster:
|
||||
certificate-authority: /etc/kubernetes/certs/ca.crt
|
||||
server: https://${API_SERVER_NAME}:443
|
||||
users:
|
||||
- name: kubelet-bootstrap
|
||||
user:
|
||||
token: "${TLS_BOOTSTRAP_TOKEN}"
|
||||
contexts:
|
||||
- context:
|
||||
cluster: localcluster
|
||||
user: kubelet-bootstrap
|
||||
name: bootstrap-context
|
||||
current-context: bootstrap-context
|
||||
EOF
|
||||
else
|
||||
KUBECONFIG_FILE=/var/lib/kubelet/kubeconfig
|
||||
mkdir -p "$(dirname "${KUBECONFIG_FILE}")"
|
||||
touch "${KUBECONFIG_FILE}"
|
||||
chmod 0644 "${KUBECONFIG_FILE}"
|
||||
tee "${KUBECONFIG_FILE}" > /dev/null <<EOF
|
||||
apiVersion: v1
|
||||
kind: Config
|
||||
clusters:
|
||||
- name: localcluster
|
||||
cluster:
|
||||
certificate-authority: /etc/kubernetes/certs/ca.crt
|
||||
server: https://${API_SERVER_NAME}:443
|
||||
users:
|
||||
- name: client
|
||||
user:
|
||||
client-certificate: /etc/kubernetes/certs/client.crt
|
||||
client-key: /etc/kubernetes/certs/client.key
|
||||
contexts:
|
||||
- context:
|
||||
cluster: localcluster
|
||||
user: client
|
||||
name: localclustercontext
|
||||
current-context: localclustercontext
|
||||
EOF
|
||||
fi
|
||||
KUBELET_RUNTIME_CONFIG_SCRIPT_FILE=/opt/azure/containers/kubelet.sh
|
||||
tee "${KUBELET_RUNTIME_CONFIG_SCRIPT_FILE}" > /dev/null <<EOF
|
||||
#!/bin/bash
|
||||
# Disallow container from reaching out to the special IP address 168.63.129.16
|
||||
# for TCP protocol (which http uses)
|
||||
#
|
||||
# 168.63.129.16 contains protected settings that have priviledged info.
|
||||
#
|
||||
# The host can still reach 168.63.129.16 because it goes through the OUTPUT chain, not FORWARD.
|
||||
#
|
||||
# Note: we should not block all traffic to 168.63.129.16. For example UDP traffic is still needed
|
||||
# for DNS.
|
||||
iptables -I FORWARD -d 168.63.129.16 -p tcp --dport 80 -j DROP
|
||||
EOF
|
||||
systemctlEnableAndStart kubelet || exit $ERR_KUBELET_START_FAIL
|
||||
}
|
||||
|
||||
ensureMigPartition(){
    # Install a systemd drop-in telling mig-partition.service which GPU
    # instance profile (MIG layout) to apply on this node.
    local dropin_dir="/etc/systemd/system/mig-partition.service.d"
    local dropin="${dropin_dir}/10-mig-profile.conf"
    mkdir -p "${dropin_dir}"
    touch "${dropin}"
    tee "${dropin}" > /dev/null <<EOF
[Service]
Environment="GPU_INSTANCE_PROFILE=${GPU_INSTANCE_PROFILE}"
EOF
    # Best-effort: the unit is expected to fail now and take effect on the
    # next reboot. It MAY succeed immediately, but only due to systemd's
    # Type=Simple not reporting a non-zero status when ExecStart fails to
    # invoke, so the return status is deliberately ignored.
    systemctlEnableAndStart mig-partition
}
|
||||
|
||||
ensureSysctl() {
    # Write the AKS-managed sysctl settings (base64-encoded in
    # SYSCTL_CONTENT) to a late-sorting drop-in and apply them, retrying
    # transient sysctl failures.
    # SYSCTL_CONFIG_FILE stays global, matching the original contract.
    SYSCTL_CONFIG_FILE=/etc/sysctl.d/999-sysctl-aks.conf
    mkdir -p "$(dirname "${SYSCTL_CONFIG_FILE}")"
    touch "${SYSCTL_CONFIG_FILE}"
    chmod 0644 "${SYSCTL_CONFIG_FILE}"
    base64 -d <<< "${SYSCTL_CONTENT}" > "${SYSCTL_CONFIG_FILE}"
    retrycmd_if_failure 24 5 25 sysctl --system
}
|
||||
|
||||
ensureK8sControlPlane() {
    # Wait until the API server answers `kubectl cluster-info`, failing CSE
    # with ERR_K8S_RUNNING_TIMEOUT if it never becomes healthy.
    # Skipped when a reboot is already pending or the node has no outbound
    # connectivity (the check could never succeed).
    # NOTE(review): $REBOOTREQUIRED is executed as a command, so it is
    # expected to hold the literal string "true" or "false" — confirm at
    # the definition site.
    if $REBOOTREQUIRED || [ "$NO_OUTBOUND" = "true" ]; then
        return
    fi
    # 120 attempts x 5s sleep, 25s timeout per attempt; kubectl's stderr is
    # suppressed to keep the provision log readable.
    retrycmd_if_failure 120 5 25 $KUBECTL 2>/dev/null cluster-info || exit $ERR_K8S_RUNNING_TIMEOUT
}
|
||||
|
||||
createKubeManifestDir() {
    # Ensure the static-pod manifest directory watched by kubelet exists.
    # KUBEMANIFESTDIR is intentionally global for use by later steps.
    KUBEMANIFESTDIR=/etc/kubernetes/manifests
    mkdir -p "${KUBEMANIFESTDIR}"
}
|
||||
|
||||
writeKubeConfig() {
    # Write an admin kubeconfig into the admin user's ~/.kube/config with
    # owner-only permissions.
    KUBECONFIGDIR=/home/$ADMINUSER/.kube
    KUBECONFIGFILE=$KUBECONFIGDIR/config
    mkdir -p $KUBECONFIGDIR
    touch $KUBECONFIGFILE
    chown $ADMINUSER:$ADMINUSER $KUBECONFIGDIR
    chown $ADMINUSER:$ADMINUSER $KUBECONFIGFILE
    chmod 700 $KUBECONFIGDIR
    chmod 600 $KUBECONFIGFILE
    # Disable xtrace while handling certificate/key material so secrets do
    # not leak into the provision log; re-enabled after the write.
    set +x
    echo "
---
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: \"$CA_CERTIFICATE\"
    server: $KUBECONFIG_SERVER
  name: \"$MASTER_FQDN\"
contexts:
- context:
    cluster: \"$MASTER_FQDN\"
    user: \"$MASTER_FQDN-admin\"
  name: \"$MASTER_FQDN\"
current-context: \"$MASTER_FQDN\"
kind: Config
users:
- name: \"$MASTER_FQDN-admin\"
  user:
    client-certificate-data: \"$KUBECONFIG_CERTIFICATE\"
    client-key-data: \"$KUBECONFIG_KEY\"
" > $KUBECONFIGFILE
    set -x
}
|
||||
|
||||
configClusterAutoscalerAddon() {
    # Inject base64-encoded service-principal credentials and cluster
    # identifiers into the cluster-autoscaler addon manifest (Kubernetes
    # Secret values must be base64).
    #
    # printf '%s' (not echo) so a trailing newline is NOT encoded into the
    # secret values, and base64 -w 0 so long values are never line-wrapped
    # — a wrapped value would break the single-line sed replacement below.
    CLUSTER_AUTOSCALER_ADDON_FILE=/etc/kubernetes/addons/cluster-autoscaler-deployment.yaml
    sed -i "s|<clientID>|$(printf '%s' "$SERVICE_PRINCIPAL_CLIENT_ID" | base64 -w 0)|g" $CLUSTER_AUTOSCALER_ADDON_FILE
    sed -i "s|<clientSec>|$(printf '%s' "$SERVICE_PRINCIPAL_CLIENT_SECRET" | base64 -w 0)|g" $CLUSTER_AUTOSCALER_ADDON_FILE
    sed -i "s|<subID>|$(printf '%s' "$SUBSCRIPTION_ID" | base64 -w 0)|g" $CLUSTER_AUTOSCALER_ADDON_FILE
    sed -i "s|<tenantID>|$(printf '%s' "$TENANT_ID" | base64 -w 0)|g" $CLUSTER_AUTOSCALER_ADDON_FILE
    sed -i "s|<rg>|$(printf '%s' "$RESOURCE_GROUP" | base64 -w 0)|g" $CLUSTER_AUTOSCALER_ADDON_FILE
}
|
||||
|
||||
configACIConnectorAddon() {
    # Configure the ACI connector (virtual-kubelet) addon manifest: build an
    # Azure "auth file"-style credential JSON, generate a self-signed TLS
    # cert/key pair, and substitute all of them into the shipped manifest.
    # base64 -w 0 keeps the encoded JSON on one line for the sed below.
    ACI_CONNECTOR_CREDENTIALS=$(printf "{\"clientId\": \"%s\", \"clientSecret\": \"%s\", \"tenantId\": \"%s\", \"subscriptionId\": \"%s\", \"activeDirectoryEndpointUrl\": \"https://login.microsoftonline.com\",\"resourceManagerEndpointUrl\": \"https://management.azure.com/\", \"activeDirectoryGraphResourceId\": \"https://graph.windows.net/\", \"sqlManagementEndpointUrl\": \"https://management.core.windows.net:8443/\", \"galleryEndpointUrl\": \"https://gallery.azure.com/\", \"managementEndpointUrl\": \"https://management.core.windows.net/\"}" "$SERVICE_PRINCIPAL_CLIENT_ID" "$SERVICE_PRINCIPAL_CLIENT_SECRET" "$TENANT_ID" "$SUBSCRIPTION_ID" | base64 -w 0)

    # Self-signed 10-year RSA-4096 cert for the virtual-kubelet endpoint.
    openssl req -newkey rsa:4096 -new -nodes -x509 -days 3650 -keyout /etc/kubernetes/certs/aci-connector-key.pem -out /etc/kubernetes/certs/aci-connector-cert.pem -subj "/C=US/ST=CA/L=virtualkubelet/O=virtualkubelet/OU=virtualkubelet/CN=virtualkubelet"
    ACI_CONNECTOR_KEY=$(base64 /etc/kubernetes/certs/aci-connector-key.pem -w0)
    ACI_CONNECTOR_CERT=$(base64 /etc/kubernetes/certs/aci-connector-cert.pem -w0)

    ACI_CONNECTOR_ADDON_FILE=/etc/kubernetes/addons/aci-connector-deployment.yaml
    # Replace the <placeholder> tokens baked into the addon manifest.
    sed -i "s|<creds>|$ACI_CONNECTOR_CREDENTIALS|g" $ACI_CONNECTOR_ADDON_FILE
    sed -i "s|<rgName>|$RESOURCE_GROUP|g" $ACI_CONNECTOR_ADDON_FILE
    sed -i "s|<cert>|$ACI_CONNECTOR_CERT|g" $ACI_CONNECTOR_ADDON_FILE
    sed -i "s|<key>|$ACI_CONNECTOR_KEY|g" $ACI_CONNECTOR_ADDON_FILE
}
|
||||
|
||||
configAzurePolicyAddon() {
    # Point the azure-policy addon manifest at this cluster's resource group.
    local resource_id="/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP"
    AZURE_POLICY_ADDON_FILE=/etc/kubernetes/addons/azure-policy-deployment.yaml
    sed -i "s|<resourceId>|${resource_id}|g" $AZURE_POLICY_ADDON_FILE
}
|
||||
|
||||
configGPUDrivers() {
    # Install and load NVIDIA GPU drivers for the node's distro, verify the
    # stack responds, then SIGHUP the container runtime so it re-reads its
    # config with the new devices present.
    # install gpu driver
    if [[ $OS == $UBUNTU_OS_NAME ]]; then
        mkdir -p /opt/{actions,gpu}
        if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
            # Driver install runs inside the aks-gpu image via ctr; the
            # image is removed again afterwards to reclaim disk space.
            ctr image pull $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
            retrycmd_if_failure 5 10 600 bash -c "$CTR_GPU_INSTALL_CMD $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG gpuinstall /entrypoint.sh install"
            ret=$?
            if [[ "$ret" != "0" ]]; then
                echo "Failed to install GPU driver, exiting..."
                exit $ERR_GPU_DRIVERS_START_FAIL
            fi
            ctr images rm --sync $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
        else
            # Docker runtime: same install flow via `docker run`.
            bash -c "$DOCKER_GPU_INSTALL_CMD $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG install"
            ret=$?
            if [[ "$ret" != "0" ]]; then
                echo "Failed to install GPU driver, exiting..."
                exit $ERR_GPU_DRIVERS_START_FAIL
            fi
            docker rmi $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
        fi
    elif isMarinerOrAzureLinux "$OS"; then
        downloadGPUDrivers
        installNvidiaContainerToolkit
        enableNvidiaPersistenceMode
    else
        echo "os $OS not supported at this time. skipping configGPUDrivers"
        exit 1
    fi

    # Sanity-check the freshly installed stack before continuing.
    retrycmd_if_failure 120 5 25 nvidia-modprobe -u -c0 || exit $ERR_GPU_DRIVERS_START_FAIL
    retrycmd_if_failure 120 5 300 nvidia-smi || exit $ERR_GPU_DRIVERS_START_FAIL
    retrycmd_if_failure 120 5 25 ldconfig || exit $ERR_GPU_DRIVERS_START_FAIL

    # Fix the NVIDIA /dev/char link issue
    if isMarinerOrAzureLinux "$OS"; then
        createNvidiaSymlinkToAllDeviceNodes
    fi

    # reload containerd/dockerd
    if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
        retrycmd_if_failure 120 5 25 pkill -SIGHUP containerd || exit $ERR_GPU_DRIVERS_INSTALL_TIMEOUT
    else
        retrycmd_if_failure 120 5 25 pkill -SIGHUP dockerd || exit $ERR_GPU_DRIVERS_INSTALL_TIMEOUT
    fi
}
|
||||
|
||||
validateGPUDrivers() {
    # Verify the NVIDIA driver stack is loaded and nvidia-smi responds,
    # attempting (re)installation via configGPUDrivers if modprobe fails.
    # Exits CSE with a GPU-specific error code on unrecoverable failure.
    if [[ $(isARM64) == 1 ]]; then
        # no GPU on ARM64
        return
    fi

    retrycmd_if_failure 24 5 25 nvidia-modprobe -u -c0 && echo "gpu driver loaded" || configGPUDrivers || exit $ERR_GPU_DRIVERS_START_FAIL
    # `command -v` as the condition replaces the `which ...; if [[ $? == 0 ]]`
    # anti-pattern (SC2181): builtin, portable, and no intermediate $? juggling.
    if command -v nvidia-smi >/dev/null 2>&1; then
        SMI_RESULT=$(retrycmd_if_failure 24 5 300 nvidia-smi)
    else
        # Fall back to the AKS install prefix when nvidia-smi is not on PATH.
        SMI_RESULT=$(retrycmd_if_failure 24 5 300 $GPU_DEST/bin/nvidia-smi)
    fi
    # $? here is the status of the command substitution assigned in
    # whichever branch ran above (i.e. the retrycmd_if_failure result).
    SMI_STATUS=$?
    if [[ $SMI_STATUS != 0 ]]; then
        # A corrupted infoROM gets its own exit code so it can be triaged
        # separately from generic driver failures.
        if [[ $SMI_RESULT == *"infoROM is corrupted"* ]]; then
            exit $ERR_GPU_INFO_ROM_CORRUPTED
        else
            exit $ERR_GPU_DRIVERS_START_FAIL
        fi
    else
        echo "gpu driver working fine"
    fi
}
|
||||
|
||||
ensureGPUDrivers() {
    # GPU setup entry point for CSE: installs drivers when
    # CONFIG_GPU_DRIVER_IF_NEEDED is set, otherwise validates the
    # preinstalled stack. No-op on ARM64.
    if [[ $(isARM64) == 1 ]]; then
        # no GPU on ARM64
        return
    fi

    if [[ "${CONFIG_GPU_DRIVER_IF_NEEDED}" = true ]]; then
        logs_to_events "AKS.CSE.ensureGPUDrivers.configGPUDrivers" configGPUDrivers
    else
        logs_to_events "AKS.CSE.ensureGPUDrivers.validateGPUDrivers" validateGPUDrivers
    fi
    # On Ubuntu, also make sure the nvidia-modprobe systemd unit is enabled
    # and running so device nodes exist after reboots.
    if [[ $OS == $UBUNTU_OS_NAME ]]; then
        logs_to_events "AKS.CSE.ensureGPUDrivers.nvidia-modprobe" "systemctlEnableAndStart nvidia-modprobe" || exit $ERR_GPU_DRIVERS_START_FAIL
    fi
}
|
||||
|
||||
disableSSH() {
    # Stop and disable the ssh service; any failure aborts CSE with
    # ERR_DISABLE_SSH.
    if ! systemctlDisableAndStop ssh; then
        exit $ERR_DISABLE_SSH
    fi
}
|
||||
|
||||
setKubeletNodeIPFlag() {
    # Query IMDS for the node's primary private IPv4/IPv6 addresses and, if
    # any are found, append --node-ip=<v4>[,<v6>] to KUBELET_FLAGS.
    # Best-effort: on any IMDS failure the flag is simply not added and
    # cloud-node-manager assigns the node IP later.
    local imdsOutput
    # Using the assignment directly as the `if` condition avoids the
    # `$?`-after pattern (SC2181) and captures curl's status correctly.
    if imdsOutput=$(curl -s -H Metadata:true --noproxy "*" --max-time 5 "http://169.254.169.254/metadata/instance/network/interface?api-version=2021-02-01" 2> /dev/null); then
        nodeIPAddrs=()
        # Quote the JSON payload (SC2086) so word-splitting/globbing can't
        # corrupt it before jq parses it.
        ipv4Addr=$(echo "$imdsOutput" | jq -r '.[0].ipv4.ipAddress[0].privateIpAddress // ""')
        [ -n "$ipv4Addr" ] && nodeIPAddrs+=("$ipv4Addr")
        ipv6Addr=$(echo "$imdsOutput" | jq -r '.[0].ipv6.ipAddress[0].privateIpAddress // ""')
        [ -n "$ipv6Addr" ] && nodeIPAddrs+=("$ipv6Addr")
        nodeIPArg=$(IFS=, ; echo "${nodeIPAddrs[*]}") # join, comma-separated
        if [ -n "$nodeIPArg" ]; then
            echo "Adding --node-ip=$nodeIPArg to kubelet flags"
            KUBELET_FLAGS="$KUBELET_FLAGS --node-ip=$nodeIPArg"
        fi
    fi
}
|
||||
|
||||
#EOF
|
|
@ -1,400 +0,0 @@
|
|||
#!/bin/bash
# CSE helper constants: exit codes and shared globals for AKS node
# provisioning. Exit-code values are part of the CSE contract surfaced to
# the control plane — never renumber them.

# ERR_SYSTEMCTL_ENABLE_FAIL=3 Service could not be enabled by systemctl -- DEPRECATED
ERR_SYSTEMCTL_START_FAIL=4 # Service could not be started or enabled by systemctl
ERR_CLOUD_INIT_TIMEOUT=5 # Timeout waiting for cloud-init runcmd to complete
ERR_FILE_WATCH_TIMEOUT=6 # Timeout waiting for a file
ERR_HOLD_WALINUXAGENT=7 # Unable to place walinuxagent apt package on hold during install
ERR_RELEASE_HOLD_WALINUXAGENT=8 # Unable to release hold on walinuxagent apt package after install
ERR_APT_INSTALL_TIMEOUT=9 # Timeout installing required apt packages
ERR_DOCKER_INSTALL_TIMEOUT=20 # Timeout waiting for docker install
ERR_DOCKER_DOWNLOAD_TIMEOUT=21 # Timeout waiting for docker downloads
ERR_DOCKER_KEY_DOWNLOAD_TIMEOUT=22 # Timeout waiting to download docker repo key
ERR_DOCKER_APT_KEY_TIMEOUT=23 # Timeout waiting for docker apt-key
ERR_DOCKER_START_FAIL=24 # Docker could not be started by systemctl
ERR_MOBY_APT_LIST_TIMEOUT=25 # Timeout waiting for moby apt sources
ERR_MS_GPG_KEY_DOWNLOAD_TIMEOUT=26 # Timeout waiting for MS GPG key download
ERR_MOBY_INSTALL_TIMEOUT=27 # Timeout waiting for moby-docker install
ERR_CONTAINERD_INSTALL_TIMEOUT=28 # Timeout waiting for moby-containerd install
ERR_RUNC_INSTALL_TIMEOUT=29 # Timeout waiting for moby-runc install
ERR_K8S_RUNNING_TIMEOUT=30 # Timeout waiting for k8s cluster to be healthy
ERR_K8S_DOWNLOAD_TIMEOUT=31 # Timeout waiting for Kubernetes downloads
ERR_KUBECTL_NOT_FOUND=32 # kubectl client binary not found on local disk
ERR_IMG_DOWNLOAD_TIMEOUT=33 # Timeout waiting for img download
ERR_KUBELET_START_FAIL=34 # kubelet could not be started by systemctl
ERR_DOCKER_IMG_PULL_TIMEOUT=35 # Timeout trying to pull a Docker image
ERR_CONTAINERD_CTR_IMG_PULL_TIMEOUT=36 # Timeout trying to pull a containerd image via cli tool ctr
ERR_CONTAINERD_CRICTL_IMG_PULL_TIMEOUT=37 # Timeout trying to pull a containerd image via cli tool crictl
ERR_CONTAINERD_INSTALL_FILE_NOT_FOUND=38 # Unable to locate containerd debian pkg file
ERR_CNI_DOWNLOAD_TIMEOUT=41 # Timeout waiting for CNI downloads
ERR_MS_PROD_DEB_DOWNLOAD_TIMEOUT=42 # Timeout waiting for https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb
ERR_MS_PROD_DEB_PKG_ADD_FAIL=43 # Failed to add repo pkg file
# ERR_FLEXVOLUME_DOWNLOAD_TIMEOUT=44 Failed to add repo pkg file -- DEPRECATED
ERR_ORAS_DOWNLOAD_ERROR=45 # Unable to install oras
ERR_SYSTEMD_INSTALL_FAIL=48 # Unable to install required systemd version
ERR_MODPROBE_FAIL=49 # Unable to load a kernel module using modprobe
ERR_OUTBOUND_CONN_FAIL=50 # Unable to establish outbound connection
ERR_K8S_API_SERVER_CONN_FAIL=51 # Unable to establish connection to k8s api server
ERR_K8S_API_SERVER_DNS_LOOKUP_FAIL=52 # Unable to resolve k8s api server name
ERR_K8S_API_SERVER_AZURE_DNS_LOOKUP_FAIL=53 # Unable to resolve k8s api server name due to Azure DNS issue
ERR_KATA_KEY_DOWNLOAD_TIMEOUT=60 # Timeout waiting to download kata repo key
ERR_KATA_APT_KEY_TIMEOUT=61 # Timeout waiting for kata apt-key
ERR_KATA_INSTALL_TIMEOUT=62 # Timeout waiting for kata install
ERR_VHD_FILE_NOT_FOUND=65 # VHD log file not found on VM built from VHD distro (previously classified as exit code 124)
ERR_CONTAINERD_DOWNLOAD_TIMEOUT=70 # Timeout waiting for containerd downloads
ERR_RUNC_DOWNLOAD_TIMEOUT=71 # Timeout waiting for runc downloads
ERR_CUSTOM_SEARCH_DOMAINS_FAIL=80 # Unable to configure custom search domains
ERR_GPU_DOWNLOAD_TIMEOUT=83 # Timeout waiting for GPU driver download
ERR_GPU_DRIVERS_START_FAIL=84 # nvidia-modprobe could not be started by systemctl
ERR_GPU_DRIVERS_INSTALL_TIMEOUT=85 # Timeout waiting for GPU drivers install
ERR_GPU_DEVICE_PLUGIN_START_FAIL=86 # nvidia device plugin could not be started by systemctl
ERR_GPU_INFO_ROM_CORRUPTED=87 # info ROM corrupted error when executing nvidia-smi
ERR_SGX_DRIVERS_INSTALL_TIMEOUT=90 # Timeout waiting for SGX prereqs to download
ERR_SGX_DRIVERS_START_FAIL=91 # Failed to execute SGX driver binary
ERR_APT_DAILY_TIMEOUT=98 # Timeout waiting for apt daily updates
ERR_APT_UPDATE_TIMEOUT=99 # Timeout waiting for apt-get update to complete
ERR_CSE_PROVISION_SCRIPT_NOT_READY_TIMEOUT=100 # Timeout waiting for cloud-init to place this script on the vm
ERR_APT_DIST_UPGRADE_TIMEOUT=101 # Timeout waiting for apt-get dist-upgrade to complete
ERR_APT_PURGE_FAIL=102 # Error purging distro packages
ERR_SYSCTL_RELOAD=103 # Error reloading sysctl config
ERR_CIS_ASSIGN_ROOT_PW=111 # Error assigning root password in CIS enforcement
ERR_CIS_ASSIGN_FILE_PERMISSION=112 # Error assigning permission to a file in CIS enforcement
ERR_PACKER_COPY_FILE=113 # Error writing a file to disk during VHD CI
ERR_CIS_APPLY_PASSWORD_CONFIG=115 # Error applying CIS-recommended passwd configuration
ERR_SYSTEMD_DOCKER_STOP_FAIL=116 # Error stopping dockerd
ERR_CRICTL_DOWNLOAD_TIMEOUT=117 # Timeout waiting for crictl downloads
ERR_CRICTL_OPERATION_ERROR=118 # Error executing a crictl operation
ERR_CTR_OPERATION_ERROR=119 # Error executing a ctr containerd cli operation

# Azure Stack specific errors
ERR_AZURE_STACK_GET_ARM_TOKEN=120 # Error generating a token to use with Azure Resource Manager
ERR_AZURE_STACK_GET_NETWORK_CONFIGURATION=121 # Error fetching the network configuration for the node
ERR_AZURE_STACK_GET_SUBNET_PREFIX=122 # Error fetching the subnet address prefix for a subnet ID

# Error code 124 is returned when a `timeout` command times out, and --preserve-status is not specified: https://man7.org/linux/man-pages/man1/timeout.1.html
ERR_VHD_BUILD_ERROR=125 # Reserved for VHD CI exit conditions

ERR_SWAP_CREATE_FAIL=130 # Error allocating swap file
ERR_SWAP_CREATE_INSUFFICIENT_DISK_SPACE=131 # Error insufficient disk space for swap file creation

ERR_TELEPORTD_DOWNLOAD_ERR=150 # Error downloading teleportd binary
ERR_TELEPORTD_INSTALL_ERR=151 # Error installing teleportd binary
ERR_ARTIFACT_STREAMING_DOWNLOAD=152 # Error downloading mirror proxy and overlaybd components
ERR_ARTIFACT_STREAMING_INSTALL=153 # Error installing mirror proxy and overlaybd components

ERR_HTTP_PROXY_CA_CONVERT=160 # Error converting http proxy ca cert from pem to crt format
ERR_UPDATE_CA_CERTS=161 # Error updating ca certs to include user-provided certificates

ERR_DISBALE_IPTABLES=170 # Error disabling iptables service (NOTE: variable-name typo kept for compatibility)

ERR_KRUSTLET_DOWNLOAD_TIMEOUT=171 # Timeout waiting for krustlet downloads
ERR_DISABLE_SSH=172 # Error disabling ssh service

ERR_VHD_REBOOT_REQUIRED=200 # Reserved for VHD reboot required exit condition
ERR_NO_PACKAGES_FOUND=201 # Reserved for no security packages found exit condition

ERR_SYSTEMCTL_MASK_FAIL=2 # Service could not be masked by systemctl

# Distro detection: OS is the uppercased ID (or ID_LIKE=coreos) from
# /etc/*-release; OS_VERSION is the uppercased VERSION_ID with quotes removed.
OS=$(sort -r /etc/*-release | gawk 'match($0, /^(ID_LIKE=(coreos)|ID=(.*))$/, a) { print toupper(a[2] a[3]); exit }')
OS_VERSION=$(sort -r /etc/*-release | gawk 'match($0, /^(VERSION_ID=(.*))$/, a) { print toupper(a[2] a[3]); exit }' | tr -d '"')
UBUNTU_OS_NAME="UBUNTU"
MARINER_OS_NAME="MARINER"
AZURELINUX_OS_NAME="AZURELINUX"
KUBECTL=/usr/local/bin/kubectl
DOCKER=/usr/bin/docker
# this will be empty during VHD build
# but vhd build runs with `set -o nounset`
# so needs a default value
# prefer empty string to avoid potential "it works but did something weird" scenarios
export GPU_DV="${GPU_DRIVER_VERSION:=}"
export GPU_DEST=/usr/local/nvidia
NVIDIA_DOCKER_VERSION=2.8.0-1
DOCKER_VERSION=1.13.1-1
NVIDIA_CONTAINER_RUNTIME_VERSION="3.6.0"
export NVIDIA_DRIVER_IMAGE_SHA="sha-e8873b"
export NVIDIA_DRIVER_IMAGE_TAG="${GPU_DV}-${NVIDIA_DRIVER_IMAGE_SHA}"
export NVIDIA_DRIVER_IMAGE="mcr.microsoft.com/aks/aks-gpu"
# Command templates used by configGPUDrivers to run the driver installer
# inside the aks-gpu image under containerd or docker respectively.
export CTR_GPU_INSTALL_CMD="ctr run --privileged --rm --net-host --with-ns pid:/proc/1/ns/pid --mount type=bind,src=/opt/gpu,dst=/mnt/gpu,options=rbind --mount type=bind,src=/opt/actions,dst=/mnt/actions,options=rbind"
export DOCKER_GPU_INSTALL_CMD="docker run --privileged --net=host --pid=host -v /opt/gpu:/mnt/gpu -v /opt/actions:/mnt/actions --rm"
APT_CACHE_DIR=/var/cache/apt/archives/
PERMANENT_CACHE_DIR=/root/aptcache/
EVENTS_LOGGING_DIR=/var/log/azure/Microsoft.Azure.Extensions.CustomScript/events/
CURL_OUTPUT=/tmp/curl_verbose.out
|
||||
|
||||
retrycmd_if_failure() {
    # Run "$@" under `timeout`, retrying up to $1 times with $2-second
    # sleeps and a $3-second per-attempt timeout. Logs the attempt count;
    # returns 1 when every attempt failed.
    retries=$1; wait_sleep=$2; timeout=$3; shift && shift && shift
    for i in $(seq 1 $retries); do
        if timeout $timeout "${@}"; then
            break
        fi
        if [ $i -eq $retries ]; then
            echo Executed \"$@\" $i times;
            return 1
        fi
        sleep $wait_sleep
    done
    echo Executed \"$@\" $i times;
}
|
||||
retrycmd_if_failure_no_stats() {
    # Same retry loop as retrycmd_if_failure but without attempt-count logs.
    # NOTE(review): ${@} is deliberately unquoted here, matching the
    # original — call sites may pass a whole command as one string and rely
    # on word-splitting.
    retries=$1; wait_sleep=$2; timeout=$3; shift && shift && shift
    for i in $(seq 1 $retries); do
        if timeout $timeout ${@}; then
            break
        fi
        if [ $i -eq $retries ]; then
            return 1
        fi
        sleep $wait_sleep
    done
}
|
||||
retrycmd_get_tarball() {
    # Download url $4 to tarball path $3, retrying up to $1 times with
    # $2-second sleeps. An attempt succeeds once the tarball exists and
    # passes a `tar -tzf` integrity listing; verbose curl output is kept in
    # CURL_OUTPUT and dumped only when a download fails.
    tar_retries=$1; wait_sleep=$2; tarball=$3; url=$4
    echo "${tar_retries} retries"
    for i in $(seq 1 $tar_retries); do
        if tar -tzf $tarball; then
            break
        fi
        if [ $i -eq $tar_retries ]; then
            return 1
        fi
        if ! timeout 60 curl -fsSLv $url -o $tarball > $CURL_OUTPUT 2>&1; then
            cat $CURL_OUTPUT
        fi
        sleep $wait_sleep
    done
}
|
||||
retrycmd_get_tarball_from_registry_with_oras() {
    # Pull a tarball artifact from registry url $4 into $3's directory with
    # oras, retrying up to $1 times with $2-second sleeps. An attempt
    # succeeds once $3 passes `tar -tzf`; oras output is kept in
    # ORAS_OUTPUT and dumped only on pull failure.
    tar_retries=$1; wait_sleep=$2; tarball=$3; url=$4
    tar_folder=$(dirname "$tarball")
    echo "${tar_retries} retries"
    for i in $(seq 1 $tar_retries); do
        if tar -tzf $tarball; then
            break
        fi
        if [ $i -eq $tar_retries ]; then
            return 1
        fi
        # TODO: support private acr via kubelet identity
        if ! timeout 60 oras pull $url -o $tar_folder --registry-config ${ORAS_REGISTRY_CONFIG_FILE} > $ORAS_OUTPUT 2>&1; then
            cat $ORAS_OUTPUT
        fi
        sleep $wait_sleep
    done
}
|
||||
retrycmd_get_binary_from_registry_with_oras() {
    # Pull the binary artifact at registry url $4 into $3's directory with
    # oras, retrying up to $1 times with $2-second sleeps; an attempt
    # succeeds once $3 exists on disk.
    binary_retries=$1; wait_sleep=$2; binary_path=$3; url=$4
    binary_folder=$(dirname "$binary_path")
    echo "${binary_retries} retries"

    for i in $(seq 1 $binary_retries); do
        if [ -f "$binary_path" ]; then
            break
        fi
        if [ $i -eq $binary_retries ]; then
            return 1
        fi
        # TODO: support private acr via kubelet identity
        if ! timeout 60 oras pull $url -o $binary_folder --registry-config ${ORAS_REGISTRY_CONFIG_FILE} > $ORAS_OUTPUT 2>&1; then
            cat $ORAS_OUTPUT
        fi
        sleep $wait_sleep
    done
}
|
||||
retrycmd_curl_file() {
    # Download url $5 to path $4 with curl, retrying $1 times with
    # $2-second sleeps and a $3-second per-attempt timeout. An attempt
    # succeeds once the target file exists; verbose curl output is tee'd to
    # CURL_OUTPUT and shown only when the pipeline fails.
    curl_retries=$1; wait_sleep=$2; timeout=$3; filepath=$4; url=$5
    echo "${curl_retries} retries"
    for i in $(seq 1 $curl_retries); do
        [[ -f $filepath ]] && break
        if [ $i -eq $curl_retries ]; then
            return 1
        fi
        if ! timeout $timeout curl -fsSLv $url -o $filepath 2>&1 | tee $CURL_OUTPUT >/dev/null; then
            cat $CURL_OUTPUT
        fi
        sleep $wait_sleep
    done
}
|
||||
wait_for_file() {
    # Wait (up to $1 attempts, $2-second sleeps) for cloud-init to finish
    # writing file $3, as signalled by a trailing '#EOF' marker line; then
    # strip the marker and record the file in the "paved" ledger so later
    # calls for the same file return immediately.
    retries=$1; wait_sleep=$2; filepath=$3
    paved=/opt/azure/cloud-init-files.paved
    grep -Fq "${filepath}" $paved && return 0
    for i in $(seq 1 $retries); do
        grep -Fq '#EOF' $filepath && break
        if [ $i -eq $retries ]; then
            return 1
        else
            sleep $wait_sleep
        fi
    done
    # Remove the sentinel in place and remember this file is complete.
    sed -i "/#EOF/d" $filepath
    echo $filepath >> $paved
}
|
||||
systemctl_restart() {
    # daemon-reload and restart unit $4, retrying $1 times with $2-second
    # sleeps and a $3-second timeout per systemctl invocation. Dumps unit
    # status and journal between failed attempts for diagnosis.
    retries=$1; wait_sleep=$2; timeout=$3 svcname=$4
    for i in $(seq 1 $retries); do
        timeout $timeout systemctl daemon-reload
        if timeout $timeout systemctl restart $svcname; then
            break
        fi
        if [ $i -eq $retries ]; then
            return 1
        fi
        # Surface diagnostics for this failed attempt before retrying.
        systemctl status $svcname --no-pager -l
        journalctl -u $svcname
        sleep $wait_sleep
    done
}
|
||||
systemctl_stop() {
    # daemon-reload and stop unit $4, retrying $1 times with $2-second
    # sleeps and a $3-second timeout per systemctl invocation.
    retries=$1; wait_sleep=$2; timeout=$3 svcname=$4
    for i in $(seq 1 $retries); do
        timeout $timeout systemctl daemon-reload
        if timeout $timeout systemctl stop $svcname; then
            break
        fi
        if [ $i -eq $retries ]; then
            return 1
        fi
        sleep $wait_sleep
    done
}
|
||||
systemctl_disable() {
    # daemon-reload and disable unit $4, retrying $1 times with $2-second
    # sleeps and a $3-second timeout per systemctl invocation.
    retries=$1; wait_sleep=$2; timeout=$3 svcname=$4
    for i in $(seq 1 $retries); do
        timeout $timeout systemctl daemon-reload
        if timeout $timeout systemctl disable $svcname; then
            break
        fi
        if [ $i -eq $retries ]; then
            return 1
        fi
        sleep $wait_sleep
    done
}
|
||||
sysctl_reload() {
    # Apply all sysctl drop-in settings via `sysctl --system`, retrying
    # $1 times with $2-second sleeps and a $3-second per-attempt timeout.
    retries=$1; wait_sleep=$2; timeout=$3
    for i in $(seq 1 $retries); do
        if timeout $timeout sysctl --system; then
            break
        fi
        if [ $i -eq $retries ]; then
            return 1
        fi
        sleep $wait_sleep
    done
}
|
||||
version_gte() {
    # True (0) when $1 is >= every other argument under version-aware
    # ordering, i.e. $1 sorts as the highest version.
    local highest
    highest=$(printf '%s\n' "$@" | sort -V | tail -n 1)
    [ "$highest" == "$1" ]
}
|
||||
|
||||
systemctlEnableAndStart() {
    # Restart unit $1 with retries, snapshot its status to
    # /var/log/azure/$1-status.log (even on success), then enable it.
    # Returns 1 when either the restart or the enable fails.
    systemctl_restart 100 5 30 $1
    RESTART_STATUS=$?
    systemctl status $1 --no-pager -l > /var/log/azure/$1-status.log
    if [ $RESTART_STATUS -ne 0 ]; then
        echo "$1 could not be started"
        return 1
    fi
    retrycmd_if_failure 120 5 25 systemctl enable $1 || {
        echo "$1 could not be enabled by systemctl"
        return 1
    }
}
|
||||
|
||||
systemctlDisableAndStop() {
    # Best-effort stop + disable of unit $1, only when systemd knows about
    # it; failures are logged but never fatal.
    if ! systemctl list-units --full --all | grep -q "$1.service"; then
        return 0
    fi
    systemctl_stop 20 5 25 $1 || echo "$1 could not be stopped"
    systemctl_disable 20 5 25 $1 || echo "$1 could not be disabled"
}
|
||||
|
||||
# return true if a >= b
|
||||
semverCompare() {
    # Return 0 (true) when version $1 >= version $2 under version-aware
    # ordering; any "+build" metadata suffix is ignored for the comparison.
    VERSION_A=$(echo "$1" | cut -d "+" -f 1)
    VERSION_B=$(echo "$2" | cut -d "+" -f 1)
    [[ "${VERSION_A}" == "${VERSION_B}" ]] && return 0
    # Pick the higher of the two with a version-aware sort. `tail -n 1`
    # replaces the previous fragile `cut -d$'\n' -f2` newline-delimiter
    # hack, whose behavior is undefined/implementation-specific.
    highestVersion=$(printf '%s\n%s\n' "${VERSION_A}" "${VERSION_B}" | sort -V | tail -n 1)
    [[ "${VERSION_A}" == "${highestVersion}" ]] && return 0
    return 1
}
|
||||
downloadDebPkgToFile() {
    # Fetch the .deb archive for package $1 at version $2 into directory $3
    # via `apt-get download`, with retries. PKG_* stay global, matching the
    # original contract.
    PKG_NAME=$1
    PKG_VERSION=$2
    PKG_DIRECTORY=$3
    mkdir -p "${PKG_DIRECTORY}"
    # shellcheck disable=SC2164
    pushd "${PKG_DIRECTORY}"
    # The trailing glob tolerates distro-specific version suffixes.
    retrycmd_if_failure 10 5 600 apt-get download ${PKG_NAME}=${PKG_VERSION}*
    # shellcheck disable=SC2164
    popd
}
|
||||
apt_get_download() {
    # Download (not install) apt packages "$@" into APT_CACHE_DIR, retrying
    # $1 times with $2-second sleeps. Returns 1 when all attempts fail or
    # the cache directory cannot be entered.
    retries=$1; wait_sleep=$2; shift && shift;
    local ret=0
    pushd $APT_CACHE_DIR || return 1
    for i in $(seq 1 $retries); do
        # Clear any half-configured dpkg state and wait for apt locks
        # before each attempt.
        dpkg --configure -a --force-confdef
        wait_for_apt_locks
        if apt-get -o Dpkg::Options::=--force-confold download -y "${@}"; then
            break
        fi
        if [ $i -eq $retries ]; then ret=1; else sleep $wait_sleep; fi
    done
    popd || return 1
    return $ret
}
|
||||
getCPUArch() {
    # Normalize `uname -m` to the GOARCH-style names used for AKS
    # artifacts: "arm64" for aarch64/arm64 machines, "amd64" otherwise.
    local machine
    machine=$(uname -m)
    case "${machine,,}" in
        aarch64|arm64) echo "arm64" ;;
        *) echo "amd64" ;;
    esac
}
|
||||
isARM64() {
    # Print "1" on arm64 machines and "0" otherwise — a numeric flag for
    # callers that test `[[ $(isARM64) == 1 ]]`. Exit status is always 0.
    [[ $(getCPUArch) == "arm64" ]] && echo 1 || echo 0
}
|
||||
|
||||
logs_to_events() {
    # Run a command and emit a guest-agent (GA) telemetry event — a JSON
    # file in EVENTS_LOGGING_DIR — recording task name, start/end time, and
    # a completion message. Propagates the command's non-zero exit status.
    # Usage: logs_to_events "Task.Name" cmd arg...  (or one quoted string,
    # relying on the deliberate word-splitting below).
    # local vars here allow for nested function tracking
    # installContainerRuntime for example
    local task=$1; shift
    # Epoch-milliseconds filename keeps events unique and ordered.
    local eventsFileName=$(date +%s%3N)

    local startTime=$(date +"%F %T.%3N")
    # NOTE(review): ${@} is intentionally unquoted — call sites pass whole
    # commands as a single string (e.g. "systemctlEnableAndStart kubelet")
    # and depend on word-splitting here. Do not quote.
    ${@}
    ret=$?
    local endTime=$(date +"%F %T.%3N")

    # arg names are defined by GA and all these are required to be correctly read by GA
    # EventPid, EventTid are required to be int. No use case for them at this point.
    # NOTE(review): endTime is passed as OperationId — appears to be a
    # GA-schema quirk; confirm before changing.
    json_string=$( jq -n \
        --arg Timestamp "${startTime}" \
        --arg OperationId "${endTime}" \
        --arg Version "1.23" \
        --arg TaskName "${task}" \
        --arg EventLevel "Informational" \
        --arg Message "Completed: ${@}" \
        --arg EventPid "0" \
        --arg EventTid "0" \
        '{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
    )
    echo ${json_string} > ${EVENTS_LOGGING_DIR}${eventsFileName}.json

    # this allows an error from the command at ${@} to be returned and correct code assigned in cse_main
    if [ "$ret" != "0" ]; then
        return $ret
    fi
}
|
||||
|
||||
should_skip_nvidia_drivers() {
    # Consult IMDS instance tags and print "true" when a tag whose name
    # matches SkipGpuDriverInstall (case-insensitive) is set to "true";
    # propagates curl's exit code when IMDS is unreachable.
    set -x
    body=$(curl -fsSL -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2021-02-01")
    ret=$?
    if [ "$ret" != "0" ]; then
        return $ret
    fi
    # jq -e mirrors the boolean result in its exit status; stdout is the
    # literal string "true"/"false" for the caller to inspect.
    should_skip=$(echo "$body" | jq -e '.compute.tagsList | map(select(.name | test("SkipGpuDriverInstall"; "i")))[0].value // "false" | test("true"; "i")')
    echo "$should_skip" # true or false
}
|
||||
#HELPERSEOF
|
|
@ -1,531 +0,0 @@
|
|||
#!/bin/bash
# Shared constants for the CSE install helpers: download/install locations for
# the container runtime, CNI plugins, crictl, kubernetes binaries, wasm shims,
# and teleportd, plus distro-detection values used throughout this file.

CC_SERVICE_IN_TMP=/opt/azure/containers/cc-proxy.service.in
CC_SOCKET_IN_TMP=/opt/azure/containers/cc-proxy.socket.in
CNI_CONFIG_DIR="/etc/cni/net.d"
CNI_BIN_DIR="/opt/cni/bin"
CNI_DOWNLOADS_DIR="/opt/cni/downloads"
CRICTL_DOWNLOAD_DIR="/opt/crictl/downloads"
CRICTL_BIN_DIR="/usr/local/bin"
CONTAINERD_DOWNLOADS_DIR="/opt/containerd/downloads"
RUNC_DOWNLOADS_DIR="/opt/runc/downloads"
K8S_DOWNLOADS_DIR="/opt/kubernetes/downloads"
# e.g. "22.04"; empty on non-Ubuntu distros (lsb_release absent/failing)
UBUNTU_RELEASE=$(lsb_release -r -s)
# Uppercased distro ID parsed from /etc/*-release (e.g. UBUNTU, MARINER)
OS=$(sort -r /etc/*-release | gawk 'match($0, /^(ID_LIKE=(coreos)|ID=(.*))$/, a) { print toupper(a[2] a[3]); exit }')
TELEPORTD_PLUGIN_DOWNLOAD_DIR="/opt/teleportd/downloads"
TELEPORTD_PLUGIN_BIN_DIR="/usr/local/bin"
CONTAINERD_WASM_VERSIONS="v0.3.0 v0.5.1 v0.8.0"
SPIN_KUBE_VERSIONS="v0.15.1"
MANIFEST_FILEPATH="/opt/azure/manifest.json"
MAN_DB_AUTO_UPDATE_FLAG_FILEPATH="/var/lib/man-db/auto-update"
# verbose curl output captured here for error reporting on download failures
CURL_OUTPUT=/tmp/curl_verbose.out
|
||||
|
||||
# Deletes the man-db auto-update flag file so mandb does not rebuild its
# database automatically while provisioning runs.
removeManDbAutoUpdateFlagFile() {
    rm -f "${MAN_DB_AUTO_UPDATE_FLAG_FILEPATH}"
}
|
||||
|
||||
# Restores the man-db auto-update flag file so mandb resumes automatic
# database rebuilds after provisioning.
createManDbAutoUpdateFlagFile() {
    touch "${MAN_DB_AUTO_UPDATE_FLAG_FILEPATH}"
}
|
||||
|
||||
# Removes the containerd download cache directory and everything in it.
cleanupContainerdDlFiles() {
    rm -rf "${CONTAINERD_DOWNLOADS_DIR}"
}
|
||||
|
||||
installContainerRuntime() {
    # Installs the node's container runtime: standalone containerd when
    # NEEDS_CONTAINERD=true (version resolved from the VHD manifest file),
    # otherwise Moby.
    if [ "${NEEDS_CONTAINERD}" == "true" ]; then
        echo "in installContainerRuntime - KUBERNETES_VERSION = ${KUBERNETES_VERSION}"
        local containerd_version
        if [ -f "$MANIFEST_FILEPATH" ]; then
            # default to the "edge" containerd version; Ubuntu 18.04 stays pinned
            containerd_version="$(jq -r .containerd.edge "$MANIFEST_FILEPATH")"
            if [ "${UBUNTU_RELEASE}" == "18.04" ]; then
                containerd_version="$(jq -r '.containerd.pinned."1804"' "$MANIFEST_FILEPATH")"
            fi
        else
            # NOTE(review): no hardcoded fallback is actually assigned here, so
            # the validation below fails when the manifest is missing — confirm intended.
            echo "WARNING: containerd version not found in manifest, defaulting to hardcoded."
        fi

        # split "<patch>-<revision>", e.g. "1.7.15-1" -> "1.7.15" and "1"
        containerd_patch_version="$(echo "$containerd_version" | cut -d- -f1)"
        containerd_revision="$(echo "$containerd_version" | cut -d- -f2)"
        if [ -z "$containerd_patch_version" ] || [ "$containerd_patch_version" == "null" ] || [ "$containerd_revision" == "null" ]; then
            echo "invalid container version: $containerd_version"
            # NOTE(review): exits with the install *timeout* code for a parse
            # failure — confirm this is the intended error code.
            exit $ERR_CONTAINERD_INSTALL_TIMEOUT
        fi

        logs_to_events "AKS.CSE.installContainerRuntime.installStandaloneContainerd" "installStandaloneContainerd ${containerd_patch_version} ${containerd_revision}"
        echo "in installContainerRuntime - CONTAINERD_VERION = ${containerd_patch_version}"
    else
        installMoby
    fi
}
|
||||
|
||||
# Installs the node's CNI plugins: Azure CNI when NETWORK_PLUGIN=azure, always
# the reference plugins (kubenet; loopback needed by containerd until v2),
# then clears the download cache in the background.
installNetworkPlugin() {
    case "${NETWORK_PLUGIN}" in
        azure)
            installAzureCNI
            ;;
    esac
    installCNI
    rm -rf "${CNI_DOWNLOADS_DIR}" &
}
|
||||
|
||||
# Checks whether every requested wasm shim binary already exists on disk.
# $1 - directory holding the shims; $2 - shim version (e.g. "v0.8.0");
# $3 - filename version suffix; $4.. - shim names to check.
# Returns 0 (and logs) when all are present, 1 as soon as one is missing.
wasmFilesExist() {
    local shim_dir=${1}
    local shim_version=${2}
    local version_suffix=${3}
    local required_shims=("${@:4}")

    # on-disk names replace dots with dashes: v0.8.0 -> v0-8-0
    local binary_version
    binary_version="$(echo "${shim_version}" | tr . -)"

    local shim
    for shim in "${required_shims[@]}"; do
        [ -f "${shim_dir}/containerd-shim-${shim}-${binary_version}-${version_suffix}" ] || return 1
    done
    echo "all wasm files exist for ${shim_dir}/containerd-shim-*-${binary_version}-${version_suffix}"
    return 0
}
|
||||
|
||||
# Install, download, update wasm must all be run from the same function call
|
||||
# in order to ensure WASMSHIMPIDS persists correctly since in bash a new
|
||||
# function call from install-dependnecies will create a new shell process.
|
||||
installContainerdWasmShims(){
    # Downloads and installs the containerd wasm shims for each requested version.
    # $1 - install directory; $2 - download URL template (evaluated per call);
    # $3.. - shim versions without the leading "v".
    # Downloads run in the background; PIDs accumulate in WASMSHIMPIDS, which is
    # why download/wait/chmod must all happen inside this single function call.
    local download_location=${1}
    PACKAGE_DOWNLOAD_URL=${2}
    local package_versions=("${@:3}") # Capture all arguments starting from the third indx

    for version in "${package_versions[@]}"; do
        local shims_to_download=("spin" "slight")
        # the wws shim ships only with the 0.8.0 release
        if [[ "$version" == "0.8.0" ]]; then
            shims_to_download+=("wws")
        fi
        containerd_wasm_url=$(evalPackageDownloadURL ${PACKAGE_DOWNLOAD_URL})
        downloadContainerdWasmShims $download_location $containerd_wasm_url "v$version" "${shims_to_download[@]}" # adding v to version for simplicity
    done
    # wait for file downloads to complete before updating file permissions
    wait ${WASMSHIMPIDS[@]}
    for version in "${package_versions[@]}"; do
        local shims_to_download=("spin" "slight")
        if [[ "$version" == "0.8.0" ]]; then
            shims_to_download+=("wws")
        fi
        updateContainerdWasmShimsPermissions $download_location "v$version" "${shims_to_download[@]}"
    done
}
|
||||
|
||||
downloadContainerdWasmShims() {
    # Downloads the containerd wasm shim binaries into $1, skipping the download
    # entirely when all files are already present.
    # $1 - target directory; $2 - base download URL; $3 - shim version ("v0.8.0");
    # $4.. - shim names (spin, slight, wws).
    # Background curl PIDs are appended to WASMSHIMPIDS; the caller must wait.
    local containerd_wasm_filepath=${1}
    local containerd_wasm_url=${2}
    local shim_version=${3}
    local shims_to_download=("${@:4}") # Capture all arguments starting from the fourth indx

    local binary_version="$(echo "${shim_version}" | tr . -)" # replaces . with - == 1.2.3 -> 1-2-3

    if wasmFilesExist "$containerd_wasm_filepath" "$shim_version" "-v1" "${shims_to_download[@]}"; then
        echo "containerd-wasm-shims already exists in $containerd_wasm_filepath, will not be downloading."
        return
    fi

    # Oras download for WASM for Network Isolated Clusters
    BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER="${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER:=}"
    if [[ ! -z ${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER} ]]; then
        local registry_url="${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER}/oss/binaries/deislabs/containerd-wasm-shims:${shim_version}-linux-${CPU_ARCH}"
        local wasm_shims_tgz_tmp=$containerd_wasm_filepath/containerd-wasm-shims-linux-${CPU_ARCH}.tar.gz

        retrycmd_get_tarball_from_registry_with_oras 120 5 "${wasm_shims_tgz_tmp}" ${registry_url} || exit $ERR_ORAS_PULL_CONTAINERD_WASM
        tar -zxf "$wasm_shims_tgz_tmp" -C $containerd_wasm_filepath
        # BUGFIX: the original single mv quoted a '*' glob ("containerd-shim-*-…"),
        # which bash never expands inside double quotes, so no file was ever
        # renamed. Rename each extracted shim from its dotted-version name to the
        # dashed-version name explicitly instead.
        local shim
        for shim in "${shims_to_download[@]}"; do
            if [ -f "$containerd_wasm_filepath/containerd-shim-${shim}-${shim_version}-v1" ]; then
                mv "$containerd_wasm_filepath/containerd-shim-${shim}-${shim_version}-v1" \
                   "$containerd_wasm_filepath/containerd-shim-${shim}-${binary_version}-v1"
            fi
        done
        rm -f "$wasm_shims_tgz_tmp"
        return
    fi

    for shim in "${shims_to_download[@]}"; do
        retrycmd_if_failure 30 5 60 curl -fSLv -o "$containerd_wasm_filepath/containerd-shim-${shim}-${binary_version}-v1" "$containerd_wasm_url/containerd-shim-${shim}-v1" 2>&1 | tee $CURL_OUTPUT >/dev/null | grep -E "^(curl:.*)|([eE]rr.*)$" && (cat $CURL_OUTPUT && exit $ERR_KRUSTLET_DOWNLOAD_TIMEOUT) &
        WASMSHIMPIDS+=($!)
    done
}
|
||||
|
||||
# Marks the downloaded wasm shim binaries in $1 as executable (mode 755).
# $1 - shim directory; $2 - shim version (e.g. "v0.8.0"); $3.. - shim names.
updateContainerdWasmShimsPermissions() {
    local shim_dir=${1}
    local shim_version=${2}
    local shim_names=("${@:3}")

    # filenames use dashes where the version has dots: v0.8.0 -> v0-8-0
    local dashed_version
    dashed_version="$(echo "${shim_version}" | tr . -)"

    local name
    for name in "${shim_names[@]}"; do
        chmod 755 "${shim_dir}/containerd-shim-${name}-${dashed_version}-v1"
    done
}
|
||||
|
||||
installSpinKube(){
    # Downloads the spinkube containerd shim for each requested version and
    # marks it executable once all background downloads finish.
    # $1 - install directory; $2 - download URL template; $3.. - versions.
    local download_location=${1}
    PACKAGE_DOWNLOAD_URL=${2}
    local package_versions=("${@:3}") # Capture all arguments starting from the third indx

    for version in "${package_versions[@]}"; do
        containerd_spinkube_url=$(evalPackageDownloadURL ${PACKAGE_DOWNLOAD_URL})
        downloadSpinKube $download_location $containerd_spinkube_url "v$version" # adding v to version for simplicity
    done
    # barrier: downloads run in the background, collected in SPINKUBEPIDS
    wait ${SPINKUBEPIDS[@]}
    for version in "${package_versions[@]}"; do
        # NOTE(review): the same fixed path is chmod'ed once per version; the
        # loop looks redundant — confirm whether per-version paths were intended.
        chmod 755 "$download_location/containerd-shim-spin-v2"
    done
}
|
||||
|
||||
downloadSpinKube(){
    # Downloads the containerd-shim-spin-v2 binary into $1 unless it already
    # exists; uses oras against the bootstrap registry for network-isolated
    # clusters, falling back to a background curl download otherwise.
    # $1 - target directory; $2 - base download URL; $3 - shim version.
    # Appends the background curl PID to SPINKUBEPIDS; the caller must wait.
    local containerd_spinkube_filepath=${1}
    local containerd_spinkube_url=${2}
    local shim_version=${3}
    local shims_to_download=("${@:4}") # Capture all arguments starting from the fourth indx

    if [ -f "$containerd_spinkube_filepath/containerd-shim-spin-v2" ]; then
        echo "containerd-shim-spin-v2 already exists in $containerd_spinkube_filepath, will not be downloading."
        return
    fi

    # oras pull path for network-isolated clusters (bootstrap registry configured)
    BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER="${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER:=}"
    if [[ ! -z ${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER} ]]; then
        local registry_url="${BOOTSTRAP_PROFILE_CONTAINER_REGISTRY_SERVER}/oss/binaries/spinkube/containerd-shim-spin:${shim_version}-linux-${CPU_ARCH}"
        local wasm_shims_tgz_tmp="${containerd_spinkube_filepath}/containerd-shim-spin-v2"
        retrycmd_get_binary_from_registry_with_oras 120 5 "${wasm_shims_tgz_tmp}" "${registry_url}" || exit $ERR_ORAS_PULL_CONTAINERD_WASM
        # NOTE(review): this deletes the binary that was just pulled to the same
        # path — looks copy/pasted from a tarball flow; confirm intended.
        rm -f "$wasm_shims_tgz_tmp"
        return
    fi

    retrycmd_if_failure 30 5 60 curl -fSLv -o "$containerd_spinkube_filepath/containerd-shim-spin-v2" "$containerd_spinkube_url/containerd-shim-spin-v2" 2>&1 | tee $CURL_OUTPUT >/dev/null | grep -E "^(curl:.*)|([eE]rr.*)$" && (cat $CURL_OUTPUT && exit $ERR_KRUSTLET_DOWNLOAD_TIMEOUT) &
    SPINKUBEPIDS+=($!)
}
|
||||
|
||||
# Fetches the Azure CNI tarball named by VNET_CNI_PLUGINS_URL into the CNI
# download cache; exits with ERR_CNI_DOWNLOAD_TIMEOUT on failure.
# Sets the global CNI_TGZ_TMP to the tarball's basename.
downloadAzureCNI() {
    mkdir -p "${CNI_DOWNLOADS_DIR}"
    # strip everything through the final '/' to get the tarball filename
    CNI_TGZ_TMP=${VNET_CNI_PLUGINS_URL##*/}
    retrycmd_get_tarball 120 5 "$CNI_DOWNLOADS_DIR/${CNI_TGZ_TMP}" ${VNET_CNI_PLUGINS_URL} || exit $ERR_CNI_DOWNLOAD_TIMEOUT
}
|
||||
|
||||
# Downloads the crictl release tarball for the given version ($1) and the
# node's CPU architecture into the crictl download cache.
# Sets globals CRICTL_VERSION, CPU_ARCH, CRICTL_DOWNLOAD_URL, CRICTL_TGZ_TEMP.
downloadCrictl() {
    CRICTL_VERSION=$1
    CPU_ARCH=$(getCPUArch) #amd64 or arm64
    mkdir -p "${CRICTL_DOWNLOAD_DIR}"
    CRICTL_DOWNLOAD_URL="https://acs-mirror.azureedge.net/cri-tools/v${CRICTL_VERSION}/binaries/crictl-v${CRICTL_VERSION}-linux-${CPU_ARCH}.tar.gz"
    # tarball filename is the URL basename
    CRICTL_TGZ_TEMP=${CRICTL_DOWNLOAD_URL##*/}
    retrycmd_curl_file 10 5 60 "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" ${CRICTL_DOWNLOAD_URL}
}
|
||||
|
||||
installCrictl() {
    # Unpacks the pre-cached crictl tarball into CRICTL_BIN_DIR unless some
    # crictl is already on the PATH. Returns 1 when no pre-cached tarball is
    # found (and removes the empty cache dir).
    # Reads CRICTL_VERSION — presumably set by the caller/downloadCrictl; TODO confirm.
    CPU_ARCH=$(getCPUArch) #amd64 or arm64
    currentVersion=$(crictl --version 2>/dev/null | sed 's/crictl version //g')
    if [[ "${currentVersion}" != "" ]]; then
        echo "version ${currentVersion} of crictl already installed. skipping installCrictl of target version ${KUBERNETES_VERSION%.*}.0"
    else
        # this is only called during cse. VHDs should have crictl binaries pre-cached so no need to download.
        # if the vhd does not have crictl pre-baked, return early
        CRICTL_TGZ_TEMP="crictl-v${CRICTL_VERSION}-linux-${CPU_ARCH}.tar.gz"
        if [[ ! -f "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" ]]; then
            rm -rf ${CRICTL_DOWNLOAD_DIR}
            echo "pre-cached crictl not found: skipping installCrictl"
            return 1
        fi
        echo "Unpacking crictl into ${CRICTL_BIN_DIR}"
        tar zxvf "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" -C ${CRICTL_BIN_DIR}
        chown root:root $CRICTL_BIN_DIR/crictl
        chmod 755 $CRICTL_BIN_DIR/crictl
    fi
}
|
||||
|
||||
downloadTeleportdPlugin() {
    # Downloads the teleportd plugin binary into its download cache.
    # $1 - base download URL; $2 - teleportd version (also exported as the
    # global TELEPORTD_VERSION, read later by installTeleportdPlugin).
    # No-op on arm64; exits ERR_TELEPORTD_DOWNLOAD_ERR on missing args or
    # download failure.
    DOWNLOAD_URL=$1
    TELEPORTD_VERSION=$2
    if [[ $(isARM64) == 1 ]]; then
        # no arm64 teleport binaries according to owner
        return
    fi

    if [[ -z ${DOWNLOAD_URL} ]]; then
        echo "download url parameter for downloadTeleportdPlugin was not given"
        exit $ERR_TELEPORTD_DOWNLOAD_ERR
    fi
    if [[ -z ${TELEPORTD_VERSION} ]]; then
        echo "teleportd version not given"
        exit $ERR_TELEPORTD_DOWNLOAD_ERR
    fi
    mkdir -p $TELEPORTD_PLUGIN_DOWNLOAD_DIR
    retrycmd_curl_file 10 5 60 "${TELEPORTD_PLUGIN_DOWNLOAD_DIR}/teleportd-v${TELEPORTD_VERSION}" "${DOWNLOAD_URL}/v${TELEPORTD_VERSION}/teleportd" || exit ${ERR_TELEPORTD_DOWNLOAD_ERR}
}
|
||||
|
||||
installTeleportdPlugin() {
    # Installs teleportd 0.8.0 into TELEPORTD_PLUGIN_BIN_DIR unless an equal or
    # newer version is already installed. No-op on arm64 (no published binaries).
    if [[ $(isARM64) == 1 ]]; then
        # no arm64 teleport binaries according to owner
        return
    fi

    CURRENT_VERSION=$(teleportd --version 2>/dev/null | sed 's/teleportd version v//g')
    local TARGET_VERSION="0.8.0"
    # semverCompare succeeds when current >= target (presumably — confirm helper semantics)
    if semverCompare ${CURRENT_VERSION:-"0.0.0"} ${TARGET_VERSION}; then
        echo "currently installed teleportd version ${CURRENT_VERSION} is greater than (or equal to) target base version ${TARGET_VERSION}. skipping installTeleportdPlugin."
    else
        downloadTeleportdPlugin ${TELEPORTD_PLUGIN_DOWNLOAD_URL} ${TARGET_VERSION}
        # TELEPORTD_VERSION is set globally by downloadTeleportdPlugin above
        mv "${TELEPORTD_PLUGIN_DOWNLOAD_DIR}/teleportd-v${TELEPORTD_VERSION}" "${TELEPORTD_PLUGIN_BIN_DIR}/teleportd" || exit ${ERR_TELEPORTD_INSTALL_ERR}
        chmod 755 "${TELEPORTD_PLUGIN_BIN_DIR}/teleportd" || exit ${ERR_TELEPORTD_INSTALL_ERR}
    fi
    rm -rf ${TELEPORTD_PLUGIN_DOWNLOAD_DIR}
}
|
||||
|
||||
# Creates the CNI binary and config directories, root-owned; the binary tree
# gets recursive 755 while only the config directory itself is chmod'ed.
setupCNIDirs() {
    mkdir -p "${CNI_BIN_DIR}"
    chown -R root:root "${CNI_BIN_DIR}"
    chmod -R 755 "${CNI_BIN_DIR}"

    mkdir -p "${CNI_CONFIG_DIR}"
    chown -R root:root "${CNI_CONFIG_DIR}"
    chmod 755 "${CNI_CONFIG_DIR}"
}
|
||||
|
||||
# Reference CNI plugins is used by kubenet and the loopback plugin used by containerd 1.0 (dependency gone in 2.0)
|
||||
# The version used to be deteremined by RP/toggle but are now just hadcoded in vhd as they rarely change and require a node image upgrade anyways
|
||||
# Latest VHD should have the untar, older should have the tgz. And who knows will have neither.
|
||||
installCNI() {
    # Installs the reference CNI plugins (kubenet; loopback used by containerd
    # before v2) from the version pinned in components.json; the binaries are
    # expected to be pre-extracted on the VHD.
    #always just use what is listed in components.json so we don't have to sync.
    cniPackage=$(jq ".Packages" "$COMPONENTS_FILEPATH" | jq ".[] | select(.name == \"cni-plugins\")") || exit $ERR_CNI_VERSION_INVALID

    #CNI doesn't really care about this but wanted to reuse updatePackageVersions which requires it.
    os=${UBUNTU_OS_NAME}
    if [[ -z "$UBUNTU_RELEASE" ]]; then
        os=${OS}
        os_version="current"
    else
        # BUGFIX: this assignment previously ran unconditionally after the if,
        # clobbering the "current" value set for non-Ubuntu distros above.
        os_version="${UBUNTU_RELEASE}"
    fi
    PACKAGE_VERSIONS=()
    updatePackageVersions "${cniPackage}" "${os}" "${os_version}"

    #should change to ne
    if [[ ${#PACKAGE_VERSIONS[@]} -gt 1 ]]; then
        echo "WARNING: containerd package versions array has more than one element. Installing the last element in the array."
        exit $ERR_CONTAINERD_VERSION_INVALID
    fi
    packageVersion=${PACKAGE_VERSIONS[0]}

    # Is there a ${arch} variable I can use instead of the iff
    if [[ $(isARM64) == 1 ]]; then
        CNI_DIR_TMP="cni-plugins-linux-arm64-v${packageVersion}"
    else
        CNI_DIR_TMP="cni-plugins-linux-amd64-v${packageVersion}"
    fi

    if [[ -d "$CNI_DOWNLOADS_DIR/${CNI_DIR_TMP}" ]]; then
        #not clear to me when this would ever happen. assume its related to the line above Latest VHD should have the untar, older should have the tgz.
        mv ${CNI_DOWNLOADS_DIR}/${CNI_DIR_TMP}/* $CNI_BIN_DIR
    else
        echo "CNI tarball should already be unzipped by components.json"
        exit $ERR_CNI_VERSION_INVALID
    fi

    chown -R root:root $CNI_BIN_DIR
}
|
||||
|
||||
installAzureCNI() {
    # Installs the Azure CNI binaries into CNI_BIN_DIR, preferring an
    # already-untarred directory in the download cache, then a cached tarball,
    # downloading only as a last resort.
    CNI_TGZ_TMP=${VNET_CNI_PLUGINS_URL##*/} # Use bash builtin ## to remove all chars ("*") up to the final "/"
    CNI_DIR_TMP=${CNI_TGZ_TMP%.tgz} # Use bash builtin % to remove the .tgz to look for a folder rather than tgz

    # We want to use the untar azurecni reference first. And if that doesn't exist on the vhd does the tgz?
    # And if tgz is already on the vhd then just untar into CNI_BIN_DIR
    # Latest VHD should have the untar, older should have the tgz. And who knows will have neither.
    if [[ -d "$CNI_DOWNLOADS_DIR/${CNI_DIR_TMP}" ]]; then
        mv ${CNI_DOWNLOADS_DIR}/${CNI_DIR_TMP}/* $CNI_BIN_DIR
    else
        if [[ ! -f "$CNI_DOWNLOADS_DIR/${CNI_TGZ_TMP}" ]]; then
            logs_to_events "AKS.CSE.installAzureCNI.downloadAzureCNI" downloadAzureCNI
        fi

        tar -xzf "$CNI_DOWNLOADS_DIR/${CNI_TGZ_TMP}" -C $CNI_BIN_DIR
    fi

    chown -R root:root $CNI_BIN_DIR
}
|
||||
|
||||
extractKubeBinaries() {
    # Downloads the kubernetes node tarball from $2 and extracts kubelet and
    # kubectl into /usr/local/bin, suffixing each filename with "-<$1>".
    # Exits with ERR_K8S_DOWNLOAD_TIMEOUT when the (retried) download fails.
    K8S_VERSION=$1
    KUBE_BINARY_URL=$2

    mkdir -p ${K8S_DOWNLOADS_DIR}
    # tarball filename = URL basename
    K8S_TGZ_TMP=${KUBE_BINARY_URL##*/}
    retrycmd_get_tarball 120 5 "$K8S_DOWNLOADS_DIR/${K8S_TGZ_TMP}" ${KUBE_BINARY_URL} || exit $ERR_K8S_DOWNLOAD_TIMEOUT
    # --transform appends "-<version>" to the extracted names (kubelet-<v>, kubectl-<v>)
    tar --transform="s|.*|&-${K8S_VERSION}|" --show-transformed-names -xzvf "$K8S_DOWNLOADS_DIR/${K8S_TGZ_TMP}" \
        --strip-components=3 -C /usr/local/bin kubernetes/node/bin/kubelet kubernetes/node/bin/kubectl
    rm -f "$K8S_DOWNLOADS_DIR/${K8S_TGZ_TMP}"
}
|
||||
|
||||
installKubeletKubectlAndKubeProxy() {
    # Puts versioned kubelet/kubectl binaries in place as /usr/local/bin/kubelet
    # and /usr/local/bin/kubectl, downloading them only when not pre-cached on
    # the VHD or when CUSTOM_KUBE_BINARY_URL overrides the cached ones.

    CUSTOM_KUBE_BINARY_DOWNLOAD_URL="${CUSTOM_KUBE_BINARY_URL:=}"
    if [[ ! -z ${CUSTOM_KUBE_BINARY_DOWNLOAD_URL} ]]; then
        # remove the kubelet binaries to make sure the only binary left is from the CUSTOM_KUBE_BINARY_DOWNLOAD_URL
        rm -rf /usr/local/bin/kubelet-* /usr/local/bin/kubectl-*

        # NOTE(mainred): we expect kubelet binary to be under `kubernetes/node/bin`. This suits the current setting of
        # kube binaries used by AKS and Kubernetes upstream.
        # TODO(mainred): let's see if necessary to auto-detect the path of kubelet
        logs_to_events "AKS.CSE.installKubeletKubectlAndKubeProxy.extractKubeBinaries" extractKubeBinaries ${KUBERNETES_VERSION} ${CUSTOM_KUBE_BINARY_DOWNLOAD_URL}

    else
        if [[ ! -f "/usr/local/bin/kubectl-${KUBERNETES_VERSION}" ]]; then
            #TODO: remove the condition check on KUBE_BINARY_URL once RP change is released
            # minor version >= 17 gate: only those tarballs have the expected layout
            if (($(echo ${KUBERNETES_VERSION} | cut -d"." -f2) >= 17)) && [ -n "${KUBE_BINARY_URL}" ]; then
                logs_to_events "AKS.CSE.installKubeletKubectlAndKubeProxy.extractKubeBinaries" extractKubeBinaries ${KUBERNETES_VERSION} ${KUBE_BINARY_URL}
            fi
        fi
    fi
    mv "/usr/local/bin/kubelet-${KUBERNETES_VERSION}" "/usr/local/bin/kubelet"
    mv "/usr/local/bin/kubectl-${KUBERNETES_VERSION}" "/usr/local/bin/kubectl"

    chmod a+x /usr/local/bin/kubelet /usr/local/bin/kubectl
    # drop remaining versioned binaries and legacy hyperkube downloads in the background
    rm -rf /usr/local/bin/kubelet-* /usr/local/bin/kubectl-* /home/hyperkube-downloads &
}
|
||||
|
||||
# Pulls a container image ($2) with the requested CLI tool ($1: ctr, crictl,
# or anything else -> docker), recording the pull as a telemetry event and
# exiting with a tool-specific error code when the (retried) pull times out.
pullContainerImage() {
    CLI_TOOL=$1
    CONTAINER_IMAGE_URL=$2
    echo "pulling the image ${CONTAINER_IMAGE_URL} using ${CLI_TOOL}"
    case "${CLI_TOOL}" in
        ctr)
            logs_to_events "AKS.CSE.imagepullctr.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 ctr --namespace k8s.io image pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via ctr" && exit $ERR_CONTAINERD_CTR_IMG_PULL_TIMEOUT)
            ;;
        crictl)
            logs_to_events "AKS.CSE.imagepullcrictl.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 crictl pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via crictl" && exit $ERR_CONTAINERD_CRICTL_IMG_PULL_TIMEOUT)
            ;;
        *)
            logs_to_events "AKS.CSE.imagepull.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 docker pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via docker" && exit $ERR_DOCKER_IMG_PULL_TIMEOUT)
            ;;
    esac
}
|
||||
|
||||
# Tags image $2 with the additional name $3 using CLI tool $1 (ctr, crictl, or
# anything else -> docker).
retagContainerImage() {
    CLI_TOOL=$1
    CONTAINER_IMAGE_URL=$2
    RETAG_IMAGE_URL=$3
    echo "retaging from ${CONTAINER_IMAGE_URL} to ${RETAG_IMAGE_URL} using ${CLI_TOOL}"
    case "${CLI_TOOL}" in
        ctr)
            ctr --namespace k8s.io image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
            ;;
        crictl)
            crictl image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
            ;;
        *)
            docker image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
            ;;
    esac
}
|
||||
|
||||
retagMCRImagesForChina() {
    # retag all the mcr for mooncake
    # Re-tags every local mcr.microsoft.com/* image as mcr.azk8s.cn/* so image
    # references resolve against the Azure China mirror.
    if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
        # shellcheck disable=SC2016
        allMCRImages=($(ctr --namespace k8s.io images list | grep '^mcr.microsoft.com/' | awk '{print $1}'))
    else
        # shellcheck disable=SC2016
        allMCRImages=($(docker images | grep '^mcr.microsoft.com/' | awk '{str = sprintf("%s:%s", $1, $2)} {print str}'))
    fi
    # NOTE(review): this compares only the first array element; an emptiness
    # check would normally use ${#allMCRImages[@]} — confirm before changing.
    if [[ "${allMCRImages}" == "" ]]; then
        echo "failed to find mcr images for retag"
        return
    fi
    for mcrImage in ${allMCRImages[@]+"${allMCRImages[@]}"}; do
        # in mooncake, the mcr endpoint is: mcr.azk8s.cn
        # shellcheck disable=SC2001
        retagMCRImage=$(echo ${mcrImage} | sed -e 's/^mcr.microsoft.com/mcr.azk8s.cn/g')
        # can't use CLI_TOOL because crictl doesn't support retagging.
        if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
            retagContainerImage "ctr" ${mcrImage} ${retagMCRImage}
        else
            retagContainerImage "docker" ${mcrImage} ${retagMCRImage}
        fi
    done
}
|
||||
|
||||
# Removes image $2 with docker when $1 is "docker"; otherwise falls back to
# crictl, which is expected to always be present on the node.
removeContainerImage() {
    CLI_TOOL=$1
    CONTAINER_IMAGE_URL=$2
    case "${CLI_TOOL}" in
        docker)
            docker image rm $CONTAINER_IMAGE_URL
            ;;
        *)
            crictl rmi $CONTAINER_IMAGE_URL
            ;;
    esac
}
|
||||
|
||||
cleanUpImages() {
    # Deletes every local image matching $1 (e.g. "kube-proxy") except those
    # tagged for the current KUBERNETES_VERSION; the whole sweep is retried via
    # a child bash so it needs the nested function and variables exported.
    local targetImage=$1
    export targetImage
    function cleanupImagesRun() {
        if [ "${NEEDS_CONTAINERD}" == "true" ]; then
            if [[ "${CLI_TOOL}" == "crictl" ]]; then
                images_to_delete=$(crictl images | awk '{print $1":"$2}' | grep -vE "${KUBERNETES_VERSION}$|${KUBERNETES_VERSION}.[0-9]+$|${KUBERNETES_VERSION}-|${KUBERNETES_VERSION}_" | grep ${targetImage} | tr ' ' '\n')
            else
                images_to_delete=$(ctr --namespace k8s.io images list | awk '{print $1}' | grep -vE "${KUBERNETES_VERSION}$|${KUBERNETES_VERSION}.[0-9]+$|${KUBERNETES_VERSION}-|${KUBERNETES_VERSION}_" | grep ${targetImage} | tr ' ' '\n')
            fi
        else
            images_to_delete=$(docker images --format '{{OpenBraces}}.Repository{{CloseBraces}}:{{OpenBraces}}.Tag{{CloseBraces}}' | grep -vE "${KUBERNETES_VERSION}$|${KUBERNETES_VERSION}.[0-9]+$|${KUBERNETES_VERSION}-|${KUBERNETES_VERSION}_" | grep ${targetImage} | tr ' ' '\n')
        fi
        # NOTE(review): $? here reflects the preceding 'fi', not the listing
        # pipeline, so the non-zero branch appears unreachable — confirm.
        local exit_code=$?
        if [[ $exit_code != 0 ]]; then
            exit $exit_code
        elif [[ "${images_to_delete}" != "" ]]; then
            echo "${images_to_delete}" | while read image; do
                if [ "${NEEDS_CONTAINERD}" == "true" ]; then
                    removeContainerImage ${CLI_TOOL} ${image}
                else
                    removeContainerImage "docker" ${image}
                fi
            done
        fi
    }
    # run the sweep in a child bash (hence the export -f) under retry
    export -f cleanupImagesRun
    retrycmd_if_failure 10 5 120 bash -c cleanupImagesRun
}
|
||||
|
||||
cleanUpKubeProxyImages() {
    # Removes stale kube-proxy images (all but the current KUBERNETES_VERSION),
    # writing timestamped start/end markers to stdout.
    echo $(date),$(hostname), startCleanUpKubeProxyImages
    cleanUpImages "kube-proxy"
    echo $(date),$(hostname), endCleanUpKubeProxyImages
}
|
||||
|
||||
cleanupRetaggedImages() {
    # Deletes the mcr.azk8s.cn/* tags created for the Azure China mirror;
    # skipped entirely when actually running in AzureChinaCloud.
    if [[ "${TARGET_CLOUD}" != "AzureChinaCloud" ]]; then
        if [ "${NEEDS_CONTAINERD}" == "true" ]; then
            if [[ "${CLI_TOOL}" == "crictl" ]]; then
                images_to_delete=$(crictl images | awk '{print $1":"$2}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
            else
                images_to_delete=$(ctr --namespace k8s.io images list | awk '{print $1}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
            fi
        else
            images_to_delete=$(docker images --format '{{OpenBraces}}.Repository{{CloseBraces}}:{{OpenBraces}}.Tag{{CloseBraces}}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
        fi
        if [[ "${images_to_delete}" != "" ]]; then
            echo "${images_to_delete}" | while read image; do
                if [ "${NEEDS_CONTAINERD}" == "true" ]; then
                    # always use ctr, even if crictl is installed.
                    # crictl will remove *ALL* references to a given imageID (SHA), which removes too much.
                    removeContainerImage "ctr" ${image}
                else
                    removeContainerImage "docker" ${image}
                fi
            done
        fi
    else
        echo "skipping container cleanup for AzureChinaCloud"
    fi
}
|
||||
|
||||
cleanUpContainerImages() {
    # Kicks off kube-proxy image cleanup in a detached background bash,
    # exporting the variables and functions that child shell will need.
    export KUBERNETES_VERSION
    export CLI_TOOL
    export -f retrycmd_if_failure
    export -f removeContainerImage
    export -f cleanUpImages
    export -f cleanUpKubeProxyImages
    bash -c cleanUpKubeProxyImages &
}
|
||||
|
||||
# Deletes the containerd download cache directory and its contents.
cleanUpContainerd() {
    rm -rf "${CONTAINERD_DOWNLOADS_DIR}"
}
|
||||
|
||||
overrideNetworkConfig() {
    # Tells cloud-init's Azure datasource not to re-apply network configuration
    # on later boots by appending an override drop-in under /etc/cloud/cloud.cfg.d.
    CONFIG_FILEPATH="/etc/cloud/cloud.cfg.d/80_azure_net_config.cfg"
    touch ${CONFIG_FILEPATH}
    cat <<EOF >>${CONFIG_FILEPATH}
datasource:
    Azure:
        apply_network_config: false
EOF
}
|
||||
#EOF
|
|
@ -1,424 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Timeout waiting for a file
|
||||
ERR_FILE_WATCH_TIMEOUT=6
|
||||
set -x
|
||||
if [ -f /opt/azure/containers/provision.complete ]; then
|
||||
echo "Already ran to success exiting..."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
aptmarkWALinuxAgent hold &
|
||||
|
||||
# Setup logs for upload to host
|
||||
LOG_DIR=/var/log/azure/aks
|
||||
mkdir -p ${LOG_DIR}
|
||||
ln -s /var/log/azure/cluster-provision.log \
|
||||
/var/log/azure/cluster-provision-cse-output.log \
|
||||
/opt/azure/*.json \
|
||||
/opt/azure/cloud-init-files.paved \
|
||||
/opt/azure/vhd-install.complete \
|
||||
${LOG_DIR}/
|
||||
|
||||
# Redact the necessary secrets from cloud-config.txt so we don't expose any sensitive information
|
||||
# when cloud-config.txt gets included within log bundles
|
||||
python3 /opt/azure/containers/provision_redact_cloud_config.py \
|
||||
--cloud-config-path /var/lib/cloud/instance/cloud-config.txt \
|
||||
--output-path ${LOG_DIR}/cloud-config.txt
|
||||
|
||||
UBUNTU_RELEASE=$(lsb_release -r -s)
|
||||
if [[ ${UBUNTU_RELEASE} == "16.04" ]]; then
|
||||
sudo apt-get -y autoremove chrony
|
||||
echo $?
|
||||
sudo systemctl restart systemd-timesyncd
|
||||
fi
|
||||
|
||||
echo $(date),$(hostname), startcustomscript>>/opt/m
|
||||
|
||||
for i in $(seq 1 3600); do
|
||||
if [ -s "${CSE_HELPERS_FILEPATH}" ]; then
|
||||
grep -Fq '#HELPERSEOF' "${CSE_HELPERS_FILEPATH}" && break
|
||||
fi
|
||||
if [ $i -eq 3600 ]; then
|
||||
exit $ERR_FILE_WATCH_TIMEOUT
|
||||
else
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
sed -i "/#HELPERSEOF/d" "${CSE_HELPERS_FILEPATH}"
|
||||
source "${CSE_HELPERS_FILEPATH}"
|
||||
|
||||
source "${CSE_DISTRO_HELPERS_FILEPATH}"
|
||||
source "${CSE_INSTALL_FILEPATH}"
|
||||
source "${CSE_DISTRO_INSTALL_FILEPATH}"
|
||||
source "${CSE_CONFIG_FILEPATH}"
|
||||
|
||||
# Optionally disable SSH access to the node entirely.
if [[ "${DISABLE_SSH}" == "true" ]]; then
    disableSSH || exit $ERR_DISABLE_SSH
fi

# This involves using proxy, log the config before fetching packages
echo "private egress proxy address is '${PRIVATE_EGRESS_PROXY_ADDRESS}'"
# TODO update to use proxy

# HTTP proxy setup: optionally trust the proxy CA, then write proxy vars
# into /etc/environment so later package fetches go through it.
if [[ "${SHOULD_CONFIGURE_HTTP_PROXY}" == "true" ]]; then
    if [[ "${SHOULD_CONFIGURE_HTTP_PROXY_CA}" == "true" ]]; then
        configureHTTPProxyCA || exit $ERR_UPDATE_CA_CERTS
    fi
    configureEtcEnvironment
fi

# Install operator-supplied CA certificates into the system trust store.
if [[ "${SHOULD_CONFIGURE_CUSTOM_CA_TRUST}" == "true" ]]; then
    configureCustomCaCertificate || exit $ERR_UPDATE_CA_CERTS
fi

# Outbound connectivity probe. PROXY_VARS (when set) exports proxy env vars
# via eval; OUTBOUND_COMMAND is intentionally unquoted so retrycmd_if_failure
# receives it as separate words.
if [[ -n "${OUTBOUND_COMMAND}" ]]; then
    if [[ -n "${PROXY_VARS}" ]]; then
        eval $PROXY_VARS
    fi
    retrycmd_if_failure 50 1 5 $OUTBOUND_COMMAND >> /var/log/azure/cluster-provision-cse-output.log 2>&1 || exit $ERR_OUTBOUND_CONN_FAIL;
fi

# Bring in OS-related vars (ID, VERSION_ID, ...) used below.
source /etc/os-release

# Mandb is not currently available on MarinerV1
if [[ ${ID} != "mariner" ]] && [[ ${ID} != "azurelinux" ]]; then
    echo "Removing man-db auto-update flag file..."
    logs_to_events "AKS.CSE.removeManDbAutoUpdateFlagFile" removeManDbAutoUpdateFlagFile
fi

# Decide (with retries) whether the NVIDIA driver install should be skipped.
# The helper must be exported so the retried 'bash -cx' subshell can see it.
export -f should_skip_nvidia_drivers
skip_nvidia_driver_install=$(retrycmd_if_failure_no_stats 10 1 10 bash -cx should_skip_nvidia_drivers)
ret=$?
if [[ "$ret" != "0" ]]; then
    echo "Failed to determine if nvidia driver install should be skipped"
    exit $ERR_NVIDIA_DRIVER_INSTALL
fi

# Non-GPU node (or explicit skip): remove any preinstalled GPU drivers.
if [[ "${GPU_NODE}" != "true" ]] || [[ "${skip_nvidia_driver_install}" == "true" ]]; then
    logs_to_events "AKS.CSE.cleanUpGPUDrivers" cleanUpGPUDrivers
fi

logs_to_events "AKS.CSE.disableSystemdResolved" disableSystemdResolved

logs_to_events "AKS.CSE.configureAdminUser" configureAdminUser
|
||||
|
||||
# Golden-image (VHD) detection: the marker file below is written at VHD build
# time. Its presence means packages are pre-installed and a full install is
# not required at provision time.
VHD_LOGS_FILEPATH=/opt/azure/vhd-install.complete
if [ -f $VHD_LOGS_FILEPATH ]; then
    echo "detected golden image pre-install"
    logs_to_events "AKS.CSE.cleanUpContainerImages" cleanUpContainerImages
    FULL_INSTALL_REQUIRED=false
else
    # A VHD distro without the marker is an inconsistent image — hard fail.
    if [[ "${IS_VHD}" = true ]]; then
        echo "Using VHD distro but file $VHD_LOGS_FILEPATH not found"
        exit $ERR_VHD_FILE_NOT_FOUND
    fi
    FULL_INSTALL_REQUIRED=true
fi

# Dependencies are only installed at CSE time on non-golden Ubuntu images.
if [[ $OS == $UBUNTU_OS_NAME ]] && [ "$FULL_INSTALL_REQUIRED" = "true" ]; then
    logs_to_events "AKS.CSE.installDeps" installDeps
else
    echo "Golden image; skipping dependencies installation"
fi

logs_to_events "AKS.CSE.installContainerRuntime" installContainerRuntime
# Teleport accelerates image pulls; its plugin only applies with containerd.
if [ "${NEEDS_CONTAINERD}" == "true" ] && [ "${TELEPORT_ENABLED}" == "true" ]; then
    logs_to_events "AKS.CSE.installTeleportdPlugin" installTeleportdPlugin
fi

setupCNIDirs

logs_to_events "AKS.CSE.installNetworkPlugin" installNetworkPlugin
|
||||
|
||||
# Krustlet (WASM) nodes: install the containerd wasm shims and the SpinKube
# shim. Versions and download locations come from the components manifest
# ($COMPONENTS_FILEPATH), queried with jq.
# NOTE: 'local' is only valid inside functions; these are plain script-level
# variables (the previous 'local' usage here was a bash error at top level).
if [ "${IS_KRUSTLET}" == "true" ]; then
    versionsWasm=$(jq -r '.Packages[] | select(.name == "containerd-wasm-shims") | .downloadURIs.default.current.versionsV2[].latestVersion' "$COMPONENTS_FILEPATH")
    downloadLocationWasm=$(jq -r '.Packages[] | select(.name == "containerd-wasm-shims") | .downloadLocation' "$COMPONENTS_FILEPATH")
    downloadURLWasm=$(jq -r '.Packages[] | select(.name == "containerd-wasm-shims") | .downloadURIs.default.current.downloadURL' "$COMPONENTS_FILEPATH")
    logs_to_events "AKS.CSE.installContainerdWasmShims" installContainerdWasmShims "$downloadLocationWasm" "$downloadURLWasm" "$versionsWasm"

    # Fixed: the jq selectors below were missing quotes around "spinkube"
    # (jq would fail to parse), and the URL variable name was misspelled
    # ($downloadURSpinKube), passing an empty argument to installSpinKube.
    versionsSpinKube=$(jq -r '.Packages[] | select(.name == "spinkube") | .downloadURIs.default.current.versionsV2[].latestVersion' "$COMPONENTS_FILEPATH")
    downloadLocationSpinKube=$(jq -r '.Packages[] | select(.name == "spinkube") | .downloadLocation' "$COMPONENTS_FILEPATH")
    downloadURLSpinKube=$(jq -r '.Packages[] | select(.name == "spinkube") | .downloadURIs.default.current.downloadURL' "$COMPONENTS_FILEPATH")
    logs_to_events "AKS.CSE.installSpinKube" installSpinKube "$downloadURLSpinKube" "$downloadLocationSpinKube" "$versionsSpinKube"
fi
|
||||
|
||||
# By default, never reboot new nodes.
REBOOTREQUIRED=false

echo $(date),$(hostname), "Start configuring GPU drivers"
if [[ "${GPU_NODE}" = true ]] && [[ "${skip_nvidia_driver_install}" != "true" ]]; then
    logs_to_events "AKS.CSE.ensureGPUDrivers" ensureGPUDrivers
    if [[ "${ENABLE_GPU_DEVICE_PLUGIN_IF_NEEDED}" = true ]]; then
        # MIG nodes need the device plugin started with the 'single' MIG
        # strategy; inject it via a systemd drop-in before starting the unit.
        if [[ "${MIG_NODE}" == "true" ]] && [[ -f "/etc/systemd/system/nvidia-device-plugin.service" ]]; then
            mkdir -p "/etc/systemd/system/nvidia-device-plugin.service.d"
            tee "/etc/systemd/system/nvidia-device-plugin.service.d/10-mig_strategy.conf" > /dev/null <<'EOF'
[Service]
Environment="MIG_STRATEGY=--mig-strategy single"
ExecStart=
ExecStart=/usr/local/nvidia/bin/nvidia-device-plugin $MIG_STRATEGY
EOF
        fi
        logs_to_events "AKS.CSE.start.nvidia-device-plugin" "systemctlEnableAndStart nvidia-device-plugin" || exit $ERR_GPU_DEVICE_PLUGIN_START_FAIL
    else
        logs_to_events "AKS.CSE.stop.nvidia-device-plugin" "systemctlDisableAndStop nvidia-device-plugin"
    fi

    if [[ "${GPU_NEEDS_FABRIC_MANAGER}" == "true" ]]; then
        # fabric manager trains nvlink connections between multi instance gpus.
        # it appears this is only necessary for systems with *multiple cards*.
        # i.e., an A100 can be partitioned a maximum of 7 ways.
        # An NC24ads_A100_v4 has one A100.
        # An ND96asr_v4 has eight A100, for a maximum of 56 partitions.
        # ND96 seems to require fabric manager *even when not using mig partitions*
        # while it fails to install on NC24.
        if isMarinerOrAzureLinux "$OS"; then
            logs_to_events "AKS.CSE.installNvidiaFabricManager" installNvidiaFabricManager
        fi
        logs_to_events "AKS.CSE.nvidia-fabricmanager" "systemctlEnableAndStart nvidia-fabricmanager" || exit $ERR_GPU_DRIVERS_START_FAIL
    fi

    # This will only be true for multi-instance capable VM sizes
    # for which the user has specified a partitioning profile.
    # it is valid to use mig-capable gpus without a partitioning profile.
    if [[ "${MIG_NODE}" == "true" ]]; then
        # A100 GPU has a bit in the physical card (infoROM) to enable mig mode.
        # Changing this bit in either direction requires a VM reboot on Azure (hypervisor/platform stuff).
        # Commands such as `nvidia-smi --gpu-reset` may succeed,
        # while commands such as `nvidia-smi -q` will show mismatched current/pending mig mode.
        # this will not be required per nvidia for next gen H100.
        REBOOTREQUIRED=true

        # this service applies the partitioning scheme with nvidia-smi.
        # we should consider moving to mig-parted which is simpler/newer.
        # we couldn't because of old drivers but that has long been fixed.
        logs_to_events "AKS.CSE.ensureMigPartition" ensureMigPartition
    fi
fi

echo $(date),$(hostname), "End configuring GPU drivers"
|
||||
|
||||
logs_to_events "AKS.CSE.installKubeletKubectlAndKubeProxy" installKubeletKubectlAndKubeProxy

createKubeManifestDir

# Apply user-provided DNS search-domain setup, logging to a dedicated file.
if [ "${HAS_CUSTOM_SEARCH_DOMAIN}" == "true" ]; then
    "${CUSTOM_SEARCH_DOMAIN_FILEPATH}" > /opt/azure/containers/setup-custom-search-domain.log 2>&1 || exit $ERR_CUSTOM_SEARCH_DOMAINS_FAIL
fi

# for drop ins, so they don't all have to check/create the dir
mkdir -p "/etc/systemd/system/kubelet.service.d"

logs_to_events "AKS.CSE.configureK8s" configureK8s

logs_to_events "AKS.CSE.configureCNI" configureCNI

# configure and enable dhcpv6 for dual stack feature
if [ "${IPV6_DUAL_STACK_ENABLED}" == "true" ]; then
    logs_to_events "AKS.CSE.ensureDHCPv6" ensureDHCPv6
fi

if [ "${NEEDS_CONTAINERD}" == "true" ]; then
    # containerd should not be configured until cni has been configured first
    logs_to_events "AKS.CSE.ensureContainerd" ensureContainerd
else
    logs_to_events "AKS.CSE.ensureDocker" ensureDocker
fi

# Install the operator-supplied message of the day (delivered base64-encoded).
if [[ "${MESSAGE_OF_THE_DAY}" != "" ]]; then
    echo "${MESSAGE_OF_THE_DAY}" | base64 -d > /etc/motd
fi

# must run before kubelet starts to avoid race in container status using wrong image
# https://github.com/kubernetes/kubernetes/issues/51017
# can remove when fixed
if [[ "${TARGET_CLOUD}" == "AzureChinaCloud" ]]; then
    retagMCRImagesForChina
fi
|
||||
|
||||
# Private clusters: run the agent that manages API-server entries in /etc/hosts.
if [[ "${ENABLE_HOSTS_CONFIG_AGENT}" == "true" ]]; then
    logs_to_events "AKS.CSE.configPrivateClusterHosts" configPrivateClusterHosts
fi

if [ "${SHOULD_CONFIG_TRANSPARENT_HUGE_PAGE}" == "true" ]; then
    logs_to_events "AKS.CSE.configureTransparentHugePage" configureTransparentHugePage
fi

if [ "${SHOULD_CONFIG_SWAP_FILE}" == "true" ]; then
    logs_to_events "AKS.CSE.configureSwapFile" configureSwapFile
fi

# cgroup v2 hosts: kubelet must use the systemd cgroup driver.
if [ "${NEEDS_CGROUPV2}" == "true" ]; then
    tee "/etc/systemd/system/kubelet.service.d/10-cgroupv2.conf" > /dev/null <<EOF
[Service]
Environment="KUBELET_CGROUP_FLAGS=--cgroup-driver=systemd"
EOF
fi

if [ "${NEEDS_CONTAINERD}" == "true" ]; then
    # gross, but the backticks make it very hard to do in Go
    # TODO: move entirely into vhd.
    # alternatively, can we verify this is safe with docker?
    # or just do it even if not because docker is out of support?
    mkdir -p /etc/containerd
    echo "${KUBENET_TEMPLATE}" | base64 -d > /etc/containerd/kubenet_template.conf

    # In k8s 1.27, the flag --container-runtime was removed.
    # We now have 2 drop-in's, one with the still valid flags that will be applied to all k8s versions,
    # the flags are --runtime-request-timeout, --container-runtime-endpoint, --runtime-cgroups
    # For k8s >= 1.27, the flag --container-runtime will not be passed.
    tee "/etc/systemd/system/kubelet.service.d/10-containerd-base-flag.conf" > /dev/null <<'EOF'
[Service]
Environment="KUBELET_CONTAINERD_FLAGS=--runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock --runtime-cgroups=/system.slice/containerd.service"
EOF

    # if k8s version < 1.27.0, add the drop in for --container-runtime flag
    if ! semverCompare ${KUBERNETES_VERSION:-"0.0.0"} "1.27.0"; then
        tee "/etc/systemd/system/kubelet.service.d/10-container-runtime-flag.conf" > /dev/null <<'EOF'
[Service]
Environment="KUBELET_CONTAINER_RUNTIME_FLAG=--container-runtime=remote"
EOF
    fi
fi
|
||||
|
||||
# Dedicated kubelet disk: kubelet must start only after the bind-mount of its
# data directory is in place, so order it after bind-mount.service.
if [ "${HAS_KUBELET_DISK_TYPE}" == "true" ]; then
    tee "/etc/systemd/system/kubelet.service.d/10-bindmount.conf" > /dev/null <<EOF
[Unit]
Requires=bind-mount.service
After=bind-mount.service
EOF
fi

logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl

if [ "${NEEDS_CONTAINERD}" == "true" ] && [ "${SHOULD_CONFIG_CONTAINERD_ULIMITS}" == "true" ]; then
    logs_to_events "AKS.CSE.setContainerdUlimits" configureContainerdUlimits
fi

logs_to_events "AKS.CSE.ensureKubelet" ensureKubelet
if [ "${ENSURE_NO_DUPE_PROMISCUOUS_BRIDGE}" == "true" ]; then
    logs_to_events "AKS.CSE.ensureNoDupOnPromiscuBridge" ensureNoDupOnPromiscuBridge
fi

if $FULL_INSTALL_REQUIRED; then
    if [[ $OS == $UBUNTU_OS_NAME ]]; then
        # mitigation for bug https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1676635
        echo 2dd1ce17-079e-403c-b352-a1921ee207ee > /sys/bus/vmbus/drivers/hv_util/unbind
        sed -i "13i\echo 2dd1ce17-079e-403c-b352-a1921ee207ee > /sys/bus/vmbus/drivers/hv_util/unbind\n" /etc/rc.local
    fi
fi
|
||||
|
||||
VALIDATION_ERR=0

# TODO(djsly): Look at leveraging the `aks-check-network.sh` script for this validation instead of duplicating the logic here

# Edge case scenarios:
# high retry times to wait for new API server DNS record to replicate (e.g. stop and start cluster)
# high timeout to address high latency for private dns server to forward request to Azure DNS
# dns check will be done only if we use FQDN for API_SERVER_NAME
API_SERVER_CONN_RETRIES=50
if [[ $API_SERVER_NAME == *.privatelink.* ]]; then
    API_SERVER_CONN_RETRIES=100
fi
# Resolve DNS only when API_SERVER_NAME is not a raw IPv4 address.
if ! [[ ${API_SERVER_NAME} =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    API_SERVER_DNS_RETRIES=100
    if [[ $API_SERVER_NAME == *.privatelink.* ]]; then
        API_SERVER_DNS_RETRIES=200
    fi
    # With the hosts-config agent enabled, name resolution is managed via
    # /etc/hosts, so the nslookup probe is skipped.
    if [[ "${ENABLE_HOSTS_CONFIG_AGENT}" != "true" ]]; then
        RES=$(logs_to_events "AKS.CSE.apiserverNslookup" "retrycmd_if_failure ${API_SERVER_DNS_RETRIES} 1 20 nslookup -timeout=5 -retry=0 ${API_SERVER_NAME}")
        STS=$?
    else
        STS=0
    fi
    if [[ $STS != 0 ]]; then
        time nslookup ${API_SERVER_NAME}
        # 168.63.129.16 is the Azure wire-server/DNS virtual IP; its presence
        # in the output means the query reached Azure DNS and failed there.
        if [[ $RES == *"168.63.129.16"* ]]; then
            VALIDATION_ERR=$ERR_K8S_API_SERVER_AZURE_DNS_LOOKUP_FAIL
        else
            VALIDATION_ERR=$ERR_K8S_API_SERVER_DNS_LOOKUP_FAIL
        fi
    else
        if [ "${UBUNTU_RELEASE}" == "18.04" ]; then
            #TODO (djsly): remove this once 18.04 isn't supported anymore
            logs_to_events "AKS.CSE.apiserverNC" "retrycmd_if_failure ${API_SERVER_CONN_RETRIES} 1 10 nc -vz ${API_SERVER_NAME} 443" || time nc -vz ${API_SERVER_NAME} 443 || VALIDATION_ERR=$ERR_K8S_API_SERVER_CONN_FAIL
        else
            logs_to_events "AKS.CSE.apiserverCurl" "retrycmd_if_failure ${API_SERVER_CONN_RETRIES} 1 10 curl -v --cacert /etc/kubernetes/certs/ca.crt https://${API_SERVER_NAME}:443" || time curl -v --cacert /etc/kubernetes/certs/ca.crt "https://${API_SERVER_NAME}:443" || VALIDATION_ERR=$ERR_K8S_API_SERVER_CONN_FAIL
        fi
    fi
else
    # API server given as an IP literal: skip DNS, just check connectivity.
    if [ "${UBUNTU_RELEASE}" == "18.04" ]; then
        #TODO (djsly): remove this once 18.04 isn't supported anymore
        logs_to_events "AKS.CSE.apiserverNC" "retrycmd_if_failure ${API_SERVER_CONN_RETRIES} 1 10 nc -vz ${API_SERVER_NAME} 443" || time nc -vz ${API_SERVER_NAME} 443 || VALIDATION_ERR=$ERR_K8S_API_SERVER_CONN_FAIL
    else
        logs_to_events "AKS.CSE.apiserverCurl" "retrycmd_if_failure ${API_SERVER_CONN_RETRIES} 1 10 curl -v --cacert /etc/kubernetes/certs/ca.crt https://${API_SERVER_NAME}:443" || time curl -v --cacert /etc/kubernetes/certs/ca.crt "https://${API_SERVER_NAME}:443" || VALIDATION_ERR=$ERR_K8S_API_SERVER_CONN_FAIL
    fi
fi
|
||||
|
||||
# Restore man-db auto updates (removed earlier to speed up installs) and kick
# off a background regeneration; skipped on Mariner/AzureLinux (no mandb).
if [[ ${ID} != "mariner" ]] && [[ ${ID} != "azurelinux" ]]; then
    echo "Recreating man-db auto-update flag file and kicking off man-db update process at $(date)"
    createManDbAutoUpdateFlagFile
    /usr/bin/mandb && echo "man-db finished updates at $(date)" &
fi

if $REBOOTREQUIRED; then
    echo 'reboot required, rebooting node in 1 minute'
    /bin/bash -c "shutdown -r 1 &"
    if [[ $OS == $UBUNTU_OS_NAME ]]; then
        # logs_to_events should not be run on & commands
        aptmarkWALinuxAgent unhold &
    fi
else
    if [[ $OS == $UBUNTU_OS_NAME ]]; then
        # logs_to_events should not be run on & commands
        if [ "${ENABLE_UNATTENDED_UPGRADES}" == "true" ]; then
            UU_CONFIG_DIR="/etc/apt/apt.conf.d/99periodic"
            mkdir -p "$(dirname "${UU_CONFIG_DIR}")"
            touch "${UU_CONFIG_DIR}"
            chmod 0644 "${UU_CONFIG_DIR}"
            echo 'APT::Periodic::Update-Package-Lists "1";' >> "${UU_CONFIG_DIR}"
            echo 'APT::Periodic::Unattended-Upgrade "1";' >> "${UU_CONFIG_DIR}"
            systemctl unmask apt-daily.service apt-daily-upgrade.service
            systemctl enable apt-daily.service apt-daily-upgrade.service
            systemctl enable apt-daily.timer apt-daily-upgrade.timer
            systemctl restart --no-block apt-daily.timer apt-daily-upgrade.timer
            # this is the DOWNLOAD service
            # meaning we are wasting IO without even triggering an upgrade
            # -________________-
            systemctl restart --no-block apt-daily.service

        fi
        aptmarkWALinuxAgent unhold &
    elif isMarinerOrAzureLinux "$OS"; then
        if [ "${ENABLE_UNATTENDED_UPGRADES}" == "true" ]; then
            if [ "${IS_KATA}" == "true" ]; then
                # Currently kata packages must be updated as a unit (including the kernel which requires a reboot). This can
                # only be done reliably via image updates as of now so never enable automatic updates.
                echo 'EnableUnattendedUpgrade is not supported by kata images, will not be enabled'
            else
                # By default the dnf-automatic service is notify only in Mariner/AzureLinux.
                # Enable the automatic install timer and the check-restart timer.
                # Stop the notify only dnf timer since we've enabled the auto install one.
                # systemctlDisableAndStop adds .service to the end which doesn't work on timers.
                systemctl disable dnf-automatic-notifyonly.timer
                systemctl stop dnf-automatic-notifyonly.timer
                # At 6:00:00 UTC (1 hour random fuzz) download and install package updates.
                systemctl unmask dnf-automatic-install.service || exit $ERR_SYSTEMCTL_START_FAIL
                systemctl unmask dnf-automatic-install.timer || exit $ERR_SYSTEMCTL_START_FAIL
                systemctlEnableAndStart dnf-automatic-install.timer || exit $ERR_SYSTEMCTL_START_FAIL
                # The check-restart service which will inform kured of required restarts should already be running
            fi
        fi
    fi
fi

echo "Custom script finished. API server connection check code:" $VALIDATION_ERR
# Progress marker mirroring the 'startcustomscript' entry at the top.
echo $(date),$(hostname), endcustomscript>>/opt/m
mkdir -p /opt/azure/containers && touch /opt/azure/containers/provision.complete

exit $VALIDATION_ERR
|
||||
|
||||
|
||||
#EOF
|
|
@ -1,96 +0,0 @@
|
|||
# Record CSE wall-clock start time (raw + formatted) for the telemetry below.
CSE_STARTTIME=$(date)
CSE_STARTTIME_FORMATTED=$(date +"%F %T.%3N")
# Run the provisioning script with a 15-minute ceiling; -k5s sends SIGKILL
# 5 seconds after the initial TERM if it doesn't exit.
timeout -k5s 15m /bin/bash /opt/azure/containers/provision.sh >> /var/log/azure/cluster-provision.log 2>&1
EXIT_CODE=$?
systemctl --no-pager -l status kubelet >> /var/log/azure/cluster-provision-cse-output.log 2>&1
# Last 3000 bytes of the provision log become the reported Output field.
OUTPUT=$(tail -c 3000 "/var/log/azure/cluster-provision.log")
# Collect boot-phase timestamps from systemd; '|| true' keeps missing units
# or properties from failing the wrapper.
KERNEL_STARTTIME=$(systemctl show -p KernelTimestamp | sed -e "s/KernelTimestamp=//g" || true)
KERNEL_STARTTIME_FORMATTED=$(date -d "${KERNEL_STARTTIME}" +"%F %T.%3N" )
CLOUDINITLOCAL_STARTTIME=$(systemctl show cloud-init-local -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
CLOUDINITLOCAL_STARTTIME_FORMATTED=$(date -d "${CLOUDINITLOCAL_STARTTIME}" +"%F %T.%3N" )
CLOUDINIT_STARTTIME=$(systemctl show cloud-init -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
CLOUDINIT_STARTTIME_FORMATTED=$(date -d "${CLOUDINIT_STARTTIME}" +"%F %T.%3N" )
CLOUDINITFINAL_STARTTIME=$(systemctl show cloud-final -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
CLOUDINITFINAL_STARTTIME_FORMATTED=$(date -d "${CLOUDINITFINAL_STARTTIME}" +"%F %T.%3N" )
NETWORKD_STARTTIME=$(systemctl show systemd-networkd -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
NETWORKD_STARTTIME_FORMATTED=$(date -d "${NETWORKD_STARTTIME}" +"%F %T.%3N" )
GUEST_AGENT_STARTTIME=$(systemctl show walinuxagent.service -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
GUEST_AGENT_STARTTIME_FORMATTED=$(date -d "${GUEST_AGENT_STARTTIME}" +"%F %T.%3N" )
KUBELET_START_TIME=$(systemctl show kubelet.service -p ExecMainStartTimestamp | sed -e "s/ExecMainStartTimestamp=//g" || true)
KUBELET_START_TIME_FORMATTED=$(date -d "${KUBELET_START_TIME}" +"%F %T.%3N" )
# First 'NodeReady' entry in the kubelet journal marks node readiness.
KUBELET_READY_TIME_FORMATTED="$(date -d "$(journalctl -u kubelet | grep NodeReady | cut -d' ' -f1-3)" +"%F %T.%3N")"
SYSTEMD_SUMMARY=$(systemd-analyze || true)
CSE_ENDTIME_FORMATTED=$(date +"%F %T.%3N")
EVENTS_LOGGING_DIR=/var/log/azure/Microsoft.Azure.Extensions.CustomScript/events/
# Event file names are epoch milliseconds, per the GA events convention.
EVENTS_FILE_NAME=$(date +%s%3N)
EXECUTION_DURATION=$(echo $(($(date +%s) - $(date -d "$CSE_STARTTIME" +%s))))
|
||||
|
||||
# Assemble the provisioning summary as JSON (jq --arg safely escapes values)
# and persist it for later collection.
JSON_STRING=$( jq -n \
    --arg ec "$EXIT_CODE" \
    --arg op "$OUTPUT" \
    --arg er "" \
    --arg ed "$EXECUTION_DURATION" \
    --arg ks "$KERNEL_STARTTIME" \
    --arg cinitl "$CLOUDINITLOCAL_STARTTIME" \
    --arg cinit "$CLOUDINIT_STARTTIME" \
    --arg cf "$CLOUDINITFINAL_STARTTIME" \
    --arg ns "$NETWORKD_STARTTIME" \
    --arg cse "$CSE_STARTTIME" \
    --arg ga "$GUEST_AGENT_STARTTIME" \
    --arg ss "$SYSTEMD_SUMMARY" \
    --arg kubelet "$KUBELET_START_TIME" \
    '{ExitCode: $ec, Output: $op, Error: $er, ExecDuration: $ed, KernelStartTime: $ks, CloudInitLocalStartTime: $cinitl, CloudInitStartTime: $cinit, CloudFinalStartTime: $cf, NetworkdStartTime: $ns, CSEStartTime: $cse, GuestAgentStartTime: $ga, SystemdSummary: $ss, BootDatapoints: { KernelStartTime: $ks, CSEStartTime: $cse, GuestAgentStartTime: $ga, KubeletStartTime: $kubelet }}' )
mkdir -p /var/log/azure/aks
echo $JSON_STRING | tee /var/log/azure/aks/provision.json
|
||||
|
||||
# message_string is here because GA only accepts strings in Message.
message_string=$( jq -n \
    --arg EXECUTION_DURATION "${EXECUTION_DURATION}" \
    --arg EXIT_CODE "${EXIT_CODE}" \
    --arg KERNEL_STARTTIME_FORMATTED "${KERNEL_STARTTIME_FORMATTED}" \
    --arg CLOUDINITLOCAL_STARTTIME_FORMATTED "${CLOUDINITLOCAL_STARTTIME_FORMATTED}" \
    --arg CLOUDINIT_STARTTIME_FORMATTED "${CLOUDINIT_STARTTIME_FORMATTED}" \
    --arg CLOUDINITFINAL_STARTTIME_FORMATTED "${CLOUDINITFINAL_STARTTIME_FORMATTED}" \
    --arg NETWORKD_STARTTIME_FORMATTED "${NETWORKD_STARTTIME_FORMATTED}" \
    --arg GUEST_AGENT_STARTTIME_FORMATTED "${GUEST_AGENT_STARTTIME_FORMATTED}" \
    --arg KUBELET_START_TIME_FORMATTED "${KUBELET_START_TIME_FORMATTED}" \
    --arg KUBELET_READY_TIME_FORMATTED "${KUBELET_READY_TIME_FORMATTED}" \
    '{ExitCode: $EXIT_CODE, E2E: $EXECUTION_DURATION, KernelStartTime: $KERNEL_STARTTIME_FORMATTED, CloudInitLocalStartTime: $CLOUDINITLOCAL_STARTTIME_FORMATTED, CloudInitStartTime: $CLOUDINIT_STARTTIME_FORMATTED, CloudFinalStartTime: $CLOUDINITFINAL_STARTTIME_FORMATTED, NetworkdStartTime: $NETWORKD_STARTTIME_FORMATTED, GuestAgentStartTime: $GUEST_AGENT_STARTTIME_FORMATTED, KubeletStartTime: $KUBELET_START_TIME_FORMATTED, KubeletReadyTime: $KUBELET_READY_TIME_FORMATTED } | tostring'
)
# this clean up brings me no joy, but removing extra "\" and then removing quotes at the end of the string
# allows parsing to happening without additional manipulation
message_string=$(echo $message_string | sed 's/\\//g' | sed 's/^.\(.*\).$/\1/')
|
||||
|
||||
# arg names are defined by GA and all these are required to be correctly read by GA
# EventPid, EventTid are required to be int. No use case for them at this point.
EVENT_JSON=$( jq -n \
    --arg Timestamp "${CSE_STARTTIME_FORMATTED}" \
    --arg OperationId "${CSE_ENDTIME_FORMATTED}" \
    --arg Version "1.23" \
    --arg TaskName "AKS.CSE.cse_start" \
    --arg EventLevel "${eventlevel}" \
    --arg Message "${message_string}" \
    --arg EventPid "0" \
    --arg EventTid "0" \
    '{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
)
# Emit the event where the Guest Agent extension watches for event files.
echo ${EVENT_JSON} > ${EVENTS_LOGGING_DIR}${EVENTS_FILE_NAME}.json
|
||||
|
||||
# force a log upload to the host after the provisioning script finishes
# if we failed, wait for the upload to complete so that we don't remove
# the VM before it finishes. if we succeeded, upload in the background
# so that the provisioning script returns success more quickly
upload_logs() {
    # find the most recent version of WALinuxAgent and use it to collect logs per
    # https://supportability.visualstudio.com/AzureIaaSVM/_wiki/wikis/AzureIaaSVM/495009/Log-Collection_AGEX?anchor=manually-collect-logs
    local agent_egg
    # 2>/dev/null: /var/lib/waagent may not exist; this path is best-effort.
    agent_egg=$(find /var/lib/waagent -name 'WALinuxAgent*.egg' 2>/dev/null | sort -rV | head -n1)
    # Previously the (unquoted, possibly empty) path was passed straight to
    # python3, invoking 'python3 -collect-logs' with no script when no egg
    # was found; guard and quote instead.
    if [ -n "$agent_egg" ]; then
        python3 "$agent_egg" -collect-logs -full >/dev/null 2>&1
    fi
    python3 /opt/azure/containers/provision_send_logs.py >/dev/null 2>&1
}
|
||||
# Failure: wait for the upload so the VM isn't deallocated mid-upload.
# Success: upload in the background so the extension returns quickly.
if [ $EXIT_CODE -ne 0 ]; then
    upload_logs
else
    upload_logs &
fi

exit $EXIT_CODE
|
Загрузка…
Ссылка в новой задаче