Mirror of https://github.com/Azure/hpcpack.git
Merge pull request #55 from Azure/tianyiliu/deploy-Kubernetes-script
Deploy-Kubernetes scripts and KubernetesWrapper application
Commit 6b4e48fa4b
@@ -0,0 +1,12 @@
param (
    [string]$password,
    [string[]]$servers
)

$joinedServers = $servers -join ' '

clusrun /nodes:$($servers[0]) "git clone https://github.com/Azure/hpcpack.git && cd hpcpack && chmod +x Scripts/Deploy-Kubernetes.sh"
clusrun /nodes:$($servers[0]) "hpcpack/Scripts/Deploy-Kubernetes.sh -p '$password' $joinedServers"

New-HpcGroup -Name "Kubernetes" -ErrorAction SilentlyContinue
Add-HpcGroup -Name "Kubernetes" -NodeName $servers
@@ -0,0 +1,69 @@
param (
    [string]$password,
    [string[]]$servers
)

# Install the Kubernetes packages (kubelet, kubeadm, kubectl, docker) and register the Microsoft package feed on a single node.
function Install-Kubernetes {
    param (
        [string]$server
    )
    Write-Host "Installing Kubernetes on $server"
    clusrun /nodes:$server 'sudo apt-get update && sudo apt-get install -y apt-transport-https curl'
    clusrun /nodes:$server 'echo deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.28/deb/ / | sudo tee /etc/apt/sources.list.d/kubernetes.list'
    clusrun /nodes:$server 'sudo mkdir /etc/apt/keyrings'
    clusrun /nodes:$server 'curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg'
    clusrun /nodes:$server 'sudo apt-get update && sudo apt-get install -y kubelet kubeadm kubectl docker.io'

    clusrun /nodes:$server "wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb"
    clusrun /nodes:$server "sudo dpkg -i packages-microsoft-prod.deb"
    clusrun /nodes:$server "rm packages-microsoft-prod.deb"

    clusrun /nodes:$server 'mkdir -p $HOME/.kube'
    clusrun /nodes:$server 'echo ''export PATH=$PATH:$HOME/KubernetesWrapper/net8.0'' >> ~/.profile'
}

# Initialize a node as the Kubernetes control-plane with kubeadm and Calico, and build the KubernetesWrapper application on it.
function Init-Master-Node {
    param (
        [string]$server
    )
    clusrun /nodes:$server 'rm -f ~/.ssh/kube_key*'
    clusrun /nodes:$server 'ssh-keygen -t rsa -N """" -f ~/.ssh/kube_key'

    clusrun /nodes:$server 'sudo apt-get update && sudo apt-get install -y dotnet-sdk-8.0 && sudo apt install sshpass -y'
    clusrun /nodes:$server 'git clone https://github.com/Azure/hpcpack.git && cd hpcpack'
    clusrun /nodes:$server 'dotnet build ~/hpcpack/code/KubernetesWrapper/KubernetesWrapper.sln'
    clusrun /nodes:$server 'mkdir KubernetesWrapper && cp -r ~/hpcpack/code/KubernetesWrapper/KubernetesWrapper/bin/Debug/net8.0 ~/KubernetesWrapper'

    clusrun /nodes:$server 'sudo kubeadm init --pod-network-cidr=10.1.0.0/16'
    clusrun /nodes:$server 'sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config'
    clusrun /nodes:$server 'sudo chown $(id -u):$(id -g) $HOME/.kube/config'
    clusrun /nodes:$server 'kubectl create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.1/manifests/tigera-operator.yaml'
    clusrun /nodes:$server 'curl https://raw.githubusercontent.com/projectcalico/calico/v3.26.1/manifests/custom-resources.yaml -O'
    clusrun /nodes:$server 'sed -i ''s/cidr: 192\.168\.0\.0\/16/cidr: 10.1.0.0\/16/g'' custom-resources.yaml'
    clusrun /nodes:$server 'kubectl apply -f custom-resources.yaml'
    sleep 100
    clusrun /nodes:$server 'kubectl taint nodes --all node-role.kubernetes.io/control-plane-'
}

# Install Kubernetes on every node, then initialize the first server as the control-plane node.
foreach ($server in $servers) {
    Install-Kubernetes -server $server
}

Init-Master-Node -server $servers[0]
$joinClusterCommand = clusrun /nodes:$($servers[0]) "sudo kubeadm token create --print-join-command"
# clusrun output is an array of lines; the join command itself is expected on the second line.
$joinClusterCommand = "sudo " + $joinClusterCommand[1]
Write-Host "joinClusterCommand:"
Write-Host $joinClusterCommand

# Join the remaining nodes to the cluster and copy the kubeconfig and KubernetesWrapper to them.
for ($i = 1; $i -lt $servers.Length; $i++) {
    clusrun /nodes:$($servers[$i]) 'mkdir -p $HOME/.kube'
    clusrun /nodes:$($servers[0]) "sshpass -p $password ssh-copy-id -i ~/.ssh/kube_key.pub -o StrictHostKeyChecking=no $($servers[$i])"
    clusrun /nodes:$($servers[0]) "sshpass -p $password scp ~/.kube/config hpcadmin@$($servers[$i]):~/.kube"
    clusrun /nodes:$($servers[$i]) $joinClusterCommand

    clusrun /nodes:$($servers[$i]) "sudo apt-get update && sudo apt-get install -y dotnet-runtime-8.0"
    clusrun /nodes:$($servers[0]) "sshpass -p $password scp -r ~/KubernetesWrapper hpcadmin@$($servers[$i]):~/KubernetesWrapper"
}

New-HpcGroup -Name "Kubernetes" -ErrorAction SilentlyContinue
Add-HpcGroup -Name "Kubernetes" -NodeName $servers
@@ -0,0 +1,156 @@
#!/bin/bash

install_package() {
    # install python and pip
    sudo apt update
    echo "Installing Python 3.10 and Pip"
    sudo apt install software-properties-common -y
    sudo add-apt-repository ppa:deadsnakes/ppa -y
    sudo apt update
    sudo apt install python3.10 python3.10-venv python3.10-dev -y
    sudo rm /usr/bin/python3
    sudo ln -s python3.10 /usr/bin/python3
    sudo ln -s /usr/lib/python3/dist-packages/apt_inst.cpython-38-x86_64-linux-gnu.so /usr/lib/python3/dist-packages/apt_inst.so
    sudo ln -s /usr/lib/python3/dist-packages/apt_pkg.cpython-38-x86_64-linux-gnu.so /usr/lib/python3/dist-packages/apt_pkg.so
    curl https://bootstrap.pypa.io/get-pip.py | sudo python3

    python3 -V
    pip3 -V

    # install .net8 sdk
    wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
    sudo dpkg -i packages-microsoft-prod.deb
    rm packages-microsoft-prod.deb
    sudo apt-get update && sudo apt-get install -y dotnet-sdk-8.0
    echo 'export PATH=$PATH:$HOME/KubernetesWrapper/net8.0' >> ~/.profile
}

# Function to display usage information
usage() {
    echo "Usage: $0 [-p password] [server1 server2 ...]" 1>&2
    exit 1
}

# Function to resolve hostname to IP address
resolve_hostname() {
    hostname=$1
    ip=$(dig +short $hostname)
    echo $ip
}

# Parse command line options
while getopts ":p:" opt; do
    case ${opt} in
        p)
            password=$OPTARG
            ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            usage
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            usage
            ;;
    esac
done
shift $((OPTIND -1))

# Check if at least one IP address is provided
if [ $# -eq 0 ]; then
    echo "Please provide at least one IP address." >&2
    usage
fi

# Check if password is provided
if [ -z "$password" ]; then
    read -s -p "Enter password for SSH authentication: " password
    echo
fi

install_package

IPS=()
result=""

# Loop through each IP address and copy SSH key
# Kubernetes nodes must be in lowercase
for hostname in "$@"
do
    # Resolve the hostname to IP address and append to the array
    ip=$(resolve_hostname $hostname)
    IPS+=($ip)
    lowercase_hostname=$(echo "$hostname" | tr '[:upper:]' '[:lower:]')
    result+="$lowercase_hostname,$ip "
done
echo "${result% }"

echo "Generating ssh key and copying to all nodes"
sudo apt install sshpass -y

rm -f ~/.ssh/kube_key*
ssh-keygen -t rsa -N "" -f ~/.ssh/kube_key

for ip in "${IPS[@]}";
do
    echo "Copying SSH key to $ip..."
    # Copy SSH key to the IP address
    sshpass -p $password ssh-copy-id -i ~/.ssh/kube_key.pub -o StrictHostKeyChecking=no $ip
    if [ $? -eq 0 ]; then
        echo "SSH key copied successfully to $ip."
    else
        echo "Failed to copy SSH key to $ip. Please check the password or connectivity."
    fi
done

echo "------------------------------------------"
echo "Installing and disabling firewalld"
sudo apt install firewalld -y
sudo systemctl disable --now firewalld

echo "Installing and configuring Kubernetes via kubespray"
git clone https://github.com/kubernetes-sigs/kubespray
cd kubespray
# We may customize the version of kubespray here
git checkout release-2.24
sudo pip3 install -r requirements.txt
echo "------------------------------------------"
cp -rfp inventory/sample inventory/mycluster
CONFIG_FILE=inventory/mycluster/hosts.yaml python3 contrib/inventory_builder/inventory.py ${result% }

echo "------------------------------------------"
ansible-playbook -i inventory/mycluster/hosts.yaml --become --become-user=root reset.yml --extra-vars reset_confirmation=yes --private-key=~/.ssh/kube_key
echo "------------------------------------------"
ansible-playbook -i inventory/mycluster/hosts.yaml --become --become-user=root cluster.yml --private-key=~/.ssh/kube_key
echo "------------------------------------------"
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

kubectl version

dotnet build ~/hpcpack/code/KubernetesWrapper/KubernetesWrapper.sln
cd ~ && mkdir KubernetesWrapper
cp -r ~/hpcpack/code/KubernetesWrapper/KubernetesWrapper/bin/Debug/net8.0 ~/KubernetesWrapper

# Install kubectl, .net8 runtime, KubernetesWrapper on other nodes
ip_length=${#IPS[@]}
for ((i=1; i<ip_length; i++))
do
    # Install kubectl
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'mkdir .kube'
    sshpass -p $password scp ~/.kube/config hpcadmin@${IPS[$i]}:~/.kube
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'curl -LO "https://dl.k8s.io/release/v1.28.10/bin/linux/amd64/kubectl"'
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl'
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'kubectl version'

    # Install .net8 runtime
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb'
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'sudo dpkg -i packages-microsoft-prod.deb'
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'rm packages-microsoft-prod.deb'
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'sudo apt-get update && sudo apt-get install -y dotnet-runtime-8.0'

    # Copy KubernetesWrapper
    sshpass -p $password scp -r ~/KubernetesWrapper hpcadmin@${IPS[$i]}:~/KubernetesWrapper
    sshpass -p $password ssh hpcadmin@${IPS[$i]} 'echo 'export PATH=\\\$PATH:\\\$HOME/KubernetesWrapper/net8.0' >> ~/.profile'
done
@@ -0,0 +1,40 @@
## How to Deploy a Kubernetes Cluster in HPC Pack

### Usage
There are two scripts for deploying a Kubernetes cluster: `Deploy-Kubernetes-Kubespray.ps1` and `Deploy-Kubernetes-Manually.ps1`. You can run either script on your head node; their usage is the same.
```powershell
.\Deploy-Kubernetes-Kubespray.ps1 -password {password} -servers {servers}
```
or
```powershell
.\Deploy-Kubernetes-Manually.ps1 -password {password} -servers {servers}
```
Replace `{password}` with your password and `{servers}` with your node names separated by commas.
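
For example, assuming three hypothetical compute nodes named `node1`, `node2`, and `node3`, an invocation could look like this:
```powershell
.\Deploy-Kubernetes-Kubespray.ps1 -password "YourNodePassword" -servers node1,node2,node3
```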

### Differences
| | Deploy-Kubernetes-Kubespray.ps1 | Deploy-Kubernetes-Manually.ps1 |
|------------|---------------------------------|--------------------------------|
| **Deployment** | Uses [Kubespray](https://github.com/kubernetes-sigs/kubespray) to deploy a Kubernetes cluster, providing flexibility to modify the script and install plugins. | Uses [kubeadm](https://github.com/kubernetes/kubeadm) to deploy a Kubernetes cluster, which does not support customization. |
| **Output** | Runs a shell script within a single `clusrun` command, so there is no output until the whole shell script completes. | Provides output as soon as each individual `clusrun` command finishes. |
| **Master Node Selection** | Kubespray selects the control-plane nodes based on its own algorithm. To configure this selection manually, modify the `inventory/mycluster/hosts.yaml` file. | Selects the first server in `servers` as the control-plane node. |
| **Time** | Takes more time to deploy a cluster of the same scale. | Takes less time to deploy a cluster of the same scale. |

## KubernetesWrapper

KubernetesWrapper is a C# application that monitors [Kubernetes Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/). It creates a Kubernetes Job (and its pods), waits for the job to finish, prints the logs from each pod, and then the job and its pods are cleaned up after the configured TTL. After running the PowerShell script in the previous part, KubernetesWrapper will be installed on each node in the cluster.

### Usage
The path has been added to `~/.profile`, so you can run it directly. It is good practice to submit an HPC Pack job that starts `KubernetesWrapper` and passes the parameters to this application.
```powershell
job submit /nodegroup:{nodegroup_name} /numcores:{numcores} bash -lc 'KubernetesWrapper --job {job_name} --container {container_name} --image {image_name} --namespace {namespace} --ttl {ttl_for_job} --argument {argument_list}'
```

For example, the following command will create a Kubernetes Job that takes up CPU usage.
```powershell
job submit /nodegroup:Kubernetes /numcores:6 bash -lc 'KubernetesWrapper --job cpu-stress-job --container stress --image progrium/stress --namespace default --ttl 5 --argument --cpu 2 --timeout 5s'
```
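
KubernetesWrapper also accepts a `--command` flag (see `Util.cs`): tokens after `--command` override the container's entrypoint, while tokens after `--argument` are passed to the container as its argument list (in the example above, `--cpu 2 --timeout 5s` goes to the `stress` container, not to KubernetesWrapper itself). As an illustrative sketch with a hypothetical job name and the public `busybox` image, a job that simply echoes a message could be submitted like this:
```powershell
job submit /nodegroup:Kubernetes /numcores:2 bash -lc 'KubernetesWrapper --job echo-job --container echo --image busybox --namespace default --ttl 5 --command echo --argument hello-from-kubernetes'
```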

## Limitations
1. These scripts only work on Ubuntu distributions. Other distributions are not supported at this time.
2. `KubernetesWrapper` only monitors the `Job` resource. Other resource types are not supported at this time.
3. As stated in the [kubespray documentation](https://kubespray.io/#/?id=requirements), the master node should have at least 1500 MB of memory and worker nodes should have at least 1024 MB of memory.
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.10.35004.147
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KubernetesWrapper", "KubernetesWrapper\KubernetesWrapper.csproj", "{52627667-22E7-4C89-B802-5E5BE6141AE5}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{52627667-22E7-4C89-B802-5E5BE6141AE5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{52627667-22E7-4C89-B802-5E5BE6141AE5}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{52627667-22E7-4C89-B802-5E5BE6141AE5}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{52627667-22E7-4C89-B802-5E5BE6141AE5}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {3AA6CF6B-BD56-4644-ABB9-28E52FB45D3D}
	EndGlobalSection
EndGlobal
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="KubernetesClient" Version="14.0.2" />
  </ItemGroup>

</Project>
@@ -0,0 +1,311 @@
using k8s;
using k8s.Autorest;
using k8s.Models;
using System.Net;

namespace KubernetesWrapper
{
    internal class PodList
    {
        private static async Task Main(string[] args)
        {
            var (jobName, containerName, imageName, namespaceName, ttlSecondsAfterFinished, command, arguments) = Util.ProcessArgs(args);
            Console.WriteLine("Information about the job:");
            Console.WriteLine($"Job Name: {jobName}");
            Console.WriteLine($"Container Name: {containerName}");
            Console.WriteLine($"Image Name: {imageName}");
            Console.WriteLine($"Namespace Name: {namespaceName}");
            Console.WriteLine("---------");

            Console.WriteLine("Command: ");
            Console.WriteLine($"length: {command.Count}");
            foreach (var item in command)
            {
                Console.WriteLine(item);
            }
            Console.WriteLine("---------");

            Console.WriteLine("Arguments: ");
            Console.WriteLine($"length: {arguments.Count}");
            foreach (var item in arguments)
            {
                Console.WriteLine(item);
            }
            Console.WriteLine("---------");

            string? homeDirectory = Environment.GetEnvironmentVariable("HOME");
            string kubeConfigPath = $"{homeDirectory}/.kube/config";
            Console.WriteLine($"Home directory: {homeDirectory}");
            Console.WriteLine($"Kube config path: {kubeConfigPath}");

            var config = KubernetesClientConfiguration.BuildConfigFromConfigFile(kubeConfigPath);
            IKubernetes client = new Kubernetes(config);

            var nodes = Environment.GetEnvironmentVariable("CCP_NODES");
            var nodeList = Util.GetNodeList(nodes);

            if (nodeList.Count == 0)
            {
                Console.WriteLine("Node list is empty. Exiting...");
                return;
            }

            Console.WriteLine("node list: ");
            foreach (var item in nodeList)
            {
                Console.WriteLine(item);
            }
            Console.WriteLine("---------");

            CancellationTokenSource source = new();
            CancellationToken token = source.Token;

            // On Ctrl+C, delete the job and its pods before cancelling the watch.
            Console.CancelKeyPress += async (sender, e) =>
            {
                e.Cancel = true; // Prevent the process from terminating immediately
                Console.WriteLine("interrupt!!");

                try
                {
                    var job = await client.BatchV1.ReadNamespacedJobAsync(jobName, namespaceName).ConfigureAwait(false);
                    Console.WriteLine($"Job '{jobName}' found.");

                    // Job exists, so delete it
                    await client.BatchV1.DeleteNamespacedJobAsync(name: jobName, namespaceParameter: namespaceName).ConfigureAwait(false);
                    Console.WriteLine($"Job '{jobName}' deleted successfully.");

                }
                catch (k8s.Autorest.HttpOperationException ex) when (ex.Response.StatusCode == System.Net.HttpStatusCode.NotFound)
                {
                    Console.WriteLine($"Job '{jobName}' does not exist.");
                }
                catch (Exception ex)
                {
                    Console.WriteLine($"Error: {ex.Message}");
                }

                var podList = await client.CoreV1.ListNamespacedPodAsync(namespaceName, labelSelector: $"app={containerName}").ConfigureAwait(false);
                Console.WriteLine($"Pod list count: {podList.Items.Count}");
                foreach (var pod in podList.Items)
                {
                    try
                    {
                        Console.WriteLine($"Pod: {pod.Metadata.Name}");
                        await client.CoreV1.DeleteNamespacedPodAsync(pod.Metadata.Name, namespaceName, new V1DeleteOptions()).ConfigureAwait(false);
                        Console.WriteLine($"Deleted pod: {pod.Metadata.Name}");
                    }
                    catch (k8s.Autorest.HttpOperationException ex) when (ex.Response.StatusCode == System.Net.HttpStatusCode.NotFound)
                    {
                        Console.WriteLine($"Pod '{pod.Metadata.Name}' does not exist.");
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine($"Error: {ex.Message}");
                    }
                }

                source.Cancel();
            };

            try
            {
                var existingJob = await client.BatchV1.ReadNamespacedJobAsync(jobName, namespaceName).ConfigureAwait(false);
                if (existingJob != null)
                {
                    Console.WriteLine($"Job '{jobName}' already exists in namespace '{namespaceName}'.");
                    return;
                }
            }
            catch (HttpOperationException ex) when (ex.Response.StatusCode == HttpStatusCode.NotFound)
            {
                Console.WriteLine($"Job '{jobName}' does not exist in namespace '{namespaceName}'. Proceeding to create job.");
            }

            try
            {
                var job = await CreateJob(client, jobName, containerName, imageName, namespaceName,
                    ttlSecondsAfterFinished, command, arguments, nodeList, token);

                // Watch the job until all of its pods have succeeded, then print their logs.
                var jobWatcher = client.BatchV1.ListNamespacedJobWithHttpMessagesAsync(
                    namespaceName,
                    labelSelector: $"app={containerName}",
                    watch: true,
                    cancellationToken: token);

                await foreach (var (type, item) in jobWatcher.WatchAsync<V1Job, V1JobList>(
                    onError: e =>
                    {
                        Console.WriteLine($"Watcher error: {e.Message}");
                    },
                    cancellationToken: token))
                {
                    if (item.Status.Succeeded == nodeList.Count)
                    {
                        var pods = await GetPodsForJobAsync(client, jobName, namespaceName).ConfigureAwait(false);
                        if (pods.Count == 0)
                        {
                            Console.WriteLine($"No pods found for job '{jobName}' in namespace '{namespaceName}'.");
                            return;
                        }

                        // Retrieve logs from each Pod
                        foreach (var pod in pods)
                        {
                            Console.WriteLine($"Found Pod: {pod.Metadata.Name}. Retrieving logs...");

                            string logs = await GetPodLogsAsync(client, pod.Metadata.Name, namespaceName).ConfigureAwait(false);
                            Console.WriteLine($"=== Logs from Pod: {pod.Metadata.Name} ===");
                            Console.WriteLine(logs);
                        }

                        Console.WriteLine($"All pods reach Success state. About to exit in {ttlSecondsAfterFinished} seconds.");

                        if (type == WatchEventType.Deleted)
                        {
                            Console.WriteLine("Job reaches Deleted state. Exit monitoring now.");
                            break;
                        }
                    }
                }
            }
            catch (TaskCanceledException ex)
            {
                Console.WriteLine($"Stop watching. Task was canceled: {ex.Message}");
            }
        }

        public static async Task<V1Job?> CreateJob(IKubernetes client, string jobName, string containerName, string imageName,
            string namespaceName, int ttlSecondsAfterFinished, List<string> command, List<string> arguments, List<string> nodeList,
            CancellationToken token)
        {
            V1Container? container = new()
            {
                Name = containerName,
                Image = imageName,
            };

            if (command.Count != 0)
            {
                container.Command = command;
            }

            if (arguments.Count != 0)
            {
                container.Args = arguments;
            }

            var job = new V1Job
            {
                ApiVersion = "batch/v1",
                Kind = "Job",
                Metadata = new V1ObjectMeta
                {
                    Name = jobName,
                    Labels = new System.Collections.Generic.Dictionary<string, string>
                    {
                        { "app", containerName }
                    }
                },
                Spec = new V1JobSpec
                {
                    Completions = nodeList.Count,
                    Parallelism = nodeList.Count,
                    TtlSecondsAfterFinished = ttlSecondsAfterFinished,
                    Template = new V1PodTemplateSpec
                    {
                        Metadata = new V1ObjectMeta
                        {
                            Labels = new System.Collections.Generic.Dictionary<string, string>
                            {
                                { "app", containerName }
                            }
                        },
                        Spec = new V1PodSpec
                        {
                            Affinity = new V1Affinity
                            {
                                NodeAffinity = new V1NodeAffinity
                                {
                                    RequiredDuringSchedulingIgnoredDuringExecution = new V1NodeSelector
                                    {
                                        NodeSelectorTerms =
                                        [
                                            new V1NodeSelectorTerm
                                            {
                                                MatchExpressions =
                                                [
                                                    new()
                                                    {
                                                        Key = "kubernetes.io/hostname",
                                                        OperatorProperty = "In",
                                                        Values = nodeList
                                                    }
                                                ]
                                            }
                                        ]
                                    }
                                }
                            },
                            Containers =
                            [
                                container
                            ],
                            RestartPolicy = "Never"
                        }
                    }
                }
            };

            V1Job? result = null;
            try
            {
                result = await client.BatchV1.CreateNamespacedJobAsync(job, namespaceName, cancellationToken: token).ConfigureAwait(false);
                Console.WriteLine($"Job '{jobName}' created successfully.");
            }
            catch (TaskCanceledException ex)
            {
                Console.WriteLine($"Job will not be created. Task was canceled: {ex.Message}");
            }
            catch (k8s.Autorest.HttpOperationException ex) when (ex.Response.StatusCode == HttpStatusCode.Conflict)
            {
                Console.WriteLine($"Job '{jobName}' already exists. Error: {ex.Message}");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error creating deployment: {ex.Message}");
            }

            return result;
        }

        // Method to get all Pods associated with a Job
        static async Task<List<V1Pod>> GetPodsForJobAsync(IKubernetes client, string jobName, string namespaceName)
        {
            var podList = await client.CoreV1.ListNamespacedPodAsync(namespaceName, labelSelector: $"job-name={jobName}").ConfigureAwait(false);
            return podList.Items.ToList(); // Return all Pods as a List
        }

        // Method to get the logs of a specific Pod
        static async Task<string> GetPodLogsAsync(IKubernetes client, string podName, string namespaceName)
        {
            try
            {
                var logs = await client.CoreV1.ReadNamespacedPodLogAsync(podName, namespaceName).ConfigureAwait(false);
                return await ConvertStreamToStringAsync(logs).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Failed to get logs for pod '{podName}': {ex.Message}");
                return string.Empty;
            }
        }

        static async Task<string> ConvertStreamToStringAsync(Stream stream)
        {
            using (var reader = new StreamReader(stream))
            {
                return await reader.ReadToEndAsync().ConfigureAwait(false);
            }
        }
    }
}
@@ -0,0 +1,106 @@
namespace KubernetesWrapper
{
    public class Util
    {
        public static string[] SplitStringBySpaces(string? input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return []; // Return an empty array if the input is null or empty
            }

            // Split the input string by spaces and return the array of words
            return input.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        }

        public static (string jobName, string containerName, string imageName, string namespaceName, int ttl,
            List<string> command, List<string> argument) ProcessArgs(string[] args)
        {
            // Initialize variables to store the parsed arguments
            string jobName = string.Empty;
            string containerName = string.Empty;
            string imageName = string.Empty;
            string namespaceName = string.Empty;
            int ttl = 0;
            List<string> command = [];
            List<string> argument = [];

            // Parse the command-line arguments
            for (int i = 0; i < args.Length; i++)
            {
                switch (args[i])
                {
                    case "--job":
                        if (i + 1 < args.Length)
                        {
                            jobName = args[i + 1];
                            i++;
                        }
                        break;
                    case "--container":
                        if (i + 1 < args.Length)
                        {
                            containerName = args[i + 1];
                            i++;
                        }
                        break;
                    case "--image":
                        if (i + 1 < args.Length)
                        {
                            imageName = args[i + 1];
                            i++;
                        }
                        break;
                    case "--namespace":
                        if (i + 1 < args.Length)
                        {
                            namespaceName = args[i + 1];
                            i++;
                        }
                        break;
                    case "--ttl":
                        if (i + 1 < args.Length)
                        {
                            int.TryParse(args[i + 1], out ttl);
                            i++;
                        }
                        break;
                    case "--command":
                        while (i + 1 < args.Length && args[i + 1] != "--argument")
                        {
                            command.Add(args[i + 1]);
                            i++;
                        }
                        break;
                    case "--argument":
                        while (i + 1 < args.Length && args[i + 1] != "--command")
                        {
                            argument.Add(args[i + 1]);
                            i++;
                        }
                        break;

                }
            }

            return (jobName, containerName, imageName, namespaceName, ttl, command, argument);
        }

        // input: 2 IAASCNxxx 4 IAASCNxxx 4
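        // For example (hypothetical values): CCP_NODES = "2 NODEA 4 NODEB 4" means two allocated nodes,
        // each followed by its core count. GetNodeList returns ["nodea", "nodeb"] in lowercase, matching
        // the kubernetes.io/hostname labels used for the node affinity in PodList.CreateJob.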
        public static List<string> GetNodeList(string? ccp_nodes)
        {
            string[] splitted = SplitStringBySpaces(ccp_nodes);
            int length = 0;
            if (splitted.Length != 0)
            {
                length = Int32.TryParse(splitted[0], out length) ? length : 0;
            }
            List<string> nodes = [];
            for (int i = 0; i < length; i++)
            {
                nodes.Add(splitted[2 * i + 1].ToLower());
            }
            return nodes;
        }
    }
}