зеркало из https://github.com/microsoft/KubeGPU.git
Fix issue where the "used" information in node info was being reset
GPU allocation now works correctly
This commit is contained in:
Родитель
5b6bd3f3c2
Коммит
ef8515124e
|
@ -487,6 +487,7 @@ func containerFitsGroupConstraints(contName string, contReq *types.ContainerInfo
|
|||
}
|
||||
|
||||
func initNodeResource(n *types.NodeInfo) map[string]int64 {
|
||||
glog.V(5).Infof("Used resource %v", n.Used)
|
||||
nodeResource := make(map[string]int64)
|
||||
for resKey, resVal := range n.Used {
|
||||
nodeResource[string(resKey)] = resVal
|
||||
|
@ -615,6 +616,8 @@ func ComputePodGroupResources(n *types.NodeInfo, spec *types.PodInfo, bRemovePod
|
|||
}
|
||||
}
|
||||
|
||||
glog.V(5).Infof("PodGroupResourcesComputes: podResources: %v updateUsedByNode: %v removePod: %v", podResources, updatedUsedByNode, bRemovePod)
|
||||
|
||||
return podResources, updatedUsedByNode
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ func NodeInfoToAnnotation(meta *metav1.ObjectMeta, nodeInfo *types.NodeInfo) err
|
|||
}
|
||||
|
||||
// AnnotationToNodeInfo is used by scheduler to convert annotation to node info
|
||||
func AnnotationToNodeInfo(meta *metav1.ObjectMeta) (*types.NodeInfo, error) {
|
||||
func AnnotationToNodeInfo(meta *metav1.ObjectMeta, existingNodeInfo *types.NodeInfo) (*types.NodeInfo, error) {
|
||||
nodeInfo := types.NewNodeInfo()
|
||||
if meta.Annotations != nil {
|
||||
nodeInfoStr, ok := meta.Annotations["node.alpha/DeviceInformation"]
|
||||
|
@ -51,6 +51,11 @@ func AnnotationToNodeInfo(meta *metav1.ObjectMeta) (*types.NodeInfo, error) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if existingNodeInfo != nil && existingNodeInfo.Used != nil {
|
||||
for usedKey, usedVal := range existingNodeInfo.Used {
|
||||
nodeInfo.Used[usedKey] = usedVal
|
||||
}
|
||||
}
|
||||
glog.V(4).Infof("Annotations: %v converted to NodeInfo: %+v", meta.Annotations, nodeInfo)
|
||||
return nodeInfo, nil
|
||||
}
|
||||
|
|
|
@ -2,22 +2,23 @@ package kubeinterface
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
"fmt"
|
||||
|
||||
"github.com/Microsoft/KubeGPU/types"
|
||||
"github.com/Microsoft/KubeGPU/utils"
|
||||
kubev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func rq (i int64) resource.Quantity {
|
||||
func rq(i int64) resource.Quantity {
|
||||
return *resource.NewQuantity(i, resource.DecimalSI)
|
||||
}
|
||||
|
||||
func compareContainer(cont0 *types.ContainerInfo, cont1 *types.ContainerInfo) {
|
||||
if (true) {
|
||||
if true {
|
||||
if !reflect.DeepEqual(cont0.KubeRequests, cont1.KubeRequests) {
|
||||
fmt.Printf("KubeReqs don't match\n0:\n%v\n1:\n%v\n", cont0.KubeRequests, cont1.KubeRequests)
|
||||
}
|
||||
|
@ -67,19 +68,19 @@ func comparePod(pod0 *types.PodInfo, pod1 *types.PodInfo) {
|
|||
|
||||
func TestConvert(t *testing.T) {
|
||||
// test node conversion
|
||||
nodeMeta := &metav1.ObjectMeta{Annotations: map[string]string{"OtherAnnotation" : "OtherAnnotationValue"}}
|
||||
nodeMeta := &metav1.ObjectMeta{Annotations: map[string]string{"OtherAnnotation": "OtherAnnotationValue"}}
|
||||
nodeInfo := &types.NodeInfo{
|
||||
Name: "Node0",
|
||||
Capacity: types.ResourceList{"A": 245, "B": 300},
|
||||
Name: "Node0",
|
||||
Capacity: types.ResourceList{"A": 245, "B": 300},
|
||||
Allocatable: types.ResourceList{"A": 200, "B": 100},
|
||||
Used: types.ResourceList{"A": 0, "B": 0},
|
||||
Scorer: types.ResourceScorer{"A": 4}, // no scorer for resource "B" is provided
|
||||
Used: types.ResourceList{"A": 0, "B": 0},
|
||||
Scorer: types.ResourceScorer{"A": 4}, // no scorer for resource "B" is provided
|
||||
}
|
||||
NodeInfoToAnnotation(nodeMeta, nodeInfo)
|
||||
jsonNode, _ := json.Marshal(nodeInfo)
|
||||
annotationExpect := map[string]string{
|
||||
"OtherAnnotation": "OtherAnnotationValue",
|
||||
"node.alpha/DeviceInformation" : string(jsonNode),
|
||||
"OtherAnnotation": "OtherAnnotationValue",
|
||||
"node.alpha/DeviceInformation": string(jsonNode),
|
||||
// "NodeInfo/Name": "Node0",
|
||||
// "NodeInfo/Capacity/A": "245",
|
||||
// "NodeInfo/Capacity/B": "300",
|
||||
|
@ -87,12 +88,12 @@ func TestConvert(t *testing.T) {
|
|||
// "NodeInfo/Allocatable/B": "100",
|
||||
// "NodeInfo/Used/A": "0",
|
||||
// "NodeInfo/Used/B": "0",
|
||||
// "NodeInfo/Scorer/A": "4",
|
||||
// "NodeInfo/Scorer/A": "4",
|
||||
}
|
||||
if !reflect.DeepEqual(annotationExpect, nodeMeta.Annotations) {
|
||||
t.Errorf("Node info annotations not what is expected, expected: %+v, have: %+v", annotationExpect, nodeMeta.Annotations)
|
||||
}
|
||||
nodeInfoGet, err := AnnotationToNodeInfo(nodeMeta)
|
||||
nodeInfoGet, err := AnnotationToNodeInfo(nodeMeta, nil)
|
||||
if err != nil {
|
||||
t.Errorf("Error encountered when converting annotation to node info: %v", err)
|
||||
}
|
||||
|
@ -102,29 +103,29 @@ func TestConvert(t *testing.T) {
|
|||
|
||||
// test pod conversion
|
||||
init0 := types.ContainerInfo{
|
||||
Requests : types.ResourceList{"alpha/grpresource/gpu/0/cards" : 1, "alpha/grpresource/gpu/0/memory" : 100000},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
|
||||
}
|
||||
run0 := types.ContainerInfo{
|
||||
Requests : types.ResourceList{"alpha/grpresource/gpu/A/cards" : 4},
|
||||
AllocateFrom : types.ResourceLocation{"alpha/grpresource/gpu/0/cards": "CARD1"},
|
||||
DevRequests : types.ResourceList{"alpha/grpresource/gpugrp1/A/gpu/0/cards": 90},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
|
||||
AllocateFrom: types.ResourceLocation{"alpha/grpresource/gpu/0/cards": "CARD1"},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpugrp1/A/gpu/0/cards": 90},
|
||||
}
|
||||
run1 := types.ContainerInfo{
|
||||
Requests : types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
Scorer : types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
|
||||
}
|
||||
pod0 := types.PodInfo{
|
||||
NodeName : "NodeB",
|
||||
InitContainers : map[string]types.ContainerInfo{"Init0" : init0},
|
||||
RunningContainers : map[string]types.ContainerInfo{"Run0" : run0, "Run1" : run1},
|
||||
NodeName: "NodeB",
|
||||
InitContainers: map[string]types.ContainerInfo{"Init0": init0},
|
||||
RunningContainers: map[string]types.ContainerInfo{"Run0": run0, "Run1": run1},
|
||||
}
|
||||
jsonStr, _ := json.Marshal(pod0)
|
||||
kubePod := &kubev1.Pod{
|
||||
ObjectMeta : metav1.ObjectMeta{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "Pod0",
|
||||
Annotations: map[string]string{
|
||||
"ABCD": "EFGH",
|
||||
"pod.alpha/DeviceInformation" : string(jsonStr),
|
||||
"pod.alpha/DeviceInformation": string(jsonStr),
|
||||
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/cards": "1",
|
||||
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/memory": "100000",
|
||||
// "PodInfo/RunningContainer/Run0/Requests/alpha/grpresource/gpu/A/cards": "4",
|
||||
|
@ -135,27 +136,27 @@ func TestConvert(t *testing.T) {
|
|||
// "PodInfo/ValidForNode": "NodeB",
|
||||
},
|
||||
},
|
||||
Spec : kubev1.PodSpec{
|
||||
Spec: kubev1.PodSpec{
|
||||
InitContainers: []kubev1.Container{
|
||||
{
|
||||
Name: "Init0",
|
||||
Name: "Init0",
|
||||
Image: "BCDE",
|
||||
Resources: kubev1.ResourceRequirements{
|
||||
Requests: kubev1.ResourceList{"CPU": rq(4), "Memory": rq(100000), "Other": rq(20)},
|
||||
Limits: kubev1.ResourceList{"CPU": rq(10)},
|
||||
Limits: kubev1.ResourceList{"CPU": rq(10)},
|
||||
},
|
||||
},
|
||||
},
|
||||
Containers: []kubev1.Container{
|
||||
{
|
||||
Name: "Run0",
|
||||
Name: "Run0",
|
||||
Image: "RunBCDE",
|
||||
Resources: kubev1.ResourceRequirements{
|
||||
Requests: kubev1.ResourceList{"CPU": rq(8), "Memory": rq(200000)},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Run1",
|
||||
Name: "Run1",
|
||||
Image: "RunBCDE",
|
||||
Resources: kubev1.ResourceRequirements{
|
||||
Requests: kubev1.ResourceList{"CPU": rq(4), "Memory": rq(300000), "alpha.kubernetes.io/nvidia-gpu": rq(2)},
|
||||
|
@ -171,34 +172,31 @@ func TestConvert(t *testing.T) {
|
|||
t.Errorf("encounter error %v", err)
|
||||
}
|
||||
expectedPodInfo := &types.PodInfo{
|
||||
Name: "Pod0",
|
||||
Name: "Pod0",
|
||||
NodeName: "",
|
||||
InitContainers: map[string]types.ContainerInfo{
|
||||
"Init0" :
|
||||
{
|
||||
"Init0": {
|
||||
KubeRequests: types.ResourceList{"CPU": 4, "Memory": 100000, "Other": 20},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
|
||||
AllocateFrom: types.ResourceLocation{},
|
||||
Scorer: types.ResourceScorer{},
|
||||
Scorer: types.ResourceScorer{},
|
||||
},
|
||||
},
|
||||
RunningContainers: map[string]types.ContainerInfo{
|
||||
"Run0" :
|
||||
{
|
||||
"Run0": {
|
||||
KubeRequests: types.ResourceList{"CPU": 8, "Memory": 200000},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
|
||||
AllocateFrom: types.ResourceLocation{},
|
||||
Scorer: types.ResourceScorer{},
|
||||
Scorer: types.ResourceScorer{},
|
||||
},
|
||||
"Run1" :
|
||||
{
|
||||
"Run1": {
|
||||
KubeRequests: types.ResourceList{"CPU": 4, "Memory": 300000, "alpha.kubernetes.io/nvidia-gpu": 2},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
|
||||
AllocateFrom: types.ResourceLocation{},
|
||||
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
|
||||
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -211,7 +209,7 @@ func TestConvert(t *testing.T) {
|
|||
contCopy := podInfo.InitContainers["Init0"]
|
||||
contCopy.DevRequests = types.ResourceList{"alpha/grpresource/gpugrp/0/gpu/0/cards": 1, "alpha/grpresource/gpugrp/0/gpu/0/memory": 200000}
|
||||
contCopy.AllocateFrom = types.ResourceLocation{
|
||||
"alpha/grpresource/gpugrp/0/gpu/0/cards": "alpha/grpresource/gpugrp/A/gpu/12/cards",
|
||||
"alpha/grpresource/gpugrp/0/gpu/0/cards": "alpha/grpresource/gpugrp/A/gpu/12/cards",
|
||||
"alpha/grpresource/gpugrp/0/gpu/0/memory": "alpha/grpresource/gpugrp/A/gpu/12/memory",
|
||||
}
|
||||
podInfo.InitContainers["Init0"] = contCopy
|
||||
|
@ -237,7 +235,7 @@ func TestConvert(t *testing.T) {
|
|||
jsonStr, _ = json.Marshal(podInfo)
|
||||
expectedAnnotations := map[string]string{
|
||||
"ABCD": "EFGH", // existing
|
||||
"pod.alpha/DeviceInformation" : string(jsonStr),
|
||||
"pod.alpha/DeviceInformation": string(jsonStr),
|
||||
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/cards": "1",
|
||||
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/memory": "100000",
|
||||
// "PodInfo/RunningContainer/Run0/Requests/alpha/grpresource/gpu/A/cards": "4",
|
||||
|
@ -251,7 +249,7 @@ func TestConvert(t *testing.T) {
|
|||
// "PodInfo/InitContainer/Init0/AllocateFrom/alpha/grpresource/gpugrp/0/gpu/0/memory": "alpha/grpresource/gpugrp/A/gpu/12/memory",
|
||||
// "PodInfo/ValidForNode": "NodeNewD",
|
||||
}
|
||||
if !reflect.DeepEqual(kubePod.ObjectMeta.Annotations, expectedAnnotations) {
|
||||
if !reflect.DeepEqual(kubePod.ObjectMeta.Annotations, expectedAnnotations) {
|
||||
t.Errorf("Pod annotations are not what is expected\nexpect:\n%v\nhave:\n%v", expectedAnnotations, kubePod.ObjectMeta.Annotations)
|
||||
utils.CompareMapStringString(expectedAnnotations, kubePod.ObjectMeta.Annotations)
|
||||
}
|
||||
|
@ -266,4 +264,3 @@ func TestConvert(t *testing.T) {
|
|||
comparePod(podInfo, podInfo2)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -14,19 +14,20 @@ import (
|
|||
func GetPodAndNode(pod *v1.Pod, node *NodeInfo, invalidatePodAnnotations bool) (*extypes.PodInfo, *extypes.NodeInfo, error) {
|
||||
// grab node information
|
||||
nodeInfo := node.nodeEx
|
||||
if nodeInfo == nil {
|
||||
if node.Node() != nil {
|
||||
//return nil, nil, fmt.Errorf("node not found")
|
||||
nodeInfoGet, err := kubeinterface.AnnotationToNodeInfo(&node.Node().ObjectMeta)
|
||||
glog.V(3).Infof("Node Info not present yet, use annotations to recompute")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
nodeInfo = nodeInfoGet
|
||||
} else {
|
||||
nodeInfo = extypes.NewNodeInfo()
|
||||
}
|
||||
}
|
||||
// if nodeInfo == nil {
|
||||
// if node.Node() != nil {
|
||||
// //return nil, nil, fmt.Errorf("node not found")
|
||||
// nodeInfoGet, err := kubeinterface.AnnotationToNodeInfo(&node.Node().ObjectMeta)
|
||||
// glog.V(2).Infof("Node Info not present yet, use annotations to recompute")
|
||||
// if err != nil {
|
||||
// return nil, nil, err
|
||||
// }
|
||||
// nodeInfo = nodeInfoGet
|
||||
// } else {
|
||||
// nodeInfo = extypes.NewNodeInfo()
|
||||
// glog.V(2).Infof("Node Info not present yet, set to new struct")
|
||||
// }
|
||||
// }
|
||||
podInfo, err := kubeinterface.KubePodInfoToPodInfo(pod, invalidatePodAnnotations)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
|
|
@ -337,6 +337,7 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
|
|||
if err != nil {
|
||||
panic(fmt.Sprintf("Pod Info annotations are not correct and cannot be parsed %+v", pod))
|
||||
}
|
||||
glog.V(5).Infof("NodeInfo Exteded status = %v", n.nodeEx)
|
||||
|
||||
n.generation++
|
||||
}
|
||||
|
@ -454,7 +455,7 @@ func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, used bool) {
|
|||
func (n *NodeInfo) SetNode(node *v1.Node) error {
|
||||
n.node = node
|
||||
// extract annotations from node info
|
||||
exNodeInfo, err := kubeinterface.AnnotationToNodeInfo(&node.ObjectMeta)
|
||||
exNodeInfo, err := kubeinterface.AnnotationToNodeInfo(&node.ObjectMeta, n.nodeEx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче