Fix "Used" resource information in node info being reset when the node is updated

GPU allocation now works correctly
This commit is contained in:
sanjeevm0 2018-02-14 16:01:06 -08:00
Родитель 5b6bd3f3c2
Коммит ef8515124e
5 изменённых файлов: 71 добавлений и 64 удалений

Просмотреть файл

@ -487,6 +487,7 @@ func containerFitsGroupConstraints(contName string, contReq *types.ContainerInfo
}
func initNodeResource(n *types.NodeInfo) map[string]int64 {
glog.V(5).Infof("Used resource %v", n.Used)
nodeResource := make(map[string]int64)
for resKey, resVal := range n.Used {
nodeResource[string(resKey)] = resVal
@ -615,6 +616,8 @@ func ComputePodGroupResources(n *types.NodeInfo, spec *types.PodInfo, bRemovePod
}
}
glog.V(5).Infof("PodGroupResourcesComputes: podResources: %v updateUsedByNode: %v removePod: %v", podResources, updatedUsedByNode, bRemovePod)
return podResources, updatedUsedByNode
}

Просмотреть файл

@ -40,7 +40,7 @@ func NodeInfoToAnnotation(meta *metav1.ObjectMeta, nodeInfo *types.NodeInfo) err
}
// AnnotationToNodeInfo is used by scheduler to convert annotation to node info
func AnnotationToNodeInfo(meta *metav1.ObjectMeta) (*types.NodeInfo, error) {
func AnnotationToNodeInfo(meta *metav1.ObjectMeta, existingNodeInfo *types.NodeInfo) (*types.NodeInfo, error) {
nodeInfo := types.NewNodeInfo()
if meta.Annotations != nil {
nodeInfoStr, ok := meta.Annotations["node.alpha/DeviceInformation"]
@ -51,6 +51,11 @@ func AnnotationToNodeInfo(meta *metav1.ObjectMeta) (*types.NodeInfo, error) {
}
}
}
if existingNodeInfo != nil && existingNodeInfo.Used != nil {
for usedKey, usedVal := range existingNodeInfo.Used {
nodeInfo.Used[usedKey] = usedVal
}
}
glog.V(4).Infof("Annotations: %v converted to NodeInfo: %+v", meta.Annotations, nodeInfo)
return nodeInfo, nil
}

Просмотреть файл

@ -2,22 +2,23 @@ package kubeinterface
import (
"encoding/json"
"fmt"
"reflect"
"testing"
"fmt"
"github.com/Microsoft/KubeGPU/types"
"github.com/Microsoft/KubeGPU/utils"
kubev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func rq (i int64) resource.Quantity {
func rq(i int64) resource.Quantity {
return *resource.NewQuantity(i, resource.DecimalSI)
}
func compareContainer(cont0 *types.ContainerInfo, cont1 *types.ContainerInfo) {
if (true) {
if true {
if !reflect.DeepEqual(cont0.KubeRequests, cont1.KubeRequests) {
fmt.Printf("KubeReqs don't match\n0:\n%v\n1:\n%v\n", cont0.KubeRequests, cont1.KubeRequests)
}
@ -67,19 +68,19 @@ func comparePod(pod0 *types.PodInfo, pod1 *types.PodInfo) {
func TestConvert(t *testing.T) {
// test node conversion
nodeMeta := &metav1.ObjectMeta{Annotations: map[string]string{"OtherAnnotation" : "OtherAnnotationValue"}}
nodeMeta := &metav1.ObjectMeta{Annotations: map[string]string{"OtherAnnotation": "OtherAnnotationValue"}}
nodeInfo := &types.NodeInfo{
Name: "Node0",
Capacity: types.ResourceList{"A": 245, "B": 300},
Name: "Node0",
Capacity: types.ResourceList{"A": 245, "B": 300},
Allocatable: types.ResourceList{"A": 200, "B": 100},
Used: types.ResourceList{"A": 0, "B": 0},
Scorer: types.ResourceScorer{"A": 4}, // no scorer for resource "B" is provided
Used: types.ResourceList{"A": 0, "B": 0},
Scorer: types.ResourceScorer{"A": 4}, // no scorer for resource "B" is provided
}
NodeInfoToAnnotation(nodeMeta, nodeInfo)
jsonNode, _ := json.Marshal(nodeInfo)
annotationExpect := map[string]string{
"OtherAnnotation": "OtherAnnotationValue",
"node.alpha/DeviceInformation" : string(jsonNode),
"OtherAnnotation": "OtherAnnotationValue",
"node.alpha/DeviceInformation": string(jsonNode),
// "NodeInfo/Name": "Node0",
// "NodeInfo/Capacity/A": "245",
// "NodeInfo/Capacity/B": "300",
@ -87,12 +88,12 @@ func TestConvert(t *testing.T) {
// "NodeInfo/Allocatable/B": "100",
// "NodeInfo/Used/A": "0",
// "NodeInfo/Used/B": "0",
// "NodeInfo/Scorer/A": "4",
// "NodeInfo/Scorer/A": "4",
}
if !reflect.DeepEqual(annotationExpect, nodeMeta.Annotations) {
t.Errorf("Node info annotations not what is expected, expected: %+v, have: %+v", annotationExpect, nodeMeta.Annotations)
}
nodeInfoGet, err := AnnotationToNodeInfo(nodeMeta)
nodeInfoGet, err := AnnotationToNodeInfo(nodeMeta, nil)
if err != nil {
t.Errorf("Error encountered when converting annotation to node info: %v", err)
}
@ -102,29 +103,29 @@ func TestConvert(t *testing.T) {
// test pod conversion
init0 := types.ContainerInfo{
Requests : types.ResourceList{"alpha/grpresource/gpu/0/cards" : 1, "alpha/grpresource/gpu/0/memory" : 100000},
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
}
run0 := types.ContainerInfo{
Requests : types.ResourceList{"alpha/grpresource/gpu/A/cards" : 4},
AllocateFrom : types.ResourceLocation{"alpha/grpresource/gpu/0/cards": "CARD1"},
DevRequests : types.ResourceList{"alpha/grpresource/gpugrp1/A/gpu/0/cards": 90},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
AllocateFrom: types.ResourceLocation{"alpha/grpresource/gpu/0/cards": "CARD1"},
DevRequests: types.ResourceList{"alpha/grpresource/gpugrp1/A/gpu/0/cards": 90},
}
run1 := types.ContainerInfo{
Requests : types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
Scorer : types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
}
pod0 := types.PodInfo{
NodeName : "NodeB",
InitContainers : map[string]types.ContainerInfo{"Init0" : init0},
RunningContainers : map[string]types.ContainerInfo{"Run0" : run0, "Run1" : run1},
NodeName: "NodeB",
InitContainers: map[string]types.ContainerInfo{"Init0": init0},
RunningContainers: map[string]types.ContainerInfo{"Run0": run0, "Run1": run1},
}
jsonStr, _ := json.Marshal(pod0)
kubePod := &kubev1.Pod{
ObjectMeta : metav1.ObjectMeta{
ObjectMeta: metav1.ObjectMeta{
Name: "Pod0",
Annotations: map[string]string{
"ABCD": "EFGH",
"pod.alpha/DeviceInformation" : string(jsonStr),
"pod.alpha/DeviceInformation": string(jsonStr),
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/cards": "1",
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/memory": "100000",
// "PodInfo/RunningContainer/Run0/Requests/alpha/grpresource/gpu/A/cards": "4",
@ -135,27 +136,27 @@ func TestConvert(t *testing.T) {
// "PodInfo/ValidForNode": "NodeB",
},
},
Spec : kubev1.PodSpec{
Spec: kubev1.PodSpec{
InitContainers: []kubev1.Container{
{
Name: "Init0",
Name: "Init0",
Image: "BCDE",
Resources: kubev1.ResourceRequirements{
Requests: kubev1.ResourceList{"CPU": rq(4), "Memory": rq(100000), "Other": rq(20)},
Limits: kubev1.ResourceList{"CPU": rq(10)},
Limits: kubev1.ResourceList{"CPU": rq(10)},
},
},
},
Containers: []kubev1.Container{
{
Name: "Run0",
Name: "Run0",
Image: "RunBCDE",
Resources: kubev1.ResourceRequirements{
Requests: kubev1.ResourceList{"CPU": rq(8), "Memory": rq(200000)},
},
},
{
Name: "Run1",
Name: "Run1",
Image: "RunBCDE",
Resources: kubev1.ResourceRequirements{
Requests: kubev1.ResourceList{"CPU": rq(4), "Memory": rq(300000), "alpha.kubernetes.io/nvidia-gpu": rq(2)},
@ -171,34 +172,31 @@ func TestConvert(t *testing.T) {
t.Errorf("encounter error %v", err)
}
expectedPodInfo := &types.PodInfo{
Name: "Pod0",
Name: "Pod0",
NodeName: "",
InitContainers: map[string]types.ContainerInfo{
"Init0" :
{
"Init0": {
KubeRequests: types.ResourceList{"CPU": 4, "Memory": 100000, "Other": 20},
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
Requests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/0/cards": 1, "alpha/grpresource/gpu/0/memory": 100000},
AllocateFrom: types.ResourceLocation{},
Scorer: types.ResourceScorer{},
Scorer: types.ResourceScorer{},
},
},
RunningContainers: map[string]types.ContainerInfo{
"Run0" :
{
"Run0": {
KubeRequests: types.ResourceList{"CPU": 8, "Memory": 200000},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 4},
AllocateFrom: types.ResourceLocation{},
Scorer: types.ResourceScorer{},
Scorer: types.ResourceScorer{},
},
"Run1" :
{
"Run1": {
KubeRequests: types.ResourceList{"CPU": 4, "Memory": 300000, "alpha.kubernetes.io/nvidia-gpu": 2},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
Requests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
DevRequests: types.ResourceList{"alpha/grpresource/gpu/A/cards": 6},
AllocateFrom: types.ResourceLocation{},
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
Scorer: types.ResourceScorer{"alpha/grpresource/gpu/A/cards": 10},
},
},
}
@ -211,7 +209,7 @@ func TestConvert(t *testing.T) {
contCopy := podInfo.InitContainers["Init0"]
contCopy.DevRequests = types.ResourceList{"alpha/grpresource/gpugrp/0/gpu/0/cards": 1, "alpha/grpresource/gpugrp/0/gpu/0/memory": 200000}
contCopy.AllocateFrom = types.ResourceLocation{
"alpha/grpresource/gpugrp/0/gpu/0/cards": "alpha/grpresource/gpugrp/A/gpu/12/cards",
"alpha/grpresource/gpugrp/0/gpu/0/cards": "alpha/grpresource/gpugrp/A/gpu/12/cards",
"alpha/grpresource/gpugrp/0/gpu/0/memory": "alpha/grpresource/gpugrp/A/gpu/12/memory",
}
podInfo.InitContainers["Init0"] = contCopy
@ -237,7 +235,7 @@ func TestConvert(t *testing.T) {
jsonStr, _ = json.Marshal(podInfo)
expectedAnnotations := map[string]string{
"ABCD": "EFGH", // existing
"pod.alpha/DeviceInformation" : string(jsonStr),
"pod.alpha/DeviceInformation": string(jsonStr),
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/cards": "1",
// "PodInfo/InitContainer/Init0/Requests/alpha/grpresource/gpu/0/memory": "100000",
// "PodInfo/RunningContainer/Run0/Requests/alpha/grpresource/gpu/A/cards": "4",
@ -251,7 +249,7 @@ func TestConvert(t *testing.T) {
// "PodInfo/InitContainer/Init0/AllocateFrom/alpha/grpresource/gpugrp/0/gpu/0/memory": "alpha/grpresource/gpugrp/A/gpu/12/memory",
// "PodInfo/ValidForNode": "NodeNewD",
}
if !reflect.DeepEqual(kubePod.ObjectMeta.Annotations, expectedAnnotations) {
if !reflect.DeepEqual(kubePod.ObjectMeta.Annotations, expectedAnnotations) {
t.Errorf("Pod annotations are not what is expected\nexpect:\n%v\nhave:\n%v", expectedAnnotations, kubePod.ObjectMeta.Annotations)
utils.CompareMapStringString(expectedAnnotations, kubePod.ObjectMeta.Annotations)
}
@ -266,4 +264,3 @@ func TestConvert(t *testing.T) {
comparePod(podInfo, podInfo2)
}
}

Просмотреть файл

@ -14,19 +14,20 @@ import (
func GetPodAndNode(pod *v1.Pod, node *NodeInfo, invalidatePodAnnotations bool) (*extypes.PodInfo, *extypes.NodeInfo, error) {
// grab node information
nodeInfo := node.nodeEx
if nodeInfo == nil {
if node.Node() != nil {
//return nil, nil, fmt.Errorf("node not found")
nodeInfoGet, err := kubeinterface.AnnotationToNodeInfo(&node.Node().ObjectMeta)
glog.V(3).Infof("Node Info not present yet, use annotations to recompute")
if err != nil {
return nil, nil, err
}
nodeInfo = nodeInfoGet
} else {
nodeInfo = extypes.NewNodeInfo()
}
}
// if nodeInfo == nil {
// if node.Node() != nil {
// //return nil, nil, fmt.Errorf("node not found")
// nodeInfoGet, err := kubeinterface.AnnotationToNodeInfo(&node.Node().ObjectMeta)
// glog.V(2).Infof("Node Info not present yet, use annotations to recompute")
// if err != nil {
// return nil, nil, err
// }
// nodeInfo = nodeInfoGet
// } else {
// nodeInfo = extypes.NewNodeInfo()
// glog.V(2).Infof("Node Info not present yet, set to new struct")
// }
// }
podInfo, err := kubeinterface.KubePodInfoToPodInfo(pod, invalidatePodAnnotations)
if err != nil {
return nil, nil, err

Просмотреть файл

@ -337,6 +337,7 @@ func (n *NodeInfo) AddPod(pod *v1.Pod) {
if err != nil {
panic(fmt.Sprintf("Pod Info annotations are not correct and cannot be parsed %+v", pod))
}
glog.V(5).Infof("NodeInfo Exteded status = %v", n.nodeEx)
n.generation++
}
@ -454,7 +455,7 @@ func (n *NodeInfo) updateUsedPorts(pod *v1.Pod, used bool) {
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
// extract annotations from node info
exNodeInfo, err := kubeinterface.AnnotationToNodeInfo(&node.ObjectMeta)
exNodeInfo, err := kubeinterface.AnnotationToNodeInfo(&node.ObjectMeta, n.nodeEx)
if err != nil {
return err
}