From f9f4a5393b48b8cf5cfd5bc8092db33836597d4f Mon Sep 17 00:00:00 2001
From: sanjeevm0
Date: Tue, 20 Feb 2018 11:39:17 -0800
Subject: [PATCH] specify autogenerate topology device request using podinfo

---
 .../gpu/nvidia/nvidia_gpu_scheduler.go | 38 +++++++++----------
 types/types.go                         |  3 +-
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/gpuextension/gpu/nvidia/nvidia_gpu_scheduler.go b/gpuextension/gpu/nvidia/nvidia_gpu_scheduler.go
index 7cf7a7a2..b29ba808 100755
--- a/gpuextension/gpu/nvidia/nvidia_gpu_scheduler.go
+++ b/gpuextension/gpu/nvidia/nvidia_gpu_scheduler.go
@@ -17,35 +17,30 @@ func TranslateGPUContainerResources(alloc types.ResourceList, cont types.Contain
 	return gpu.TranslateGPUResources(numGPUs, alloc, cont.DevRequests)
 }
 
-func TranslateGPUResorces(nodeInfo *types.NodeInfo, podInfo *types.PodInfo) {
-	autoGenerateTopology := 0 // zero implies no topology desired, or it is explictly given
-	for contName, contCopy := range podInfo.InitContainers {
-		if contCopy.Requests[types.GPUTopologyGeneration] != int64(0) {
-			autoGenerateTopology = int(contCopy.Requests[types.GPUTopologyGeneration])
-			break
+func TranslateGPUResorces(nodeInfo *types.NodeInfo, podInfo *types.PodInfo) error {
+	if podInfo.Requests[types.GPUTopologyGeneration] == int64(0) { // zero implies no topology, or topology explictly given
+		for contName, contCopy := range podInfo.InitContainers {
+			contCopy.DevRequests = TranslateGPUContainerResources(nodeInfo.Allocatable, contCopy)
+			podInfo.InitContainers[contName] = contCopy
 		}
-		contCopy.DevRequests = TranslateGPUContainerResources(nodeInfo.Allocatable, contCopy)
-		podInfo.InitContainers[contName] = contCopy
-	}
-	if autoGenerateTopology == 0 {
 		for contName, contCopy := range podInfo.RunningContainers {
-			if contCopy.Requests[types.GPUTopologyGeneration] != int64(0) {
-				autoGenerateTopology = int(contCopy.Requests[types.GPUTopologyGeneration])
-				break
-			}
 			contCopy.DevRequests = TranslateGPUContainerResources(nodeInfo.Allocatable, contCopy)
 			podInfo.RunningContainers[contName] = contCopy
 		}
-	}
-	if autoGenerateTopology == 0 {
-		// nothing
-	} else if autoGenerateTopology == 1 {
+		return nil
+	} else if podInfo.Requests[types.GPUTopologyGeneration] == int64(1) {
 		gpu.ConvertToBestGPURequests(podInfo)
+		return nil
+	} else {
+		return fmt.Errorf("Invalid topology generation request")
 	}
 }
 
 func (ns *NvidiaGPUScheduler) PodFitsDevice(nodeInfo *types.NodeInfo, podInfo *types.PodInfo, fillAllocateFrom bool, runGrpScheduler bool) (bool, []types.PredicateFailureReason, float64) {
-	TranslateGPUResorces(nodeInfo, podInfo)
+	err := TranslateGPUResorces(nodeInfo, podInfo)
+	if err != nil {
+		panic("Unexpected error")
+	}
 	if runGrpScheduler {
 		glog.V(5).Infof("Running group scheduler on device requests %+v", podInfo)
 		return grpalloc.PodFitsGroupConstraints(nodeInfo, podInfo, fillAllocateFrom)
@@ -54,7 +49,10 @@ func (ns *NvidiaGPUScheduler) PodFitsDevice(nodeInfo *types.NodeInfo, podInfo *t
 }
 
 func (ns *NvidiaGPUScheduler) PodAllocate(nodeInfo *types.NodeInfo, podInfo *types.PodInfo, runGrpScheduler bool) error {
-	TranslateGPUResorces(nodeInfo, podInfo)
+	err := TranslateGPUResorces(nodeInfo, podInfo)
+	if err != nil {
+		return err
+	}
 	if runGrpScheduler {
 		fits, reasons, _ := grpalloc.PodFitsGroupConstraints(nodeInfo, podInfo, true)
 		if !fits {
diff --git a/types/types.go b/types/types.go
index acd40bb7..491cdec6 100755
--- a/types/types.go
+++ b/types/types.go
@@ -55,12 +55,13 @@ func FillContainerInfo(fill *ContainerInfo) *ContainerInfo {
 type PodInfo struct {
 	Name              string                   `json:"podname,omitempty"`
 	NodeName          string                   `json:"nodename,omitempty"` // the node for which DevRequests and AllocateFrom on ContainerInfo are valid, the node for which PodInfo has been customized
+	Requests          ResourceList             `json:"requests,omitempty"` // pod level requests
 	InitContainers    map[string]ContainerInfo `json:"initcontainer,omitempty"`
 	RunningContainers map[string]ContainerInfo `json:"runningcontainer,omitempty"`
 }
 
 func NewPodInfo() *PodInfo {
-	return &PodInfo{InitContainers: make(map[string]ContainerInfo), RunningContainers: make(map[string]ContainerInfo)}
+	return &PodInfo{Requests: make(ResourceList), InitContainers: make(map[string]ContainerInfo), RunningContainers: make(map[string]ContainerInfo)}
 }
 
 func (p *PodInfo) GetContainerInPod(name string) *ContainerInfo {
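Note (not part of the patch): to make the new dispatch easier to follow, below is a minimal, self-contained Go sketch of the control flow this change introduces. It uses simplified stand-in types, and the key name gpuTopologyGeneration plus every identifier in it are hypothetical, not the repository's actual API: a pod-level request value of 0 keeps per-container device requests as given, 1 auto-generates a best-fit GPU topology, and anything else is rejected with an error instead of being silently ignored.

// Illustrative sketch only; stand-ins for types.ResourceList / types.PodInfo.
package main

import (
	"errors"
	"fmt"
)

type ResourceList map[string]int64

type PodInfo struct {
	Requests   ResourceList            // pod-level requests (the field added by this patch)
	Containers map[string]ResourceList // per-container device requests
}

const gpuTopologyGeneration = "gpu/topology-generation" // hypothetical resource key

// translateGPUResources mirrors the patched TranslateGPUResorces dispatch.
func translateGPUResources(pod *PodInfo) error {
	switch pod.Requests[gpuTopologyGeneration] {
	case 0:
		// No topology generation requested, or topology given explicitly:
		// per-container device requests are used as-is.
		return nil
	case 1:
		// The real code calls gpu.ConvertToBestGPURequests(podInfo) here.
		fmt.Println("auto-generating best GPU topology requests for pod")
		return nil
	default:
		return errors.New("invalid topology generation request")
	}
}

func main() {
	pod := &PodInfo{Requests: ResourceList{gpuTopologyGeneration: 1}}
	if err := translateGPUResources(pod); err != nil {
		fmt.Println("error:", err)
	}
}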