Mirror of https://github.com/microsoft/pai.git
HiveD: record cell type in pod annotation (#3962)
* record cell type (instead of level) in pod annotation
* fix selectedNode
* refine failed reason
* minor fixes
* minor fixes
Parent: c3341596b2
Commit: 965ae38e18
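In short: the scheduler used to write the numeric level of each pod's preassigned cell into the pod annotation; after this change it writes the cell type name and converts it back to a level when an allocated pod is re-added. Below is a minimal, self-contained sketch of that round trip; the types and the chain/type names are simplified stand-ins (borrowed from the test hunk further down), not the scheduler's real definitions. Recording the type name appears to keep the annotation resolvable even when levels shift after a reconfiguration, which the "shorten cell chain" test case added below exercises.

package main

import "fmt"

// Simplified stand-ins for the scheduler's CellChain / CellLevel / CellType.
type (
    CellChain string
    CellLevel int
    CellType  string
)

func main() {
    // cellTypes mirrors HivedAlgorithm.cellTypes: chain -> level -> cell type name.
    cellTypes := map[CellChain]map[CellLevel]CellType{
        "DGX2-V100-NODE": {1: "DGX2-V100", 2: "DGX2-V100-NODE"},
    }

    // At bind time the scheduler now records the *type* of the preassigned cell
    // in the pod annotation, instead of its numeric level.
    recorded := cellTypes["DGX2-V100-NODE"][2] // "DGX2-V100-NODE"

    // When an allocated pod is added back (e.g., after a scheduler restart),
    // the level is re-derived from the recorded type.
    var level CellLevel
    found := false
    for l, t := range cellTypes["DGX2-V100-NODE"] {
        if t == recorded {
            level, found = l, true
        }
    }
    fmt.Println(recorded, level, found) // DGX2-V100-NODE 2 true
}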
@@ -389,10 +389,24 @@ func calculateGpuType(cellChainElements map[api.CellType]*cellChainElement, chai
    return gpuTypeToChain
}

func calculateCellType(cellChainElements map[api.CellType]*cellChainElement, chains []CellChain) map[CellChain]map[CellLevel]api.CellType {
    cellLevelToType := map[CellChain]map[CellLevel]api.CellType{}
    for _, chain := range chains {
        cellLevelToType[chain] = map[CellLevel]api.CellType{}
        ce, ok := cellChainElements[api.CellType(chain)]
        for ok {
            cellLevelToType[chain][ce.level] = ce.cellType
            ce, ok = cellChainElements[ce.childCellType]
        }
    }
    return cellLevelToType
}

func ParseConfig(sConfig *api.Config) (
    map[CellChain]ChainCellList, // chain:level:[]physicalCell
    map[CellChain]map[CellLevel]int32, // chain:level:gpuNumber
    map[string][]CellChain, // gpuType:[]chain
    map[CellChain]map[CellLevel]api.CellType, // chain:level:cellType
    map[api.VirtualClusterName]map[CellChain]ChainCellList, // non reserved virtual cells, vc:chain:level:[]virtualCell
    map[api.VirtualClusterName]map[api.ReservationId]ChainCellList, // reserved virtual cells, vc:reservationId:level:[]virtualCell
    map[api.VirtualClusterName]map[api.ReservationId]*PhysicalCell, // vc:reservationId:PhysicalCell
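For reference, a small self-contained model of the walk that calculateCellType performs; the element struct and the chain/type names here are simplified stand-ins for the package's cellChainElement and cluster config, not the real types. Starting from the chain's top cell type, it follows childCellType downward and records a level-to-type map per chain.

package main

import "fmt"

type (
    CellChain string
    CellLevel int
    CellType  string
)

// element is a simplified stand-in for the package's cellChainElement.
type element struct {
    cellType      CellType
    level         CellLevel
    childCellType CellType
}

func main() {
    // A hypothetical 2-level chain: a node cell containing 16 GPU cells.
    elements := map[CellType]*element{
        "DGX2-V100-NODE": {cellType: "DGX2-V100-NODE", level: 2, childCellType: "DGX2-V100"},
        "DGX2-V100":      {cellType: "DGX2-V100", level: 1, childCellType: ""},
    }
    chains := []CellChain{"DGX2-V100-NODE"}

    // Same walk as calculateCellType: start at the chain's top cell type and
    // follow childCellType down, recording level -> type.
    cellLevelToType := map[CellChain]map[CellLevel]CellType{}
    for _, chain := range chains {
        cellLevelToType[chain] = map[CellLevel]CellType{}
        ce, ok := elements[CellType(chain)]
        for ok {
            cellLevelToType[chain][ce.level] = ce.cellType
            ce, ok = elements[ce.childCellType]
        }
    }
    fmt.Println(cellLevelToType)
    // map[DGX2-V100-NODE:map[1:DGX2-V100 2:DGX2-V100-NODE]]
}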
@@ -409,10 +423,11 @@ func ParseConfig(sConfig *api.Config) (
    }
    gpuNums := calculateGpuNumber(cellChainElements, cellChains)
    gpuTypeToChain := calculateGpuType(cellChainElements, cellChains)
    cellLevelToType := calculateCellType(cellChainElements, cellChains)

    virtualSpecs := sConfig.VirtualClusters
    virtualNonReservedCellList, virtualReservedCellList, reservedPhysicalCells := newVirtualCellConstructor(
        cellChainElements, *virtualSpecs, rawReservedPhysicalCells).build()

    return physicalCells, gpuNums, gpuTypeToChain, virtualNonReservedCellList, virtualReservedCellList, reservedPhysicalCells
    return physicalCells, gpuNums, gpuTypeToChain, cellLevelToType, virtualNonReservedCellList, virtualReservedCellList, reservedPhysicalCells
}
@@ -50,6 +50,8 @@ type HivedAlgorithm struct {
    freeCellList map[CellChain]ChainCellList
    // map each GPU type to all chains that contain this type
    chains map[string][]CellChain
    // map each level in a chain to the specific cell type name
    cellTypes map[CellChain]map[CellLevel]api.CellType
    // all affinity groups that have been allocated cells
    allocatedAffinityGroups map[string]*AlgoAffinityGroup
    // all reserved physical cells (VC -> reservation ID -> cells)
@@ -60,13 +62,14 @@ type HivedAlgorithm struct {

// NewHivedAlgorithm initializes a HivedAlgorithm from the config file
func NewHivedAlgorithm(sConfig *api.Config) *HivedAlgorithm {
    pcl, gpuNums, gpuTypeToChain, nonReservedVcl, reservedVcl, reservedPc := ParseConfig(sConfig)
    pcl, gpuNums, gpuTypeToChain, cellLevelToType, nonReservedVcl, reservedVcl, reservedPc := ParseConfig(sConfig)
    h := &HivedAlgorithm{
        vcSchedulers: make(map[api.VirtualClusterName]intraVCScheduler),
        opportunisticSchedulers: map[CellChain]*topologyAwareScheduler{},
        fullCellList: pcl,
        freeCellList: make(map[CellChain]ChainCellList),
        chains: gpuTypeToChain,
        cellTypes: cellLevelToType,
        allocatedAffinityGroups: make(map[string]*AlgoAffinityGroup),
        reservedCells: reservedPc,
    }
@@ -127,7 +130,15 @@ func (h *HivedAlgorithm) Schedule(pod *core.Pod, suggestedNodes []string) intern
        podIndex = int32(len(group.allocatedPods[s.GpuNumber]))
    }
    return generatePodScheduleResult(
        groupPhysicalPlacement, groupVirtualPlacement, s.GpuNumber, podIndex, newGroup, suggestedNodeSet, pod)
        groupPhysicalPlacement,
        groupVirtualPlacement,
        h.cellTypes,
        s.GpuNumber,
        podIndex,
        newGroup,
        suggestedNodeSet,
        s.VirtualCluster,
        pod)
}

func (h *HivedAlgorithm) AddAllocatedPod(pod *core.Pod) {
@@ -160,23 +171,30 @@ func (h *HivedAlgorithm) AddAllocatedPod(pod *core.Pod) {
    newGroup.physicalGpuPlacement[gpuNumber][podIndex][gpuIndex] = pGpu
    var vGpu *VirtualCell
    if newGroup.virtualGpuPlacement != nil && !lazyPreempted {
        preassignedLevel := CellLevel(gms.PodPlacements[podIndex].PreassignedCellLevels[gpuIndex])
        if preassignedLevel >= 0 {
        preassignedType := gms.PodPlacements[podIndex].PreassignedCellTypes[gpuIndex]
        if preassignedType != "" {
            var preassignedLevel CellLevel
            typeFound := false
            for l, t := range h.cellTypes[pGpu.GetChain()] {
                if t == preassignedType {
                    preassignedLevel = l
                    typeFound = true
                }
            }
            var message string
            if vcs := h.vcSchedulers[s.VirtualCluster]; vcs == nil {
                message = fmt.Sprintf("VC %v has been deleted", s.VirtualCluster)
            if !typeFound {
                message = fmt.Sprintf("preassigned cell type %v not found in chain %v", preassignedType, pGpu.GetChain())
            } else if vcs := h.vcSchedulers[s.VirtualCluster]; vcs == nil {
                message = fmt.Sprintf("VC %v not found", s.VirtualCluster)
            } else {
                var vccl ChainCellList
                var str string
                vccl := vcs.getNonReservedCellList()[pGpu.GetChain()]
                str := string(pGpu.GetChain())
                if s.ReservationId != "" {
                    vccl = vcs.getReservedCellList()[s.ReservationId]
                    str = string(s.ReservationId)
                } else {
                    vccl = vcs.getNonReservedCellList()[pGpu.GetChain()]
                    str = string(pGpu.GetChain())
                }
                if vccl == nil {
                    message = fmt.Sprintf("VC %v no longer has cells for %v", s.VirtualCluster, str)
                    message = fmt.Sprintf("VC %v has no cell for %v", s.VirtualCluster, str)
                } else {
                    vGpu, message = mapNonPreassignedCellToVirtual(pGpu, vccl, preassignedLevel, priority)
                }
@@ -536,7 +554,16 @@ func (h *HivedAlgorithm) scheduleOpportunisticAffinityGroup(
    sr schedulingRequest,
    suggestedNodeSet common.Set) map[int32][]CellList {

    return h.opportunisticSchedulers[sr.chain].Schedule(sr.affinityGroupPodNums, opportunisticPriority, suggestedNodeSet)
    placement := h.opportunisticSchedulers[sr.chain].Schedule(
        sr.affinityGroupPodNums, opportunisticPriority, suggestedNodeSet)
    if placement == nil {
        klog.Infof("Insufficient capacity in PC for scheduling request: GPU numbers %v, priority %v",
            sr.affinityGroupPodNums, sr.priority)
    } else {
        klog.Infof("Succeeded in scheduling in PC for scheduling request: GPU numbers %v, priority %v",
            sr.affinityGroupPodNums, sr.priority)
    }
    return placement
}

// getTmpFreeCellList returns a copy of the free cell list.
@@ -737,25 +764,129 @@ func (h *HivedAlgorithm) findPhysicalGpuInChain(
func generatePodScheduleResult(
    groupPhysicalPlacement map[int32][]CellList,
    groupVirtualPlacement map[int32][]CellList,
    cellLevelToType map[CellChain]map[CellLevel]api.CellType,
    currentGpuNum int32,
    currentPodIndex int32,
    newGroup bool,
    suggestedNodeSet common.Set,
    vc api.VirtualClusterName,
    pod *core.Pod) internal.PodScheduleResult {

    affinityGroupBindInfo, selectedNode, selectedGpuIndices := generateAffinityGroupBindInfo(
        groupPhysicalPlacement, groupVirtualPlacement, currentGpuNum, currentPodIndex)
    if affinityGroupBindInfo == nil {
    preemptionVictims, nodesHaveVictims := collectPreemptionVictims(groupPhysicalPlacement, newGroup)
    if len(preemptionVictims) > 0 {
        // we collect victims on a random node, as K8S preempts victims from only one node once.
        // random is to let different pods preempt victims on different nodes
        // (note that this randomness is not necessary for the eventual-completeness of preemption).
        nodeToPreempt := nodesHaveVictims[rand.Int31n(int32(len(nodesHaveVictims)))]
        var victimPods []*core.Pod
        var victimNames []string
        for v := range preemptionVictims[nodeToPreempt].Items() {
            victimPods = append(victimPods, v.(*core.Pod))
            victimNames = append(victimNames, internal.Key(v.(*core.Pod)))
        }
        klog.Infof("[%v]: need to preempt pods %v", internal.Key(pod), strings.Join(victimNames, ", "))
        return internal.PodScheduleResult{
            PodWaitInfo: &internal.PodWaitInfo{
                // TODO: Enrich the Pod Waiting Reason.
                Reason: "",
            PodPreemptInfo: &internal.PodPreemptInfo{VictimPods: victimPods},
        }
    } else {
        // we find the selected node after the preemption is done, otherwise the preemption victims
        // may cause the selected node to be excluded from the suggested nodes
        affinityGroupBindInfo, selectedNode, selectedGpuIndices := generateAffinityGroupBindInfo(
            groupPhysicalPlacement, groupVirtualPlacement, cellLevelToType, currentGpuNum, currentPodIndex, newGroup, suggestedNodeSet)
        var waitReason string
        if affinityGroupBindInfo == nil {
            waitReason = "insufficient capacity in physical cluster"
            if groupVirtualPlacement != nil {
                waitReason = fmt.Sprintf("insufficient quota in VC %v", vc)
            }
        } else if selectedNode == "" {
            waitReason = "cannot find a K8s candidate node within physical cluster"
            if groupVirtualPlacement != nil {
                waitReason = fmt.Sprintf("cannot find a K8s candidate node within VC %v's quota", vc)
            }
        }
        if waitReason != "" {
            return internal.PodScheduleResult{PodWaitInfo: &internal.PodWaitInfo{Reason: waitReason}}
        }
        klog.Infof("[%v]: scheduled to node %v, GPUs %v",
            internal.Key(pod), selectedNode, selectedGpuIndices)
        return internal.PodScheduleResult{
            PodBindInfo: &api.PodBindInfo{
                Node: selectedNode,
                GpuIsolation: selectedGpuIndices,
                CellChain: string(groupPhysicalPlacement[currentGpuNum][currentPodIndex][0].GetChain()),
                AffinityGroupBindInfo: affinityGroupBindInfo,
            },
        }
    }
    chain := string(groupPhysicalPlacement[currentGpuNum][currentPodIndex][0].GetChain())
    // collect preemption victims of the whole group
    preemptionVictims := map[string]common.Set{} // node -> pods
}

// generateAffinityGroupBindInfo writes the physical and virtual placements of an affinity group
// into a series of AffinityGroupMemberBindInfos, and returns the allocated node and GPU addresses
// of the current pod.
func generateAffinityGroupBindInfo(
    groupPhysicalPlacement map[int32][]CellList,
    groupVirtualPlacement map[int32][]CellList,
    cellLevelToType map[CellChain]map[CellLevel]api.CellType,
    currentGpuNum int32,
    currentPodIndex int32,
    newGroup bool,
    suggestedNodeSet common.Set) ([]api.AffinityGroupMemberBindInfo, string, []int32) {

    if groupPhysicalPlacement == nil {
        return nil, "", nil
    }
    affinityGroupBindInfo := make([]api.AffinityGroupMemberBindInfo, len(groupPhysicalPlacement))
    var selectedNode string
    var selectedGpuIndices []int32
    groupMemberIndex := 0
    for podGpuNum, podPhysicalPlacements := range groupPhysicalPlacement {
        mbi := api.AffinityGroupMemberBindInfo{
            PodPlacements: make([]api.PodPlacementInfo, len(podPhysicalPlacements)),
        }
        for podIndex := int32(0); podIndex < int32(len(podPhysicalPlacements)); podIndex++ {
            mbi.PodPlacements[podIndex].PhysicalGpuIndices = make(
                []int32, len(podPhysicalPlacements[podIndex]))
            mbi.PodPlacements[podIndex].PreassignedCellTypes = make(
                []api.CellType, len(podPhysicalPlacements[podIndex]))
            for gpuIndex := 0; gpuIndex < len(podPhysicalPlacements[podIndex]); gpuIndex++ {
                pGpu := podPhysicalPlacements[podIndex][gpuIndex]
                if pGpu == nil {
                    // TODO: Should insist binding and continue to force bind, then the Pod may
                    // run or retry, instead of stuck in PodBinding state forever here.
                    panic("Resources previously allocated has been invalid; pod should wait")
                }
                nodes, gpuIndices := pGpu.(*PhysicalCell).GetPhysicalPlacement()
                // here each cell (i.e., pGpu) is only one GPU, hence we take the first element
                // in its "nodes" and "gpuIndices" as the node and GPU address
                if mbi.PodPlacements[podIndex].PhysicalNode == "" {
                    mbi.PodPlacements[podIndex].PhysicalNode = nodes[0]
                }
                mbi.PodPlacements[podIndex].PhysicalGpuIndices[gpuIndex] = gpuIndices[0]
                if groupVirtualPlacement != nil {
                    vGpu := groupVirtualPlacement[podGpuNum][podIndex][gpuIndex].(*VirtualCell)
                    mbi.PodPlacements[podIndex].PreassignedCellTypes[gpuIndex] = cellLevelToType[vGpu.GetChain()][vGpu.GetPreAssignedCell().GetLevel()]
                } else {
                    mbi.PodPlacements[podIndex].PreassignedCellTypes[gpuIndex] = ""
                }
            }
        }
        if podGpuNum == currentGpuNum &&
            (!newGroup || suggestedNodeSet.Contains(mbi.PodPlacements[currentPodIndex].PhysicalNode)) {
            selectedNode = mbi.PodPlacements[currentPodIndex].PhysicalNode
            selectedGpuIndices = mbi.PodPlacements[currentPodIndex].PhysicalGpuIndices
        }
        affinityGroupBindInfo[groupMemberIndex] = mbi
        groupMemberIndex++
    }
    return affinityGroupBindInfo, selectedNode, selectedGpuIndices
}

// collectPreemptionVictims collects preemption victims of an affinity group.
// If any of the GPUs allocated for the whole group is still used by a pod,
// we will wait for the preemption, as the group is gang-scheduled.
func collectPreemptionVictims(groupPhysicalPlacement map[int32][]CellList, newGroup bool) (map[string]common.Set, []string) {
    preemptionVictims := map[string]common.Set{}
    var nodesHaveVictims []string
    if newGroup {
        for gpuNum := range groupPhysicalPlacement {
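The failure reasons added above follow a simple precedence: a nil affinityGroupBindInfo means there is no capacity at all, an empty selectedNode means no usable K8s candidate node, and each message is specialized to the VC when the placement came from a virtual-cluster (non-opportunistic) schedule. A distilled, runnable restatement of just that branch structure; this is not the scheduler's actual function, and the parameter names are invented for illustration:

package main

import "fmt"

// waitReason mirrors the reason selection added in generatePodScheduleResult above.
func waitReason(bindInfoNil bool, selectedNode string, virtualPlacement bool, vc string) string {
    switch {
    case bindInfoNil && virtualPlacement:
        return fmt.Sprintf("insufficient quota in VC %v", vc)
    case bindInfoNil:
        return "insufficient capacity in physical cluster"
    case selectedNode == "" && virtualPlacement:
        return fmt.Sprintf("cannot find a K8s candidate node within VC %v's quota", vc)
    case selectedNode == "":
        return "cannot find a K8s candidate node within physical cluster"
    }
    return ""
}

func main() {
    fmt.Println(waitReason(true, "", true, "vc1"))      // insufficient quota in VC vc1
    fmt.Println(waitReason(false, "", false, ""))       // cannot find a K8s candidate node within physical cluster
    fmt.Println(waitReason(false, "node-0", false, "")) // empty string: proceed to bind
}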
@@ -778,100 +909,7 @@ func generatePodScheduleResult(
            }
        }
    }
    // if any of the GPUs allocated for the whole group is still used by a pod,
    // we will wait for the preemption, as the group is gang-scheduled.
    if len(nodesHaveVictims) > 0 {
        // we collect victims on a random node, as K8S preempts victims from only one node once.
        // random is to let different pods preempt victims on different nodes
        // (but we don't rely on this randomness for the eventual-completeness of preemption).
        nodeToPreempt := nodesHaveVictims[rand.Int31n(int32(len(nodesHaveVictims)))]
        var victimPods []*core.Pod
        var victimNames []string
        for v := range preemptionVictims[nodeToPreempt].Items() {
            victimPods = append(victimPods, v.(*core.Pod))
            victimNames = append(victimNames, internal.Key(v.(*core.Pod)))
        }
        klog.Infof("[%v]: need to preempt pods %v", internal.Key(pod), strings.Join(victimNames, ", "))
        return internal.PodScheduleResult{
            PodPreemptInfo: &internal.PodPreemptInfo{VictimPods: victimPods},
        }
    } else {
        // we check suggested nodes after the preemption is done, otherwise the preemption victims
        // may cause the selected node to be excluded from the suggested nodes
        // TODO: Keep selectedNode within suggestedNodeSet, so here should be Unreachable
        if !suggestedNodeSet.Contains(selectedNode) && newGroup {
            panic(fmt.Sprintf("[%v]: node %v picked by algorithm but not in K8S candidates",
                internal.Key(pod), selectedNode))
        }
        klog.Infof("[%v]: scheduled to node %v, GPUs %v",
            internal.Key(pod), selectedNode, selectedGpuIndices)
        return internal.PodScheduleResult{
            PodBindInfo: &api.PodBindInfo{
                Node: selectedNode,
                GpuIsolation: selectedGpuIndices,
                CellChain: chain,
                AffinityGroupBindInfo: affinityGroupBindInfo,
            },
        }
    }
}

// generateAffinityGroupBindInfo writes the physical and virtual placements of an affinity group
// into a series of AffinityGroupMemberBindInfos, and returns the allocated node and GPU addresses
// of the current pod.
func generateAffinityGroupBindInfo(
    groupPhysicalPlacement map[int32][]CellList,
    groupVirtualPlacement map[int32][]CellList,
    currentGpuNum int32,
    currentPodIndex int32) ([]api.AffinityGroupMemberBindInfo, string, []int32) {

    if groupPhysicalPlacement == nil {
        return nil, "", nil
    }
    affinityGroupBindInfo := make([]api.AffinityGroupMemberBindInfo, len(groupPhysicalPlacement))
    var selectedNode string
    var selectedGpuIndices []int32
    groupMemberIndex := 0
    for podGpuNum, podPhysicalPlacements := range groupPhysicalPlacement {
        mbi := api.AffinityGroupMemberBindInfo{
            PodPlacements: make([]api.PodPlacementInfo, len(podPhysicalPlacements)),
        }
        for podIndex := int32(0); podIndex < int32(len(podPhysicalPlacements)); podIndex++ {
            mbi.PodPlacements[podIndex].PhysicalGpuIndices = make(
                []int32, len(podPhysicalPlacements[podIndex]))
            mbi.PodPlacements[podIndex].PreassignedCellLevels = make(
                []int32, len(podPhysicalPlacements[podIndex]))
            for gpuIndex := 0; gpuIndex < len(podPhysicalPlacements[podIndex]); gpuIndex++ {
                pGpu := podPhysicalPlacements[podIndex][gpuIndex]
                if pGpu == nil {
                    // TODO: Should insist binding and continue to force bind, then the Pod may
                    // run or retry, instead of stuck in PodBinding state forever here.
                    panic("Resources previously allocated has been invalid; pod should wait")
                }
                nodes, gpuIndices := pGpu.(*PhysicalCell).GetPhysicalPlacement()
                // here each cell (i.e., pGpu) is only one GPU, hence we take the first element
                // in its "nodes" and "gpuIndices" as the node and GPU address
                if mbi.PodPlacements[podIndex].PhysicalNode == "" {
                    mbi.PodPlacements[podIndex].PhysicalNode = nodes[0]
                }
                mbi.PodPlacements[podIndex].PhysicalGpuIndices[gpuIndex] = gpuIndices[0]
                if groupVirtualPlacement != nil {
                    vGpu := groupVirtualPlacement[podGpuNum][podIndex][gpuIndex].(*VirtualCell)
                    mbi.PodPlacements[podIndex].PreassignedCellLevels[gpuIndex] = int32(
                        vGpu.GetPreAssignedCell().GetLevel())
                } else {
                    mbi.PodPlacements[podIndex].PreassignedCellLevels[gpuIndex] = -1
                }
            }
        }
        if podGpuNum == currentGpuNum {
            selectedNode = mbi.PodPlacements[currentPodIndex].PhysicalNode
            selectedGpuIndices = mbi.PodPlacements[currentPodIndex].PhysicalGpuIndices
        }
        affinityGroupBindInfo[groupMemberIndex] = mbi
        groupMemberIndex++
    }
    return affinityGroupBindInfo, selectedNode, selectedGpuIndices
    return preemptionVictims, nodesHaveVictims
}

// buddyAlloc allocates a free cell at a certain level from a free list.
@@ -491,6 +491,12 @@ func testReconfiguration(t *testing.T, sConfig *api.Config) {
    }
    testCasesThatShouldSucceed(t, h)

    // case: shorten cell chain
    (*sConfig.PhysicalCluster).CellTypes["DGX2-V100-NODE"] = api.CellTypeSpec{
        ChildCellType: "DGX2-V100",
        ChildCellNumber: 16,
        IsNodeLevel: true,
    }
    // case: physical cell not found
    (*sConfig.PhysicalCluster).PhysicalCells[7].CellChildren[0].CellChildren[0].CellAddress = "0.0.3.100"
    // case: insufficient VC quota
@@ -110,9 +110,9 @@ type AffinityGroupMemberBindInfo struct {
type PodPlacementInfo struct {
    PhysicalNode string `yaml:"physicalNode"`
    PhysicalGpuIndices []int32 `yaml:"physicalGpuIndices"`
    // levels of the preassigned cells used by the pods. used to locate the virtual cells
    // preassigned cell types used by the pods. used to locate the virtual cells
    // when adding an allocated pod
    PreassignedCellLevels []int32 `yaml:"preassignedCellLevels"`
    PreassignedCellTypes []CellType `yaml:"preassignedCellTypes"`
}

type WebServerPaths struct {
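For a sense of what the annotation payload carries after this change, here is a sketch using a stand-in struct that copies the yaml tags above; the node name, GPU indices, and cell type values are made up, and gopkg.in/yaml.v2 is used here only for illustration (whether the scheduler serializes with that exact library is an assumption).

package main

import (
    "fmt"

    "gopkg.in/yaml.v2"
)

// Stand-in for api.PodPlacementInfo with the same yaml tags as in the hunk above.
type podPlacementInfo struct {
    PhysicalNode         string   `yaml:"physicalNode"`
    PhysicalGpuIndices   []int32  `yaml:"physicalGpuIndices"`
    PreassignedCellTypes []string `yaml:"preassignedCellTypes"`
}

func main() {
    p := podPlacementInfo{
        PhysicalNode:         "node-0",
        PhysicalGpuIndices:   []int32{0, 1},
        PreassignedCellTypes: []string{"DGX2-V100-NODE", "DGX2-V100-NODE"},
    }
    out, err := yaml.Marshal(p)
    if err != nil {
        panic(err)
    }
    fmt.Print(string(out))
    // physicalNode: node-0
    // physicalGpuIndices:
    // - 0
    // - 1
    // preassignedCellTypes:
    // - DGX2-V100-NODE
    // - DGX2-V100-NODE
}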
@@ -91,7 +91,7 @@ type SchedulerAlgorithm interface {
    // Notes:
    // 1. If the SchedulerAlgorithm found sufficient free resource, only PodBindInfo
    // should be set.
    // If the SchedulerAlgorithm found sufficient preemptable resource, only
    // If the SchedulerAlgorithm found sufficient preemptible resource, only
    // PodPreemptInfo should be set.
    // Otherwise, only PodWaitInfo can be optionally set.
    // 2. The selected node in PodBindInfo requires:
@@ -146,13 +146,13 @@ const PodUnknown PodState = "Unknown"

// [AllocatedState]: The Pod is considered to be allocated from the scheduler view.
const (
    // Pod is waiting for preemptable or free resource to appear.
    // Pod is waiting for preemptible or free resource to appear.
    // [StartState]
    // -> PodPreempting
    // -> PodBinding
    PodWaiting PodState = "Waiting"

    // Pod is waiting for the appeared preemptable resource to be free by preemption.
    // Pod is waiting for the appeared preemptible resource to be free by preemption.
    // -> PodBinding
    // -> PodWaiting
    PodPreempting PodState = "Preempting"
@@ -174,7 +174,7 @@ func IsAllocated(state PodState) bool {

// No need to use it to recover scheduler waiting resource
type PodWaitInfo struct {
    // The reason why no preemptable or free resource to allocate the Pod now.
    // The reason why no preemptible or free resource to allocate the Pod now.
    Reason string
}

@@ -552,7 +552,7 @@ func (s *HivedScheduler) filterRoutine(args ei.ExtenderArgs) *ei.ExtenderFilterR
    if result.PodWaitInfo != nil {
        return &ei.ExtenderFilterResult{
            Error: fmt.Sprintf(
                "Pod is waiting for preemptable or free resource to appear: %v",
                "Pod is waiting for preemptible or free resource to appear: %v",
                result.PodWaitInfo.Reason),
        }
    } else {
@@ -646,7 +646,7 @@ func (s *HivedScheduler) preemptRoutine(args ei.ExtenderPreemptionArgs) *ei.Exte
    // At this point, podState must be in:
    // {PodWaiting}

    // The Pod should keep on waiting for preemptable or free resource to appear,
    // The Pod should keep on waiting for preemptible or free resource to appear,
    // so do not preempt any victim.
    return &ei.ExtenderPreemptionResult{}
}