KubeVirt: Add grace period for hotplug detach when hotplug pod is deleted (#5747)
* add hotplug patch and new kubevirt version
* reduce verbosity, hook up patch file
* update patch with Free call
* update patch
* fix for linter
Parent: 2d5563f664
Commit: d93dc7712d
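The core of the patch is a detach grace period: instead of detaching a hotplug disk immediately, SyncVMI starts a goroutine that races a per-disk cancel channel against a timer, so an attach request that arrives within the grace period (for example when the hotplug pod is briefly deleted and recreated) cancels the pending detach. The following standalone Go sketch illustrates that pattern only; detachManager, requestDetach, and cancelDetach are illustrative names and are not part of the KubeVirt code, which uses LibvirtDomainManager, cancelDiskDetachChannels, and domainModifyLock as shown in the diff below.

package main

import (
	"fmt"
	"sync"
	"time"
)

// gracePeriod mirrors hotplugDetachmentGracePeriod in the patch below (illustrative value).
const gracePeriod = 5 * time.Second

// detachManager is a stand-in for the real LibvirtDomainManager: it keeps one cancel
// channel per disk key so a later attach request can abort a pending detach.
type detachManager struct {
	mu      sync.Mutex
	cancels map[string]chan struct{}
}

// requestDetach defers the detach callback by the grace period; if cancelDetach is
// called for the same key before the period expires, the detach is dropped.
func (m *detachManager) requestDetach(key string, detach func()) {
	m.mu.Lock()
	if _, pending := m.cancels[key]; pending {
		m.mu.Unlock()
		return // a grace-period goroutine already exists for this disk
	}
	// Buffered so a cancel sent after the timer fires does not block the caller.
	cancel := make(chan struct{}, 1)
	m.cancels[key] = cancel
	m.mu.Unlock()

	go func() {
		doDetach := false
		select {
		case <-cancel:
			// Attach was requested again within the grace period: skip the detach.
		case <-time.After(gracePeriod):
			doDetach = true
		}
		m.mu.Lock()
		delete(m.cancels, key)
		m.mu.Unlock()
		if doDetach {
			detach()
		}
	}()
}

// cancelDetach signals a pending detach goroutine to stop; returns false if none is pending.
func (m *detachManager) cancelDetach(key string) bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	cancel, pending := m.cancels[key]
	if !pending {
		return false
	}
	cancel <- struct{}{}
	return true
}

func main() {
	m := &detachManager{cancels: map[string]chan struct{}{}}
	m.requestDetach("ns_vmi_disk1", func() { fmt.Println("detaching ns_vmi_disk1") })
	time.Sleep(time.Second)
	// The disk is requested again within the grace period, so the detach never runs.
	m.cancelDetach("ns_vmi_disk1")
	time.Sleep(gracePeriod + time.Second)
}

The sketch uses a buffered channel so a late cancel never blocks the caller; the patch itself uses an unbuffered channel and relies on domainModifyLock for the surrounding bookkeeping.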
@@ -0,0 +1,194 @@
diff --git a/pkg/virt-launcher/virtwrap/manager.go b/pkg/virt-launcher/virtwrap/manager.go
index 63d8d43d2..0e608c4bb 100644
--- a/pkg/virt-launcher/virtwrap/manager.go
+++ b/pkg/virt-launcher/virtwrap/manager.go
@@ -97,6 +97,11 @@ const (
const maxConcurrentHotplugHostDevices = 1
const maxConcurrentMemoryDumps = 1

+// TODO: Make this configurable from the VMI object?
+const hotplugDetachmentGracePeriod = 5 * time.Second
+
+const hotplugDetachmentVerbosity = 4
+
type contextStore struct {
ctx context.Context
cancel context.CancelFunc
@@ -142,6 +147,7 @@ type LibvirtDomainManager struct {

hotplugHostDevicesInProgress chan struct{}
memoryDumpInProgress chan struct{}
+ cancelDiskDetachChannels map[string]chan struct{}

virtShareDir string
ephemeralDiskDir string
@@ -204,6 +210,7 @@ func newLibvirtDomainManager(connection cli.Connection, virtShareDir, ephemeralD
cancelSafetyUnfreezeChan: make(chan struct{}),
migrateInfoStats: &stats.DomainJobInfo{},
metadataCache: metadataCache,
+ cancelDiskDetachChannels: make(map[string]chan struct{}),
}

manager.hotplugHostDevicesInProgress = make(chan struct{}, maxConcurrentHotplugHostDevices)
@@ -828,11 +835,11 @@ func (l *LibvirtDomainManager) generateConverterContext(vmi *v1.VirtualMachineIn
}

func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmulation bool, options *cmdv1.VirtualMachineOptions) (*api.DomainSpec, error) {
+
l.domainModifyLock.Lock()
defer l.domainModifyLock.Unlock()

logger := log.Log.Object(vmi)
-
domain := &api.Domain{}

c, err := l.generateConverterContext(vmi, allowEmulation, options, false)
@@ -922,19 +929,94 @@ func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmul
return nil, err
}

+ // Get list of disks that should be detached
+ detachedDisks := getDetachedDisks(vmi.Name, vmi.Namespace, oldSpec.Devices.Disks, domain.Spec.Devices.Disks)
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detached disks: %+v", len(detachedDisks))
+
+ // Check list of detached disks against the list of detachment goroutines. Cancel the goroutines
+ // that are no longer needed.
+ for cancelDiskDetachChannelName, cancelDiskDetachChannel := range l.cancelDiskDetachChannels {
+ if _, ok := detachedDisks[cancelDiskDetachChannelName]; ok {
+ // If disk is already a detach goroutine and in the detachedDisk list, do not cancel
+ // the goroutine.
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detach still requested, continue grace period = [%+v]", cancelDiskDetachChannelName)
+ } else {
+ // If disk is already a detach goroutine and is no longer in detachedDisk list, it no
+ // longer needs to be detached. Signal detach cancellation and remove from detachedDisks
+ // list.
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detach no longer needed, cancel it [%+v]", cancelDiskDetachChannelName)
+ // Signal detach goroutine to cancel
+ cancelDiskDetachChannel <- struct{}{}
+ // Remove from detachedDisks list so detach goroutine is not recreated
+ delete(detachedDisks, cancelDiskDetachChannelName)
+ }
+ }
// Look up all the disks to detach
- for _, detachDisk := range getDetachedDisks(oldSpec.Devices.Disks, domain.Spec.Devices.Disks) {
- logger.V(1).Infof("Detaching disk %s, target %s", detachDisk.Alias.GetName(), detachDisk.Target.Device)
+ for detachDiskKey, detachDisk := range detachedDisks {
+
detachBytes, err := xml.Marshal(detachDisk)
if err != nil {
logger.Reason(err).Error("marshalling detached disk failed")
return nil, err
}
- err = dom.DetachDeviceFlags(strings.ToLower(string(detachBytes)), affectLiveAndConfigLibvirtFlags)
- if err != nil {
- logger.Reason(err).Error("detaching device")
- return nil, err
- }
+
+ // Create goroutine to do detachment so we can wait to see if the disk is reattached.
+ go func(namespacedDiskName, detachDiskName, detachDiskTargetDevice, domainName string, detachBytes []byte) {
+ logger.V(hotplugDetachmentVerbosity).Infof("Potentially detaching hotplug disk %s", namespacedDiskName)
+ // Claim mutex again to work on protected data like the channels map and the domain.
+ l.domainModifyLock.Lock()
+ // If channel has already been created, a goroutine is already waiting for the disk to be detached. Exit
+ // so there aren't multiple goroutines waiting to detach the same disk.
+ if _, ok := l.cancelDiskDetachChannels[namespacedDiskName]; ok {
+ logger.V(hotplugDetachmentVerbosity).Infof("Potential hotplug disk detaching already handled: %s", namespacedDiskName)
+ // Unlock the mutex so protected data can be accessed
+ l.domainModifyLock.Unlock()
+ return
+ }
+ // There are no other goroutines waiting to detach this disk, create a channel to enable subsequent calls to
+ // SyncVMI to cancel this detach (if attach is requested within the grace period: 5 seconds)
+ cancelChannel := make(chan struct{})
+ l.cancelDiskDetachChannels[namespacedDiskName] = cancelChannel
+ // Unlock the mutex during the grace period wait so SyncVMI can continue to run
+ l.domainModifyLock.Unlock()
+
+ doDetach := false
+ // Wait for either the grace period to expire (in which case: proceed to detach) or for the attach channel
+ // to be triggered (in which case: cancel the detach)
+ select {
+ case <-cancelChannel:
+ // Disk attach has been requested within the grace period, cancel the detach
+ logger.V(hotplugDetachmentVerbosity).Infof("Potential hotplug disk detaching was cancelled: %s", namespacedDiskName)
+ case <-time.After(hotplugDetachmentGracePeriod):
+ // Grace period has expired without an attach request, proceed to detach
+ doDetach = true
+ }
+
+ // Claim mutex again to work on protected data like the channels map and the domain.
+ l.domainModifyLock.Lock()
+ // Ensure that the detach cancel channel is cleared and the mutex is unlocked before exiting
+ defer func() {
+ // Detach request has been handled, remove channel from map
+ delete(l.cancelDiskDetachChannels, namespacedDiskName)
+ // Unlock the mutex so protected data can be accessed
+ l.domainModifyLock.Unlock()
+ }()
+
+ if doDetach {
+ // Detach the volume
+ logger.V(1).Infof("Detaching disk %s, target %s", detachDiskName, detachDiskTargetDevice)
+ dom, err := l.virConn.LookupDomainByName(domainName)
+ if err != nil {
+ logger.Reason(err).Error("getting domain for device detach")
+ return
+ }
+ err = dom.DetachDeviceFlags(strings.ToLower(string(detachBytes)), affectLiveAndConfigLibvirtFlags)
+ if err != nil {
+ logger.Reason(err).Error("detaching device")
+ }
+ dom.Free()
+ }
+ }(detachDiskKey, detachDisk.Alias.GetName(), detachDisk.Target.Device, domain.Spec.Name, detachBytes)
}
// Look up all the disks to attach
for _, attachDisk := range getAttachedDisks(oldSpec.Devices.Disks, domain.Spec.Devices.Disks) {
@@ -945,7 +1027,18 @@ func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmul
if !allowAttach {
continue
}
- logger.V(1).Infof("Attaching disk %s, target %s", attachDisk.Alias.GetName(), attachDisk.Target.Device)
+ attachDiskName := attachDisk.Alias.GetName()
+ namespacedDiskName := fmt.Sprintf("%s_%s_%s", vmi.Namespace, vmi.Name, attachDiskName)
+ if _, ok := l.cancelDiskDetachChannels[namespacedDiskName]; ok {
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug attach during detach grace period, signal detach goroutine: %s", namespacedDiskName)
+ // a goroutine is waiting to detach this disk, signal it to stop
+ l.cancelDiskDetachChannels[namespacedDiskName] <- struct{}{}
+ // no need to reattach if the disk is already attached
+ logger.V(hotplugDetachmentVerbosity).Infof("Skip hotplug detach/reattach disk: %s", namespacedDiskName)
+ continue
+ }
+
+ logger.V(1).Infof("Attaching disk %s, target %s", attachDiskName, attachDisk.Target.Device)
// set drivers cache mode
err = converter.SetDriverCacheMode(&attachDisk, l.directIOChecker)
if err != nil {
@@ -1032,7 +1125,7 @@ func isHotplugDisk(disk api.Disk) bool {
return strings.HasPrefix(getSourceFile(disk), v1.HotplugDiskDir)
}

-func getDetachedDisks(oldDisks, newDisks []api.Disk) []api.Disk {
+func getDetachedDisks(vmiName, vmiNamespace string, oldDisks, newDisks []api.Disk) map[string]api.Disk {
newDiskMap := make(map[string]api.Disk)
for _, disk := range newDisks {
file := getSourceFile(disk)
@@ -1040,14 +1133,15 @@ func getDetachedDisks(oldDisks, newDisks []api.Disk) []api.Disk {
newDiskMap[file] = disk
}
}
- res := make([]api.Disk, 0)
+ res := map[string]api.Disk{}
for _, oldDisk := range oldDisks {
if !isHotplugDisk(oldDisk) {
continue
}
if _, ok := newDiskMap[getSourceFile(oldDisk)]; !ok {
// This disk got detached, add it to the list
- res = append(res, oldDisk)
+ namespacedDiskName := fmt.Sprintf("%s_%s_%s", vmiNamespace, vmiName, oldDisk.Alias.GetName())
+ res[namespacedDiskName] = oldDisk
}
}
return res
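For reference, both the detachedDisks map and cancelDiskDetachChannels are keyed by a namespaced disk name of the form <namespace>_<VMI name>_<disk alias>, which keeps identically named hotplug disks on different VMIs from colliding in the shared map. A minimal illustration (diskKey is a hypothetical helper, not a function in the patch):

package main

import "fmt"

// diskKey mirrors the fmt.Sprintf("%s_%s_%s", ...) key construction used by the patch
// for both detachedDisks and cancelDiskDetachChannels (hypothetical helper).
func diskKey(namespace, vmiName, diskAlias string) string {
	return fmt.Sprintf("%s_%s_%s", namespace, vmiName, diskAlias)
}

func main() {
	// Two VMIs with a hotplug disk alias "data" map to distinct keys.
	fmt.Println(diskKey("tenant-a", "vm-db", "data")) // tenant-a_vm-db_data
	fmt.Println(diskKey("tenant-b", "vm-db", "data")) // tenant-b_vm-db_data
}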
@@ -16,11 +16,10 @@
#


-%global debug_package %{nil}
Summary: Container native virtualization
Name: kubevirt
Version: 0.59.0
-Release: 3%{?dist}
+Release: 4%{?dist}
License: ASL 2.0
Vendor: Microsoft Corporation
Distribution: Mariner
@@ -32,6 +31,8 @@ Source1: disks-images-provider.yaml
# correctly.
Patch0: Cleanup-housekeeping-cgroup-on-vm-del.patch
Patch1: Allocate-2-cpu-for-the-emulator-thread.patch
+Patch2: Hotplug_detach_grace_period.patch
+%global debug_package %{nil}
BuildRequires: glibc-devel
BuildRequires: glibc-static >= 2.35-3%{?dist}
BuildRequires: golang
@@ -210,6 +211,9 @@ install -p -m 0644 cmd/virt-handler/nsswitch.conf %{buildroot}%{_datadir}/kube-v
%{_bindir}/virt-tests

%changelog
+* Fri Jun 30 2023 Brian Fjeldstad <bfjelds@microsoft.com> - 0.59.0-4
+- Patch 0.59.0 with Operator Nexus patch for hotplug volume detachment IO errors
+
* Thu Jun 15 2023 CBL-Mariner Servicing Account <cblmargh@microsoft.com> - 0.59.0-3
- Bump release to rebuild with go 1.19.10