KubeVirt: Add grace period for hotplug detach when hotplug pod is deleted (#5747)

* add hotplug patch and new kubevirt version

* reduce verbosity, hook up patch file

* update patch with Free call

* update patch

* fix for linter
bfjelds 2023-07-07 16:35:36 -07:00 committed by GitHub
Parent 2d5563f664
Commit d93dc7712d
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
2 changed files: 200 additions and 2 deletions


@@ -0,0 +1,194 @@
diff --git a/pkg/virt-launcher/virtwrap/manager.go b/pkg/virt-launcher/virtwrap/manager.go
index 63d8d43d2..0e608c4bb 100644
--- a/pkg/virt-launcher/virtwrap/manager.go
+++ b/pkg/virt-launcher/virtwrap/manager.go
@@ -97,6 +97,11 @@ const (
const maxConcurrentHotplugHostDevices = 1
const maxConcurrentMemoryDumps = 1
+// TODO: Make this configurable from the VMI object?
+const hotplugDetachmentGracePeriod = 5 * time.Second
+
+const hotplugDetachmentVerbosity = 4
+
type contextStore struct {
ctx context.Context
cancel context.CancelFunc
@@ -142,6 +147,7 @@ type LibvirtDomainManager struct {
hotplugHostDevicesInProgress chan struct{}
memoryDumpInProgress chan struct{}
+ cancelDiskDetachChannels map[string]chan struct{}
virtShareDir string
ephemeralDiskDir string
@@ -204,6 +210,7 @@ func newLibvirtDomainManager(connection cli.Connection, virtShareDir, ephemeralD
cancelSafetyUnfreezeChan: make(chan struct{}),
migrateInfoStats: &stats.DomainJobInfo{},
metadataCache: metadataCache,
+ cancelDiskDetachChannels: make(map[string]chan struct{}),
}
manager.hotplugHostDevicesInProgress = make(chan struct{}, maxConcurrentHotplugHostDevices)
@@ -828,11 +835,11 @@ func (l *LibvirtDomainManager) generateConverterContext(vmi *v1.VirtualMachineIn
}
func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmulation bool, options *cmdv1.VirtualMachineOptions) (*api.DomainSpec, error) {
+
l.domainModifyLock.Lock()
defer l.domainModifyLock.Unlock()
logger := log.Log.Object(vmi)
-
domain := &api.Domain{}
c, err := l.generateConverterContext(vmi, allowEmulation, options, false)
@@ -922,19 +929,94 @@ func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmul
return nil, err
}
+ // Get list of disks that should be detached
+ detachedDisks := getDetachedDisks(vmi.Name, vmi.Namespace, oldSpec.Devices.Disks, domain.Spec.Devices.Disks)
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detached disks: %+v", len(detachedDisks))
+
+ // Check list of detached disks against the list of detachment goroutines. Cancel the goroutines
+ // that are no longer needed.
+ for cancelDiskDetachChannelName, cancelDiskDetachChannel := range l.cancelDiskDetachChannels {
+ if _, ok := detachedDisks[cancelDiskDetachChannelName]; ok {
+ // If the disk already has a detach goroutine and is still in the detachedDisks list, do not cancel
+ // the goroutine.
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detach still requested, continue grace period = [%+v]", cancelDiskDetachChannelName)
+ } else {
+ // If the disk already has a detach goroutine but is no longer in the detachedDisks list, it no
+ // longer needs to be detached. Signal detach cancellation and remove it from the detachedDisks
+ // list.
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug detach no longer needed, cancel it [%+v]", cancelDiskDetachChannelName)
+ // Signal detach goroutine to cancel
+ cancelDiskDetachChannel <- struct{}{}
+ // Remove from detachedDisks list so detach goroutine is not recreated
+ delete(detachedDisks, cancelDiskDetachChannelName)
+ }
+ }
// Look up all the disks to detach
- for _, detachDisk := range getDetachedDisks(oldSpec.Devices.Disks, domain.Spec.Devices.Disks) {
- logger.V(1).Infof("Detaching disk %s, target %s", detachDisk.Alias.GetName(), detachDisk.Target.Device)
+ for detachDiskKey, detachDisk := range detachedDisks {
+
detachBytes, err := xml.Marshal(detachDisk)
if err != nil {
logger.Reason(err).Error("marshalling detached disk failed")
return nil, err
}
- err = dom.DetachDeviceFlags(strings.ToLower(string(detachBytes)), affectLiveAndConfigLibvirtFlags)
- if err != nil {
- logger.Reason(err).Error("detaching device")
- return nil, err
- }
+
+ // Create goroutine to do detachment so we can wait to see if the disk is reattached.
+ go func(namespacedDiskName, detachDiskName, detachDiskTargetDevice, domainName string, detachBytes []byte) {
+ logger.V(hotplugDetachmentVerbosity).Infof("Potentially detaching hotplug disk %s", namespacedDiskName)
+ // Claim mutex again to work on protected data like the channels map and the domain.
+ l.domainModifyLock.Lock()
+ // If channel has already been created, a goroutine is already waiting for the disk to be detached. Exit
+ // so there aren't multiple goroutines waiting to detach the same disk.
+ if _, ok := l.cancelDiskDetachChannels[namespacedDiskName]; ok {
+ logger.V(hotplugDetachmentVerbosity).Infof("Potential hotplug disk detaching already handled: %s", namespacedDiskName)
+ // Unlock the mutex so protected data can be accessed
+ l.domainModifyLock.Unlock()
+ return
+ }
+ // There are no other goroutines waiting to detach this disk; create a channel to enable subsequent calls to
+ // SyncVMI to cancel this detach (if attach is requested within the grace period: 5 seconds)
+ cancelChannel := make(chan struct{})
+ l.cancelDiskDetachChannels[namespacedDiskName] = cancelChannel
+ // Unlock the mutex during the grace period wait so SyncVMI can continue to run
+ l.domainModifyLock.Unlock()
+
+ doDetach := false
+ // Wait for either the grace period to expire (in which case: proceed to detach) or for the cancel channel
+ // to be signaled (in which case: cancel the detach)
+ select {
+ case <-cancelChannel:
+ // Disk attach has been requested within the grace period, cancel the detach
+ logger.V(hotplugDetachmentVerbosity).Infof("Potential hotplug disk detaching was cancelled: %s", namespacedDiskName)
+ case <-time.After(hotplugDetachmentGracePeriod):
+ // Grace period has expired without an attach request, proceed to detach
+ doDetach = true
+ }
+
+ // Claim mutex again to work on protected data like the channels map and the domain.
+ l.domainModifyLock.Lock()
+ // Ensure that the detach cancel channel is cleared and the mutex is unlocked before exiting
+ defer func() {
+ // Detach request has been handled, remove channel from map
+ delete(l.cancelDiskDetachChannels, namespacedDiskName)
+ // Unlock the mutex so protected data can be accessed
+ l.domainModifyLock.Unlock()
+ }()
+
+ if doDetach {
+ // Detach the volume
+ logger.V(1).Infof("Detaching disk %s, target %s", detachDiskName, detachDiskTargetDevice)
+ dom, err := l.virConn.LookupDomainByName(domainName)
+ if err != nil {
+ logger.Reason(err).Error("getting domain for device detach")
+ return
+ }
+ err = dom.DetachDeviceFlags(strings.ToLower(string(detachBytes)), affectLiveAndConfigLibvirtFlags)
+ if err != nil {
+ logger.Reason(err).Error("detaching device")
+ }
+ dom.Free()
+ }
+ }(detachDiskKey, detachDisk.Alias.GetName(), detachDisk.Target.Device, domain.Spec.Name, detachBytes)
}
// Look up all the disks to attach
for _, attachDisk := range getAttachedDisks(oldSpec.Devices.Disks, domain.Spec.Devices.Disks) {
@@ -945,7 +1027,18 @@ func (l *LibvirtDomainManager) SyncVMI(vmi *v1.VirtualMachineInstance, allowEmul
if !allowAttach {
continue
}
- logger.V(1).Infof("Attaching disk %s, target %s", attachDisk.Alias.GetName(), attachDisk.Target.Device)
+ attachDiskName := attachDisk.Alias.GetName()
+ namespacedDiskName := fmt.Sprintf("%s_%s_%s", vmi.Namespace, vmi.Name, attachDiskName)
+ if _, ok := l.cancelDiskDetachChannels[namespacedDiskName]; ok {
+ logger.V(hotplugDetachmentVerbosity).Infof("Hotplug attach during detach grace period, signal detach goroutine: %s", namespacedDiskName)
+ // a goroutine is waiting to detach this disk, signal it to stop
+ l.cancelDiskDetachChannels[namespacedDiskName] <- struct{}{}
+ // no need to reattach if the disk is already attached
+ logger.V(hotplugDetachmentVerbosity).Infof("Skip hotplug detach/reattach disk: %s", namespacedDiskName)
+ continue
+ }
+
+ logger.V(1).Infof("Attaching disk %s, target %s", attachDiskName, attachDisk.Target.Device)
// set drivers cache mode
err = converter.SetDriverCacheMode(&attachDisk, l.directIOChecker)
if err != nil {
@@ -1032,7 +1125,7 @@ func isHotplugDisk(disk api.Disk) bool {
return strings.HasPrefix(getSourceFile(disk), v1.HotplugDiskDir)
}
-func getDetachedDisks(oldDisks, newDisks []api.Disk) []api.Disk {
+func getDetachedDisks(vmiName, vmiNamespace string, oldDisks, newDisks []api.Disk) map[string]api.Disk {
newDiskMap := make(map[string]api.Disk)
for _, disk := range newDisks {
file := getSourceFile(disk)
@@ -1040,14 +1133,15 @@ func getDetachedDisks(oldDisks, newDisks []api.Disk) []api.Disk {
newDiskMap[file] = disk
}
}
- res := make([]api.Disk, 0)
+ res := map[string]api.Disk{}
for _, oldDisk := range oldDisks {
if !isHotplugDisk(oldDisk) {
continue
}
if _, ok := newDiskMap[getSourceFile(oldDisk)]; !ok {
// This disk got detached, add it to the list
- res = append(res, oldDisk)
+ namespacedDiskName := fmt.Sprintf("%s_%s_%s", vmiNamespace, vmiName, oldDisk.Alias.GetName())
+ res[namespacedDiskName] = oldDisk
}
}
return res
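
The core of the patch is a cancellable grace-period detach: every disk slated for detachment gets its own goroutine and a cancel channel keyed by namespace_vmName_diskName, the goroutine waits on a select between that channel and time.After(hotplugDetachmentGracePeriod), and an attach request arriving during the wait signals the channel instead of re-plugging the disk. The standalone Go sketch below reproduces only that pattern; the detachManager type and its requestDetach/cancelDetach methods are hypothetical names, the libvirt DetachDeviceFlags call is replaced by a plain callback, and the sketch closes the cancel channel rather than sending on it so a cancel can never block.

package main

import (
	"fmt"
	"sync"
	"time"
)

// detachManager is a hypothetical, minimal stand-in for LibvirtDomainManager:
// it keeps only the cancel-channel map and the grace period.
type detachManager struct {
	mu      sync.Mutex
	cancels map[string]chan struct{}
	grace   time.Duration
}

func newDetachManager(grace time.Duration) *detachManager {
	return &detachManager{cancels: make(map[string]chan struct{}), grace: grace}
}

// requestDetach starts a goroutine that runs detach() after the grace period
// unless cancelDetach is called for the same key first.
func (m *detachManager) requestDetach(key string, detach func()) {
	go func() {
		m.mu.Lock()
		if _, ok := m.cancels[key]; ok {
			// A goroutine is already waiting to detach this disk.
			m.mu.Unlock()
			return
		}
		cancel := make(chan struct{})
		m.cancels[key] = cancel
		m.mu.Unlock()

		doDetach := false
		select {
		case <-cancel:
			// Re-attach was requested within the grace period; skip the detach.
		case <-time.After(m.grace):
			doDetach = true
		}

		m.mu.Lock()
		// Clear our own entry only if it is still ours.
		if m.cancels[key] == cancel {
			delete(m.cancels, key)
		}
		m.mu.Unlock()

		if doDetach {
			detach()
		}
	}()
}

// cancelDetach signals the pending detach goroutine for key, if any.
// It returns true when a detach was pending and has been cancelled.
func (m *detachManager) cancelDetach(key string) bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	cancel, ok := m.cancels[key]
	if !ok {
		return false
	}
	// Closing never blocks, regardless of whether the goroutine has reached
	// its select yet; removing the entry prevents a double close.
	close(cancel)
	delete(m.cancels, key)
	return true
}

func main() {
	m := newDetachManager(2 * time.Second)
	m.requestDetach("ns_vm_disk1", func() { fmt.Println("detached disk1") })

	// Simulate the hotplug pod coming back before the grace period expires.
	time.Sleep(500 * time.Millisecond)
	if m.cancelDetach("ns_vm_disk1") {
		fmt.Println("detach of disk1 cancelled")
	}
	time.Sleep(3 * time.Second) // give the goroutine time to finish
}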


@@ -16,11 +16,10 @@
#
%global debug_package %{nil}
Summary: Container native virtualization
Name: kubevirt
Version: 0.59.0
Release: 3%{?dist}
Release: 4%{?dist}
License: ASL 2.0
Vendor: Microsoft Corporation
Distribution: Mariner
@@ -32,6 +31,8 @@ Source1: disks-images-provider.yaml
# correctly.
Patch0: Cleanup-housekeeping-cgroup-on-vm-del.patch
Patch1: Allocate-2-cpu-for-the-emulator-thread.patch
Patch2: Hotplug_detach_grace_period.patch
%global debug_package %{nil}
BuildRequires: glibc-devel
BuildRequires: glibc-static >= 2.35-3%{?dist}
BuildRequires: golang
@@ -210,6 +211,9 @@ install -p -m 0644 cmd/virt-handler/nsswitch.conf %{buildroot}%{_datadir}/kube-v
%{_bindir}/virt-tests
%changelog
* Fri Jun 30 2023 Brian Fjeldstad <bfjelds@microsoft.com> - 0.59.0-4
- Patch 0.59.0 with Operator Nexus patch for hotplug volume detachment IO errors
* Thu Jun 15 2023 CBL-Mariner Servicing Account <cblmargh@microsoft.com> - 0.59.0-3
- Bump release to rebuild with go 1.19.10
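
The %prep section is outside this diff, so the way Patch2 is applied is not visible here. Purely as an illustration of the usual convention (the actual kubevirt.spec may use %autosetup or a different -p level), an explicit arrangement would look like:

%prep
%setup -q
%patch0 -p1
%patch1 -p1
%patch2 -p1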