From 58827523c7af8255029d967de830d0d9ed5a3fc2 Mon Sep 17 00:00:00 2001 From: tamilmani1989 Date: Thu, 30 Sep 2021 15:48:51 -0700 Subject: [PATCH] Reduce lock timeout and check pid same before cleaning up (#1035) * lock fix changes * added test files * removing unused file * lint fixes * Update cni/plugin.go Co-authored-by: Evan Baker * addressed comments * nit fix Co-authored-by: Evan Baker --- cni/plugin.go | 49 +++++++++++++++------ cni/plugin_test.go | 69 ++++++++++++++++++++++++++++++ cni/testfiles/processfound.lock | 1 + cni/testfiles/processnotfound.lock | 1 + store/json.go | 2 +- store/mockstore.go | 51 ++++++++++++++++++++++ 6 files changed, 160 insertions(+), 13 deletions(-) create mode 100644 cni/plugin_test.go create mode 100644 cni/testfiles/processfound.lock create mode 100644 cni/testfiles/processnotfound.lock create mode 100644 store/mockstore.go diff --git a/cni/plugin.go b/cni/plugin.go index d81115523..2f3873fec 100644 --- a/cni/plugin.go +++ b/cni/plugin.go @@ -20,8 +20,11 @@ import ( cniTypes "github.com/containernetworking/cni/pkg/types" cniTypesCurr "github.com/containernetworking/cni/pkg/types/current" cniVers "github.com/containernetworking/cni/pkg/version" + "github.com/pkg/errors" ) +var errEmptyContent = errors.New("read content is zero bytes") + // Plugin is the parent class for CNI plugins. type Plugin struct { *common.Plugin @@ -197,26 +200,33 @@ func (plugin *Plugin) IsSafeToRemoveLock(processName string) (bool, error) { return false, cmdErr } - // Read pid from lockfile lockFileName := plugin.Store.GetLockFileName() - content, err := ioutil.ReadFile(lockFileName) + // Read pid from lockfile + lockFilePid, err := plugin.readLockFile(lockFileName) if err != nil { - log.Errorf("Failed to read lock file :%v, ", err) - return false, err + return false, errors.Wrap(err, "IsSafeToRemoveLock lockfile read failed") } - if len(content) <= 0 { - log.Errorf("Num bytes read from lock file is 0") - return false, fmt.Errorf("Num bytes read from lock file is 0") - } - - log.Printf("Read from Lock file:%s", content) + log.Printf("Read from lockfile:%s", lockFilePid) // Get the process name if running and // check if that matches with our expected process - pName, err := platform.GetProcessNameByID(string(content)) + // if it returns non-nil error then process is not running + pName, err := platform.GetProcessNameByID(lockFilePid) if err != nil { + var content string + content, err = plugin.readLockFile(lockFileName) + if err != nil { + return false, errors.Wrap(err, "IsSafeToRemoveLock lockfile 2nd read failed") + } + + // pid in lockfile changed after getprocessnamebyid call. so some other process acquired lockfile in between. + // so its not safe to remove lockfile + if string(content) != lockFilePid { + log.Printf("Lockfile content changed from %s to %s. So not safe to remove lockfile", lockFilePid, content) + return false, nil + } + return true, nil - // if process id changed. read lockfile? } log.Printf("[CNI] Process name is %s", pName) @@ -229,3 +239,18 @@ func (plugin *Plugin) IsSafeToRemoveLock(processName string) (bool, error) { log.Errorf("Plugin store is nil") return false, fmt.Errorf("plugin store nil") } + +func (plugin *Plugin) readLockFile(filename string) (string, error) { + content, err := ioutil.ReadFile(filename) + if err != nil { + log.Errorf("Failed to read lockfile :%v", err) + return "", fmt.Errorf("readLockFile error:%w", err) + } + + if len(content) == 0 { + log.Errorf("Num bytes read from lockfile is 0") + return "", errEmptyContent + } + + return string(content), nil +} diff --git a/cni/plugin_test.go b/cni/plugin_test.go new file mode 100644 index 000000000..7c60cfaf6 --- /dev/null +++ b/cni/plugin_test.go @@ -0,0 +1,69 @@ +package cni + +import ( + "os" + "testing" + + "github.com/Azure/azure-container-networking/common" + "github.com/Azure/azure-container-networking/store" + "github.com/stretchr/testify/require" +) + +func TestMain(m *testing.M) { + // Run tests. + exitCode := m.Run() + os.Exit(exitCode) +} + +func TestPluginSafeToRemoveLock(t *testing.T) { + tests := []struct { + name string + plugin Plugin + processName string + wantIsSafe bool + wantErr bool + }{ + { + name: "Safe to remove lock-true. Process name does not match", + plugin: Plugin{ + Plugin: &common.Plugin{ + Name: "cni", + Version: "0.3.0", + Store: store.NewMockStore("testfiles/processfound.lock"), + }, + version: "0.3.0", + }, + processName: "azure-vnet", + wantIsSafe: true, + wantErr: false, + }, + { + name: "Safe to remove lock-true. Process not running", + plugin: Plugin{ + Plugin: &common.Plugin{ + Name: "cni", + Version: "0.3.0", + Store: store.NewMockStore("testfiles/processnotfound.lock"), + }, + version: "0.3.0", + }, + processName: "azure-vnet", + wantIsSafe: true, + wantErr: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + isSafe, err := tt.plugin.IsSafeToRemoveLock(tt.processName) + if tt.wantErr { + require.Error(t, err) + require.Equal(t, tt.wantIsSafe, isSafe) + } else { + require.NoError(t, err) + require.Equal(t, tt.wantIsSafe, isSafe) + } + }) + } +} diff --git a/cni/testfiles/processfound.lock b/cni/testfiles/processfound.lock new file mode 100644 index 000000000..56a6051ca --- /dev/null +++ b/cni/testfiles/processfound.lock @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/cni/testfiles/processnotfound.lock b/cni/testfiles/processnotfound.lock new file mode 100644 index 000000000..7d105a7b4 --- /dev/null +++ b/cni/testfiles/processnotfound.lock @@ -0,0 +1 @@ +-3 \ No newline at end of file diff --git a/store/json.go b/store/json.go index 3181f371a..7c51299fb 100644 --- a/store/json.go +++ b/store/json.go @@ -25,7 +25,7 @@ const ( lockExtension = ".lock" // Maximum number of retries before failing a lock call. - lockMaxRetries = 200 + lockMaxRetries = 100 // Delay between lock retries. lockRetryDelay = 100 * time.Millisecond diff --git a/store/mockstore.go b/store/mockstore.go new file mode 100644 index 000000000..fde865965 --- /dev/null +++ b/store/mockstore.go @@ -0,0 +1,51 @@ +package store + +import ( + "time" +) + +type mockStore struct { + lockFilePath string +} + +// NewMockStore creates a new jsonFileStore object, accessed as a KeyValueStore. +func NewMockStore(lockFilePath string) KeyValueStore { + return &mockStore{ + lockFilePath: lockFilePath, + } +} + +// Read restores the value for the given key from persistent store. +func (ms *mockStore) Read(key string, value interface{}) error { + return nil +} + +func (ms *mockStore) Write(key string, value interface{}) error { + return nil +} + +func (ms *mockStore) Flush() error { + return nil +} + +func (ms *mockStore) Lock(block bool) error { + return nil +} + +func (ms *mockStore) Unlock(forceUnlock bool) error { + return nil +} + +func (ms *mockStore) GetModificationTime() (time.Time, error) { + return time.Time{}, nil +} + +func (ms *mockStore) GetLockFileModificationTime() (time.Time, error) { + return time.Time{}, nil +} + +func (ms *mockStore) GetLockFileName() string { + return ms.lockFilePath +} + +func (ms *mockStore) Remove() {}