feat: custom Windows log collection script (#3940)

* feat: custom Windows log collection script

* update

* update code logic

* address comments

* remove unused

* fix linting

* address comments

* revert versions

* add tests
This commit is contained in:
haofan-ms 2020-10-26 15:44:08 -07:00 коммит произвёл GitHub
Родитель 44bae541b0
Коммит 50bc01d283
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 121 добавлений и 32 удалений

Просмотреть файл

@ -41,6 +41,7 @@ type getLogsCmd struct {
sshHostURI string
linuxSSHPrivateKeyPath string
linuxScriptPath string
windowsScriptPath string
outputDirectory string
controlPlaneOnly bool
// computed
@ -75,14 +76,14 @@ func newGetLogsCmd() *cobra.Command {
command.Flags().StringVarP(&glc.apiModelPath, "api-model", "m", "", "path to the generated apimodel.json file (required)")
command.Flags().StringVar(&glc.sshHostURI, "ssh-host", "", "FQDN, or IP address, of an SSH listener that can reach all nodes in the cluster (required)")
command.Flags().StringVar(&glc.linuxSSHPrivateKeyPath, "linux-ssh-private-key", "", "path to a valid private SSH key to access the cluster's Linux nodes (required)")
command.Flags().StringVar(&glc.linuxScriptPath, "linux-script", "", "path to the log collection script to execute on the cluster's Linux nodes (required)")
command.Flags().StringVar(&glc.linuxScriptPath, "linux-script", "", "path to the log collection script to execute on the cluster's Linux nodes (required if distro is not aks-ubuntu)")
command.Flags().StringVar(&glc.windowsScriptPath, "windows-script", "", "path to the log collection script to execute on the cluster's Windows nodes (required if distro is not aks-windows)")
command.Flags().StringVarP(&glc.outputDirectory, "output-directory", "o", "", "collected logs destination directory, derived from --api-model if missing")
command.Flags().BoolVarP(&glc.controlPlaneOnly, "control-plane-only", "", false, "get logs from control plane VMs only")
_ = command.MarkFlagRequired("location")
_ = command.MarkFlagRequired("api-model")
_ = command.MarkFlagRequired("ssh-host")
_ = command.MarkFlagRequired("linux-ssh-private-key")
_ = command.MarkFlagRequired("linux-script") // optional once in VHD
return command
}
@ -107,11 +108,15 @@ func (glc *getLogsCmd) validateArgs() (err error) {
} else if _, err := os.Stat(glc.linuxSSHPrivateKeyPath); os.IsNotExist(err) {
return errors.Errorf("specified --linux-ssh-private-key does not exist (%s)", glc.linuxSSHPrivateKeyPath)
}
if glc.linuxScriptPath == "" {
// optional once in VHD
return errors.New("--linux-script must be specified")
} else if _, err := os.Stat(glc.linuxScriptPath); os.IsNotExist(err) {
return errors.Errorf("specified --linux-script does not exist (%s)", glc.linuxScriptPath)
if glc.linuxScriptPath != "" {
if _, err := os.Stat(glc.linuxScriptPath); os.IsNotExist(err) {
return errors.Errorf("specified --linux-script does not exist (%s)", glc.linuxScriptPath)
}
}
if glc.windowsScriptPath != "" {
if _, err := os.Stat(glc.windowsScriptPath); os.IsNotExist(err) {
return errors.Errorf("specified --windows-script does not exist (%s)", glc.windowsScriptPath)
}
}
if glc.outputDirectory == "" {
glc.outputDirectory = path.Join(filepath.Dir(glc.apiModelPath), "_logs")
@ -167,6 +172,9 @@ func (glc *getLogsCmd) run() (err error) {
if err = glc.getClusterNodes(); err != nil {
return errors.Wrap(err, "listing cluster nodes")
}
if err = glc.validateLogScript(); err != nil {
return errors.Wrap(err, "validating log collection scripts for nodes")
}
for _, n := range glc.masterNodes {
log.Infof("Processing master node: %s\n", n.Name)
out, err := glc.collectLogs(n, glc.linuxSSHConfig)
@ -259,24 +267,30 @@ func (glc *getLogsCmd) collectLogs(node v1.Node, config *ssh.ClientConfig) (stri
}
func (glc *getLogsCmd) uploadScript(node v1.Node, client *ssh.Client) (string, error) {
if isWindowsNode(node) || glc.linuxScriptPath == "" {
var script, cmd string
if isLinuxNode(node) && glc.linuxScriptPath != "" {
script = glc.linuxScriptPath
cmd = "bash -c \"cat /dev/stdin > /tmp/collect-logs.sh\""
} else if isWindowsNode(node) && glc.windowsScriptPath != "" {
script = glc.windowsScriptPath
cmd = "powershell -noprofile -command \"$Input > $env:temp\\collect-windows-logs.ps1\""
} else {
return "", nil
}
scriptContent, err := ioutil.ReadFile(glc.linuxScriptPath)
sc, err := ioutil.ReadFile(script)
if err != nil {
return "", errors.Wrap(err, "reading log collection script content")
return "", errors.Wrapf(err, "reading log collection script %s", script)
}
log.Debugf("Uploading log collection script (%s)\n", glc.linuxScriptPath)
session, err := client.NewSession()
if err != nil {
return "", errors.Wrap(err, "creating SSH session")
}
defer session.Close()
session.Stdin = bytes.NewReader(scriptContent)
if co, err := session.CombinedOutput("bash -c \"cat /dev/stdin > /tmp/collect-logs.sh\""); err != nil {
log.Debugf("Uploading log collection script (%s)\n", script)
session.Stdin = bytes.NewReader(sc)
if co, err := session.CombinedOutput(cmd); err != nil {
return fmt.Sprintf("%s -> %s", node.Name, string(co)), errors.Wrap(err, "uploading log collection script")
}
return "", nil
@ -300,8 +314,12 @@ func (glc *getLogsCmd) executeScript(node v1.Node, client *ssh.Client) (string,
cmd = fmt.Sprintf("bash -c \"export AZURE_ENV=%s; sudo -E %s\"", glc.getCloudName(), script)
}
} else {
script = "c:\\k\\debug\\collect-windows-logs.ps1"
cmd = fmt.Sprintf("powershell -command \"%s | Where-Object { $_.extension -eq '.zip' } | Copy-Item -Destination $env:temp\\$env:computername.zip\"", script)
if glc.windowsScriptPath != "" {
script = "$env:temp\\collect-windows-logs.ps1"
} else {
script = "c:\\k\\debug\\collect-windows-logs.ps1"
}
cmd = fmt.Sprintf("powershell -command \"iex %s | Where-Object { $_.extension -eq '.zip' } | Copy-Item -Destination $env:temp\\$env:computername.zip\"", script)
}
if co, err := session.CombinedOutput(cmd); err != nil {
@ -351,6 +369,28 @@ func (glc *getLogsCmd) downloadLogs(node v1.Node, client *ssh.Client) (string, e
return "", nil
}
// validateLogScript drops from the collection targets every node group that
// has no usable log collection script: no custom script was supplied for its
// OS and the relevant profile's IsVHDDistro() reports false.
//
// Control plane nodes are cleared when --linux-script is unset and the master
// profile is not a VHD distro; if only control plane logs were requested that
// situation is returned as an error instead. Linux agent pools are filtered out
// one pool at a time, whereas Windows nodes are cleared all at once because the
// VHD check is made against the cluster-wide Windows profile.
func (glc *getLogsCmd) validateLogScript() error {
	props := glc.cs.Properties
	noLinuxScript := glc.linuxScriptPath == ""
	noWindowsScript := glc.windowsScriptPath == ""

	if noLinuxScript && !props.MasterProfile.IsVHDDistro() {
		if glc.controlPlaneOnly {
			return errors.Errorf("No log collection script found for control plane nodes")
		}
		log.Warn("Skipping control plane nodes as flag '--linux-script' is not set and the distro in masterProfiles is not aks-ubuntu VHD")
		glc.masterNodes = nil
	}

	// With both custom scripts supplied no agent pool can be skipped.
	if !noLinuxScript && !noWindowsScript {
		return nil
	}

	for _, pool := range props.AgentPoolProfiles {
		osType := string(pool.OSType)
		// The two cases are mutually exclusive (a pool is either Linux or Windows),
		// and each keeps its VHD check last so it only runs for a matching pool.
		switch {
		case noLinuxScript && strings.EqualFold(osType, "Linux") && !pool.IsVHDDistro():
			log.Warnf("Skipping linux agentpool %s as flag '--linux-script' is not set and the distro in agentPoolProfiles is not aks-ubuntu VHD", pool.Name)
			glc.linuxNodes = filterNodesFromPool(glc.linuxNodes, pool.Name)
		case noWindowsScript && strings.EqualFold(osType, "Windows") && !props.WindowsProfile.IsVHDDistro():
			log.Warnf("Skipping windows agentpool %s as flag '--windows-script' is not set and the distro in windowsProfiles is not aks-windows VHD", pool.Name)
			glc.windowsNodes = nil
		}
	}
	return nil
}
// isLinuxNode reports whether the operating system recorded in the node's
// status is "linux", compared case-insensitively.
func isLinuxNode(node v1.Node) bool {
	osName := node.Status.NodeInfo.OperatingSystem
	return strings.EqualFold(osName, "linux")
}
@ -366,6 +406,16 @@ func (glc *getLogsCmd) getCloudName() string {
return ""
}
// filterNodesFromPool returns the nodes of nodeList that do NOT belong to the
// agent pool named agentPoolName, preserving their original order.
//
// The pool a node belongs to is read from the second "-"-separated segment of
// its name (e.g. "k8s-linuxpool-12345678-0" -> "linuxpool") and compared
// case-insensitively. A node whose name has no second segment cannot be
// attributed to any pool, so it is kept instead of triggering an
// index-out-of-range panic. The result is nil when no node is kept.
func filterNodesFromPool(nodeList []v1.Node, agentPoolName string) []v1.Node {
	var kept []v1.Node
	for _, node := range nodeList {
		segments := strings.Split(node.Name, "-")
		if len(segments) > 1 && strings.EqualFold(segments[1], agentPoolName) {
			// Node belongs to the pool being filtered out.
			continue
		}
		kept = append(kept, node)
	}
	return kept
}
func computeControlPlaneNodes(nodesCount int, clusterID string) []v1.Node {
var nodeList []v1.Node
for i := 0; i < nodesCount; i++ {

Просмотреть файл

@ -9,6 +9,7 @@ import (
. "github.com/onsi/gomega"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
)
func TestGetLogsCmd(t *testing.T) {
@ -41,6 +42,7 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: missingFile,
linuxSSHPrivateKeyPath: "",
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
@ -52,28 +54,19 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: "",
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
expectedErr: errors.New("--linux-ssh-private-key must be specified"),
name: "NeedsLinuxSSHPrivateKey",
},
{
glc: &getLogsCmd{
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: "",
sshHostURI: "server.example.com",
location: "southcentralus",
},
expectedErr: errors.New("--linux-script must be specified"),
name: "NeedsLinuxScript",
},
{
glc: &getLogsCmd{
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: missingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
@ -85,6 +78,19 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: existingFile,
windowsScriptPath: missingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
expectedErr: errors.Errorf("specified --windows-script does not exist (%s)", missingFile),
name: "BadWindowsScript",
},
{
glc: &getLogsCmd{
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "",
},
@ -96,6 +102,7 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "",
location: "southcentralus",
},
@ -107,6 +114,7 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: "",
linuxSSHPrivateKeyPath: missingFile,
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
@ -118,6 +126,7 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: missingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
@ -129,6 +138,7 @@ func TestGetLogsCmdValidateArgs(t *testing.T) {
apiModelPath: existingFile,
linuxSSHPrivateKeyPath: existingFile,
linuxScriptPath: existingFile,
windowsScriptPath: existingFile,
sshHostURI: "server.example.com",
location: "southcentralus",
},
@ -159,3 +169,25 @@ func TestComputeControlPlaneNodes(t *testing.T) {
g.Expect(node.Status.NodeInfo.OperatingSystem).To(Equal("linux"))
}
}
// TestFilterNodesFromPool builds three nodes for each of two similarly named
// pools and checks that filtering removes exactly the nodes of the named pool,
// and nothing when the name matches neither pool.
func TestFilterNodesFromPool(t *testing.T) {
	t.Parallel()
	g := NewGomegaWithT(t)

	// One name format per pool; nodes are appended pool-by-pool for each index.
	nameFormats := []string{
		"k8s-linuxpool-12345678-%d",
		"k8s-linuxpoool-12345678-%d",
	}
	var nodes []v1.Node
	for idx := 0; idx < 3; idx++ {
		for _, format := range nameFormats {
			var n v1.Node
			n.Name = fmt.Sprintf(format, idx)
			n.Status.NodeInfo.OperatingSystem = "linux"
			nodes = append(nodes, n)
		}
	}

	cases := []struct {
		pool      string
		remaining int
	}{
		{pool: "linuxpool", remaining: 3},
		{pool: "linuxpoool", remaining: 3},
		{pool: "linuxpol", remaining: 6},
	}
	for _, c := range cases {
		filtered := filterNodesFromPool(nodes, c.pool)
		g.Expect(len(filtered)).To(Equal(c.remaining))
	}
}

Просмотреть файл

@ -18,11 +18,11 @@ A valid SSH private key is always required to stablish a SSH session to the clus
### Log Collection Scripts
To collect Linux nodes logs, specify the path to the script-to-execute on each node by setting [parameter](#Parameters) `--linux-script`. A sample script can be found [here](/scripts/collect-logs.sh).
To collect Linux nodes logs, specify the path to the script-to-execute on each node by setting [parameter](#Parameters) `--linux-script` if the node distro is not `aks-ubuntu`. A sample script can be found [here](/scripts/collect-logs.sh).
If you choose to pass your own custom log collection script, make sure it zips all relevant files to file `/tmp/logs.zip`. Needless to say, the custom script should only query for troubleshooting information and it should not change the cluster or node configuration.
To collect Windows nodes logs, specify the path to the script-to-execute on each node by setting [parameter](#Parameters) `--windows-script` if the node distro is not `aks-windows`. A sample script can be found [here](/scripts/collect-windows-logs.ps1).
The default OS distro for Windows node pools already includes a [log collection script](./scripts/collect-windows-logs.ps1). There is no support to pass your own custom script at this point.
If you choose to pass your own custom log collection script, make sure it zips all relevant files to file `"/tmp/logs.zip"` for Linux and `"%TEMP%\{NodeName}.zip"` for Windows. Needless to say, the custom script should only query for troubleshooting information and it should not change the cluster or node configuration.
## Usage
@ -34,7 +34,8 @@ $ aks-engine get-logs \
--api-model _output/<dnsPrefix>/apimodel.json \
--ssh-host <dnsPrefix>.<location>.cloudapp.azure.com \
--linux-ssh-private-key ~/.ssh/id_rsa \
--linux-script scripts/collect-logs.sh
--linux-script scripts/collect-logs.sh \
--windows-script scripts/collect-windows-logs.ps1
```
### Parameters
@ -45,6 +46,7 @@ $ aks-engine get-logs \
|--api-model|yes|Path to the generated API model for the cluster.|
|--ssh-host|yes|FQDN, or IP address, of an SSH listener that can reach all nodes in the cluster.|
|--linux-ssh-private-key|yes|Path to an SSH private key that can be used to create a remote session on the cluster Linux nodes.|
|--linux-script|yes|Custom log collection script. It should produce file `/tmp/logs.zip`.|
|--linux-script|no|Custom log collection bash script. It is required only when the Linux node distro is not `aks-ubuntu` and it should produce file `/tmp/logs.zip`.|
|--windows-script|no|Custom log collection powershell script. It is required only when the Windows node distro is not `aks-windows` and it should produce file `%TEMP%\{NodeName}.zip`.|
|--output-directory|no|Output directory, derived from `--api-model` if missing.|
|--control-plane-only|no|Only collect logs from master nodes.|

Просмотреть файл

@ -1779,6 +1779,11 @@ func (a *AgentPoolProfile) GetKubernetesLabels(rg string, deprecated bool) strin
return buf.String()
}
// IsVHDDistro reports whether the profile's Windows image publisher and offer
// both match those of AKSWindowsServer2019OSImageConfig.
func (w *WindowsProfile) IsVHDDistro() bool {
	if w.WindowsPublisher != AKSWindowsServer2019OSImageConfig.ImagePublisher {
		return false
	}
	return w.WindowsOffer == AKSWindowsServer2019OSImageConfig.ImageOffer
}
// IsCSIProxyEnabled returns true if csi proxy service should be enable for Windows nodes
func (w *WindowsProfile) IsCSIProxyEnabled() bool {
if w.EnableCSIProxy != nil {