AppInsightTelemetry support for CNI (#458)
* Added AITelemetry support for CNI * added new files * added other configs in config file * fixed ut * updated disableall similar to cns * added container name to report * addressed review comments * addressed review comments * added check for azure environment * added log * close log handle in unit test * addressed review comments * addressed review comments * fixed a condition * keep the netagent channel for logs * fixed error * addressed review comments
This commit is contained in:
Родитель
a59a9aa92b
Коммит
2d619b78fe
4
Makefile
4
Makefile
|
@ -78,6 +78,8 @@ CNI_MULTITENANCY_BUILD_DIR = $(BUILD_DIR)/cni-multitenancy
|
|||
CNS_BUILD_DIR = $(BUILD_DIR)/cns
|
||||
NPM_BUILD_DIR = $(BUILD_DIR)/npm
|
||||
NPM_TELEMETRY_DIR = $(NPM_BUILD_DIR)/telemetry
|
||||
CNI_AI_ID = 5515a1eb-b2bc-406a-98eb-ba462e6f0411
|
||||
ACN_PACKAGE_PATH = github.com/Azure/azure-container-networking
|
||||
|
||||
# Containerized build parameters.
|
||||
BUILD_CONTAINER_IMAGE = acn-build
|
||||
|
@ -169,7 +171,7 @@ $(CNI_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT): $(CNIFILES)
|
|||
|
||||
# Build the Azure CNI telemetry plugin.
|
||||
$(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT): $(CNIFILES)
|
||||
go build -v -o $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) -ldflags "-X main.version=$(VERSION) -s -w" $(CNI_TELEMETRY_DIR)/*.go
|
||||
go build -v -o $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) -ldflags "-X main.version=$(VERSION) -X $(ACN_PACKAGE_PATH)/telemetry.aiMetadata=$(CNI_AI_ID) -s -w" $(CNI_TELEMETRY_DIR)/*.go
|
||||
|
||||
# Build the Azure CNS Service.
|
||||
$(CNS_BUILD_DIR)/azure-cns$(EXE_EXT): $(CNSFILES)
|
||||
|
|
|
@ -28,6 +28,8 @@ type AIConfig struct {
|
|||
BatchInterval int
|
||||
DisableMetadataRefreshThread bool
|
||||
RefreshTimeout int
|
||||
GetEnvRetryCount int
|
||||
GetEnvRetryWaitTimeInSecs int
|
||||
DebugMode bool
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package aitelemetry
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
|
@ -11,14 +12,16 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
resourceGroupStr = "ResourceGroup"
|
||||
vmSizeStr = "VMSize"
|
||||
osVersionStr = "OSVersion"
|
||||
locationStr = "Region"
|
||||
appNameStr = "AppName"
|
||||
subscriptionIDStr = "SubscriptionID"
|
||||
vmNameStr = "VMName"
|
||||
defaultTimeout = 10
|
||||
resourceGroupStr = "ResourceGroup"
|
||||
vmSizeStr = "VMSize"
|
||||
osVersionStr = "OSVersion"
|
||||
locationStr = "Region"
|
||||
appNameStr = "AppName"
|
||||
subscriptionIDStr = "SubscriptionID"
|
||||
vmNameStr = "VMName"
|
||||
versionStr = "AppVersion"
|
||||
azurePublicCloudStr = "AzurePublicCloud"
|
||||
defaultTimeout = 10
|
||||
)
|
||||
|
||||
var debugMode bool
|
||||
|
@ -26,7 +29,7 @@ var debugMode bool
|
|||
func messageListener() appinsights.DiagnosticsMessageListener {
|
||||
if debugMode {
|
||||
return appinsights.NewDiagnosticsMessageListener(func(msg string) error {
|
||||
debuglog("[AppInsights] [%s] %s\n", time.Now().Format(time.UnixDate), msg)
|
||||
debugLog("[AppInsights] [%s] %s\n", time.Now().Format(time.UnixDate), msg)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
@ -34,7 +37,7 @@ func messageListener() appinsights.DiagnosticsMessageListener {
|
|||
return nil
|
||||
}
|
||||
|
||||
func debuglog(format string, args ...interface{}) {
|
||||
func debugLog(format string, args ...interface{}) {
|
||||
if debugMode {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
|
@ -55,12 +58,12 @@ func getMetadata(th *telemetryHandle) {
|
|||
break
|
||||
}
|
||||
|
||||
debuglog("[AppInsights] Error getting metadata %v. Sleep for %d", err, th.refreshTimeout)
|
||||
debugLog("[AppInsights] Error getting metadata %v. Sleep for %d", err, th.refreshTimeout)
|
||||
time.Sleep(time.Duration(th.refreshTimeout) * time.Second)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
debuglog("[AppInsights] Error getting metadata %v", err)
|
||||
debugLog("[AppInsights] Error getting metadata %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -72,7 +75,7 @@ func getMetadata(th *telemetryHandle) {
|
|||
// Save metadata retrieved from wireserver to a file
|
||||
kvs, err := store.NewJsonFileStore(metadataFile)
|
||||
if err != nil {
|
||||
debuglog("[AppInsights] Error initializing kvs store: %v", err)
|
||||
debugLog("[AppInsights] Error initializing kvs store: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -80,20 +83,55 @@ func getMetadata(th *telemetryHandle) {
|
|||
err = common.SaveHostMetadata(th.metadata, metadataFile)
|
||||
kvs.Unlock(true)
|
||||
if err != nil {
|
||||
debuglog("[AppInsights] saving host metadata failed with :%v", err)
|
||||
debugLog("[AppInsights] saving host metadata failed with :%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func isPublicEnvironment(url string, retryCount, waitTimeInSecs int) (bool, error) {
|
||||
var (
|
||||
cloudName string
|
||||
err error
|
||||
)
|
||||
|
||||
for i := 0; i < retryCount; i++ {
|
||||
cloudName, err = common.GetAzureCloud(url)
|
||||
if cloudName == azurePublicCloudStr {
|
||||
debugLog("[AppInsights] CloudName: %s\n", cloudName)
|
||||
return true, nil
|
||||
} else if err == nil {
|
||||
debugLog("[AppInsights] This is not azure public cloud:%s", cloudName)
|
||||
return false, fmt.Errorf("Not an azure public cloud: %s", cloudName)
|
||||
}
|
||||
|
||||
debugLog("GetAzureCloud returned err :%v", err)
|
||||
time.Sleep(time.Duration(waitTimeInSecs) * time.Second)
|
||||
}
|
||||
|
||||
return false, err
|
||||
}
|
||||
|
||||
// NewAITelemetry creates telemetry handle with user specified appinsights id.
|
||||
func NewAITelemetry(
|
||||
azEnvUrl string,
|
||||
id string,
|
||||
aiConfig AIConfig,
|
||||
) TelemetryHandle {
|
||||
) (TelemetryHandle, error) {
|
||||
debugMode = aiConfig.DebugMode
|
||||
|
||||
if id == "" {
|
||||
debugLog("Empty AI key")
|
||||
return nil, fmt.Errorf("AI key is empty")
|
||||
}
|
||||
|
||||
// check if azure instance is in public cloud
|
||||
isPublic, err := isPublicEnvironment(azEnvUrl, aiConfig.GetEnvRetryCount, aiConfig.GetEnvRetryWaitTimeInSecs)
|
||||
if !isPublic {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
telemetryConfig := appinsights.NewTelemetryConfiguration(id)
|
||||
telemetryConfig.MaxBatchSize = aiConfig.BatchSize
|
||||
telemetryConfig.MaxBatchInterval = time.Duration(aiConfig.BatchInterval) * time.Second
|
||||
debugMode = aiConfig.DebugMode
|
||||
|
||||
th := &telemetryHandle{
|
||||
client: appinsights.NewTelemetryClientFromConfig(telemetryConfig),
|
||||
|
@ -110,7 +148,7 @@ func NewAITelemetry(
|
|||
go getMetadata(th)
|
||||
}
|
||||
|
||||
return th
|
||||
return th, nil
|
||||
}
|
||||
|
||||
// TrackLog function sends report (trace) to appinsights resource. It overrides few of the existing columns with app information
|
||||
|
@ -167,6 +205,7 @@ func (th *telemetryHandle) TrackMetric(metric Metric) {
|
|||
aimetric.Properties[locationStr] = th.metadata.Location
|
||||
aimetric.Properties[subscriptionIDStr] = th.metadata.SubscriptionID
|
||||
aimetric.Properties[vmNameStr] = th.metadata.VMName
|
||||
aimetric.Properties[versionStr] = th.appVersion
|
||||
}
|
||||
|
||||
// copy custom dimensions
|
||||
|
|
|
@ -2,17 +2,33 @@ package aitelemetry
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/Azure/azure-container-networking/common"
|
||||
"github.com/Azure/azure-container-networking/log"
|
||||
"github.com/Azure/azure-container-networking/platform"
|
||||
)
|
||||
|
||||
var th TelemetryHandle
|
||||
var (
|
||||
th TelemetryHandle
|
||||
hostAgentUrl = "localhost:3501"
|
||||
getCloudResponse = "AzurePublicCloud"
|
||||
httpURL = "http://" + hostAgentUrl
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
log.SetLogDirectory("/var/log/")
|
||||
log.SetName("testaitelemetry")
|
||||
log.SetLevel(log.LevelInfo)
|
||||
err := log.SetTarget(log.TargetLogfile)
|
||||
if err == nil {
|
||||
fmt.Printf("TestST LogDir configuration succeeded\n")
|
||||
}
|
||||
|
||||
if runtime.GOOS == "linux" {
|
||||
platform.ExecuteCommand("cp metadata_test.json /tmp/azuremetadata.json")
|
||||
|
@ -22,6 +38,20 @@ func TestMain(m *testing.M) {
|
|||
platform.ExecuteCommand(cmd)
|
||||
}
|
||||
|
||||
hostu, _ := url.Parse("tcp://" + hostAgentUrl)
|
||||
hostAgent, err := common.NewListener(hostu)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to create agent, err:%v.\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
hostAgent.AddHandler("/", handleGetCloud)
|
||||
err = hostAgent.Start(make(chan error, 1))
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to start agent, err:%v.\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
exitCode := m.Run()
|
||||
|
||||
if runtime.GOOS == "linux" {
|
||||
|
@ -32,10 +62,17 @@ func TestMain(m *testing.M) {
|
|||
platform.ExecuteCommand(cmd)
|
||||
}
|
||||
|
||||
log.Close()
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func handleGetCloud(w http.ResponseWriter, req *http.Request) {
|
||||
w.Write([]byte(getCloudResponse))
|
||||
}
|
||||
|
||||
func TestEmptyAIKey(t *testing.T) {
|
||||
var err error
|
||||
|
||||
aiConfig := AIConfig{
|
||||
AppName: "testapp",
|
||||
AppVersion: "v1.0.26",
|
||||
|
@ -45,26 +82,29 @@ func TestEmptyAIKey(t *testing.T) {
|
|||
DebugMode: true,
|
||||
DisableMetadataRefreshThread: true,
|
||||
}
|
||||
th := NewAITelemetry("", aiConfig)
|
||||
if th == nil {
|
||||
t.Errorf("Error intializing AI telemetry")
|
||||
_, err = NewAITelemetry(httpURL, "", aiConfig)
|
||||
if err == nil {
|
||||
t.Errorf("Error intializing AI telemetry:%v", err)
|
||||
}
|
||||
th.Close(10)
|
||||
}
|
||||
|
||||
func TestNewAITelemetry(t *testing.T) {
|
||||
var err error
|
||||
|
||||
aiConfig := AIConfig{
|
||||
AppName: "testapp",
|
||||
AppVersion: "v1.0.26",
|
||||
BatchSize: 4096,
|
||||
BatchInterval: 2,
|
||||
RefreshTimeout: 10,
|
||||
GetEnvRetryCount: 1,
|
||||
GetEnvRetryWaitTimeInSecs: 2,
|
||||
DebugMode: true,
|
||||
DisableMetadataRefreshThread: true,
|
||||
}
|
||||
th = NewAITelemetry("00ca2a73-c8d6-4929-a0c2-cf84545ec225", aiConfig)
|
||||
th, err = NewAITelemetry(httpURL, "00ca2a73-c8d6-4929-a0c2-cf84545ec225", aiConfig)
|
||||
if th == nil {
|
||||
t.Errorf("Error intializing AI telemetry")
|
||||
t.Errorf("Error intializing AI telemetry: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,6 +135,8 @@ func TestClose(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestClosewithoutSend(t *testing.T) {
|
||||
var err error
|
||||
|
||||
aiConfig := AIConfig{
|
||||
AppName: "testapp",
|
||||
AppVersion: "v1.0.26",
|
||||
|
@ -102,11 +144,13 @@ func TestClosewithoutSend(t *testing.T) {
|
|||
BatchInterval: 2,
|
||||
DisableMetadataRefreshThread: true,
|
||||
RefreshTimeout: 10,
|
||||
GetEnvRetryCount: 1,
|
||||
GetEnvRetryWaitTimeInSecs: 2,
|
||||
}
|
||||
|
||||
thtest := NewAITelemetry("00ca2a73-c8d6-4929-a0c2-cf84545ec225", aiConfig)
|
||||
thtest, err := NewAITelemetry(httpURL, "00ca2a73-c8d6-4929-a0c2-cf84545ec225", aiConfig)
|
||||
if thtest == nil {
|
||||
t.Errorf("Error intializing AI telemetry")
|
||||
t.Errorf("Error intializing AI telemetry:%v", err)
|
||||
}
|
||||
|
||||
thtest.Close(10)
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-container-networking/aitelemetry"
|
||||
"github.com/Azure/azure-container-networking/cni"
|
||||
"github.com/Azure/azure-container-networking/cns"
|
||||
"github.com/Azure/azure-container-networking/cns/cnsclient"
|
||||
|
@ -61,6 +62,7 @@ type netPlugin struct {
|
|||
*cni.Plugin
|
||||
nm network.NetworkManager
|
||||
report *telemetry.CNIReport
|
||||
tb *telemetry.TelemetryBuffer
|
||||
}
|
||||
|
||||
// snatConfiguration contains a bool that determines whether CNI enables snat on host and snat for dns
|
||||
|
@ -91,8 +93,9 @@ func NewPlugin(name string, config *common.PluginConfig) (*netPlugin, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (plugin *netPlugin) SetCNIReport(report *telemetry.CNIReport) {
|
||||
func (plugin *netPlugin) SetCNIReport(report *telemetry.CNIReport, tb *telemetry.TelemetryBuffer) {
|
||||
plugin.report = report
|
||||
plugin.tb = tb
|
||||
}
|
||||
|
||||
// Starts the plugin.
|
||||
|
@ -188,6 +191,29 @@ func (plugin *netPlugin) getPodInfo(args string) (string, string, error) {
|
|||
return k8sPodName, k8sNamespace, nil
|
||||
}
|
||||
|
||||
func SetCustomDimensions(cniMetric *telemetry.AIMetric, nwCfg *cni.NetworkConfig, err error) {
|
||||
if cniMetric == nil {
|
||||
log.Errorf("[CNI] Unable to set custom dimension. Report is nil")
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cniMetric.Metric.CustomDimensions[telemetry.StatusStr] = telemetry.FailedStr
|
||||
} else {
|
||||
cniMetric.Metric.CustomDimensions[telemetry.StatusStr] = telemetry.SucceededStr
|
||||
}
|
||||
|
||||
if nwCfg != nil {
|
||||
if nwCfg.MultiTenancy {
|
||||
cniMetric.Metric.CustomDimensions[telemetry.CNIModeStr] = telemetry.MultiTenancyStr
|
||||
} else {
|
||||
cniMetric.Metric.CustomDimensions[telemetry.CNIModeStr] = telemetry.SingleTenancyStr
|
||||
}
|
||||
|
||||
cniMetric.Metric.CustomDimensions[telemetry.CNINetworkModeStr] = nwCfg.Mode
|
||||
}
|
||||
}
|
||||
|
||||
func (plugin *netPlugin) setCNIReportDetails(nwCfg *cni.NetworkConfig, opType string, msg string) {
|
||||
if nwCfg.MultiTenancy {
|
||||
plugin.report.Context = "AzureCNIMultitenancy"
|
||||
|
@ -220,8 +246,11 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
|
|||
enableInfraVnet bool
|
||||
enableSnatForDns bool
|
||||
nwDNSInfo network.DNSInfo
|
||||
cniMetric telemetry.AIMetric
|
||||
)
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
log.Printf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v StdinData:%s}.",
|
||||
args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)
|
||||
|
||||
|
@ -245,6 +274,15 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
|
|||
plugin.setCNIReportDetails(nwCfg, CNI_ADD, "")
|
||||
|
||||
defer func() {
|
||||
operationTimeMs := time.Since(startTime).Milliseconds()
|
||||
cniMetric.Metric = aitelemetry.Metric{
|
||||
Name: telemetry.CNIAddTimeMetricStr,
|
||||
Value: float64(operationTimeMs),
|
||||
CustomDimensions: make(map[string]string),
|
||||
}
|
||||
SetCustomDimensions(&cniMetric, nwCfg, err)
|
||||
telemetry.SendCNIMetric(&cniMetric, plugin.tb)
|
||||
|
||||
// Add Interfaces to result.
|
||||
if result == nil {
|
||||
result = &cniTypesCurr.Result{}
|
||||
|
@ -255,9 +293,7 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
|
|||
}
|
||||
|
||||
result.Interfaces = append(result.Interfaces, iface)
|
||||
|
||||
addSnatInterface(nwCfg, result)
|
||||
|
||||
// Convert result to the requested CNI version.
|
||||
res, vererr := result.GetAsVersion(nwCfg.CNIVersion)
|
||||
if vererr != nil {
|
||||
|
@ -279,6 +315,8 @@ func (plugin *netPlugin) Add(args *cniSkel.CmdArgs) error {
|
|||
return err
|
||||
}
|
||||
|
||||
plugin.report.ContainerName = k8sPodName + ":" + k8sNamespace
|
||||
|
||||
if nwCfg.MultiTenancy {
|
||||
// Initialize CNSClient
|
||||
cnsclient.InitCnsClient(nwCfg.CNSUrl)
|
||||
|
@ -655,8 +693,11 @@ func (plugin *netPlugin) Delete(args *cniSkel.CmdArgs) error {
|
|||
networkId string
|
||||
nwInfo *network.NetworkInfo
|
||||
epInfo *network.EndpointInfo
|
||||
cniMetric telemetry.AIMetric
|
||||
)
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
log.Printf("[cni-net] Processing DEL command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v, StdinData:%s}.",
|
||||
args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)
|
||||
|
||||
|
@ -736,6 +777,15 @@ func (plugin *netPlugin) Delete(args *cniSkel.CmdArgs) error {
|
|||
msg := fmt.Sprintf("CNI DEL succeeded : Released ip %+v podname %v namespace %v", nwCfg.Ipam.Address, k8sPodName, k8sNamespace)
|
||||
plugin.setCNIReportDetails(nwCfg, CNI_DEL, msg)
|
||||
|
||||
operationTimeMs := time.Since(startTime).Milliseconds()
|
||||
cniMetric.Metric = aitelemetry.Metric{
|
||||
Name: telemetry.CNIDelTimeMetricStr,
|
||||
Value: float64(operationTimeMs),
|
||||
CustomDimensions: make(map[string]string),
|
||||
}
|
||||
SetCustomDimensions(&cniMetric, nwCfg, nil)
|
||||
telemetry.SendCNIMetric(&cniMetric, plugin.tb)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -751,8 +801,11 @@ func (plugin *netPlugin) Update(args *cniSkel.CmdArgs) error {
|
|||
cnsClient *cnsclient.CNSClient
|
||||
orchestratorContext []byte
|
||||
targetNetworkConfig *cns.GetNetworkContainerResponse
|
||||
cniMetric telemetry.AIMetric
|
||||
)
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
log.Printf("[cni-net] Processing UPDATE command with args {Netns:%v Args:%v Path:%v}.",
|
||||
args.Netns, args.Args, args.Path)
|
||||
|
||||
|
@ -768,6 +821,15 @@ func (plugin *netPlugin) Update(args *cniSkel.CmdArgs) error {
|
|||
plugin.setCNIReportDetails(nwCfg, CNI_UPDATE, "")
|
||||
|
||||
defer func() {
|
||||
operationTimeMs := time.Since(startTime).Milliseconds()
|
||||
cniMetric.Metric = aitelemetry.Metric{
|
||||
Name: telemetry.CNIUpdateTimeMetricStr,
|
||||
Value: float64(operationTimeMs),
|
||||
CustomDimensions: make(map[string]string),
|
||||
}
|
||||
SetCustomDimensions(&cniMetric, nwCfg, err)
|
||||
telemetry.SendCNIMetric(&cniMetric, plugin.tb)
|
||||
|
||||
if result == nil {
|
||||
result = &cniTypesCurr.Result{}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"reflect"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-container-networking/aitelemetry"
|
||||
"github.com/Azure/azure-container-networking/cni"
|
||||
"github.com/Azure/azure-container-networking/cni/network"
|
||||
"github.com/Azure/azure-container-networking/common"
|
||||
|
@ -126,6 +127,8 @@ func handleIfCniUpdate(update func(*skel.CmdArgs) error) (bool, error) {
|
|||
// Main is the entry point for CNI network plugin.
|
||||
func main() {
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// Initialize and parse command line arguments.
|
||||
acn.ParseArgs(&args, printVersion)
|
||||
vers := acn.GetArg(acn.OptVersion).(bool)
|
||||
|
@ -136,8 +139,9 @@ func main() {
|
|||
}
|
||||
|
||||
var (
|
||||
config common.PluginConfig
|
||||
err error
|
||||
config common.PluginConfig
|
||||
err error
|
||||
cnimetric telemetry.AIMetric
|
||||
)
|
||||
|
||||
log.SetName(name)
|
||||
|
@ -169,7 +173,6 @@ func main() {
|
|||
}
|
||||
|
||||
cniReport.GetReport(pluginName, version, ipamQueryURL)
|
||||
startTime := time.Now().UnixNano() / int64(time.Millisecond)
|
||||
|
||||
netPlugin, err := network.NewPlugin(name, &config)
|
||||
if err != nil {
|
||||
|
@ -177,8 +180,6 @@ func main() {
|
|||
return
|
||||
}
|
||||
|
||||
netPlugin.SetCNIReport(cniReport)
|
||||
|
||||
// CNI Acquires lock
|
||||
if err = netPlugin.Plugin.InitializeKeyValueStore(&config); err != nil {
|
||||
log.Errorf("Failed to initialize key-value store of network plugin, err:%v.\n", err)
|
||||
|
@ -214,6 +215,8 @@ func main() {
|
|||
tb.ConnectToTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds)
|
||||
defer tb.Close()
|
||||
|
||||
netPlugin.SetCNIReport(cniReport, tb)
|
||||
|
||||
t := time.Now()
|
||||
cniReport.Timestamp = t.Format("2006-01-02 15:04:05")
|
||||
|
||||
|
@ -230,9 +233,6 @@ func main() {
|
|||
log.Errorf("Failed to execute network plugin, err:%v.\n", err)
|
||||
}
|
||||
|
||||
endTime := time.Now().UnixNano() / int64(time.Millisecond)
|
||||
reflect.ValueOf(reportManager.Report).Elem().FieldByName("OperationDuration").SetInt(int64(endTime - startTime))
|
||||
|
||||
netPlugin.Stop()
|
||||
|
||||
// release cni lock
|
||||
|
@ -240,6 +240,15 @@ func main() {
|
|||
log.Errorf("Failed to uninitialize key-value store of network plugin, err:%v.\n", errUninit)
|
||||
}
|
||||
|
||||
executionTimeMs := time.Since(startTime).Milliseconds()
|
||||
cnimetric.Metric = aitelemetry.Metric{
|
||||
Name: telemetry.CNIExecutimeMetricStr,
|
||||
Value: float64(executionTimeMs),
|
||||
CustomDimensions: make(map[string]string),
|
||||
}
|
||||
network.SetCustomDimensions(&cnimetric, nil, err)
|
||||
telemetry.SendCNIMetric(&cnimetric, tb)
|
||||
|
||||
if err != nil {
|
||||
reportPluginError(reportManager, tb, err)
|
||||
panic("network plugin execute fatal error")
|
||||
|
@ -247,6 +256,7 @@ func main() {
|
|||
|
||||
// Report CNI successfully finished execution.
|
||||
reflect.ValueOf(reportManager.Report).Elem().FieldByName("CniSucceeded").SetBool(true)
|
||||
reflect.ValueOf(reportManager.Report).Elem().FieldByName("OperationDuration").SetInt(executionTimeMs)
|
||||
|
||||
if err = reportManager.SendReport(tb); err != nil {
|
||||
log.Errorf("SendReport failed due to %v", err)
|
||||
|
|
|
@ -8,15 +8,22 @@ import (
|
|||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-container-networking/aitelemetry"
|
||||
acn "github.com/Azure/azure-container-networking/common"
|
||||
"github.com/Azure/azure-container-networking/log"
|
||||
"github.com/Azure/azure-container-networking/telemetry"
|
||||
)
|
||||
|
||||
const (
|
||||
reportToHostIntervalInSeconds = 30
|
||||
azureVnetTelemetry = "azure-vnet-telemetry"
|
||||
configExtension = ".config"
|
||||
defaultReportToHostIntervalInSecs = 30
|
||||
defaultRefreshTimeoutInSecs = 15
|
||||
defaultBatchSizeInBytes = 16384
|
||||
defaultBatchIntervalInSecs = 15
|
||||
defaultGetEnvRetryCount = 2
|
||||
defaultGetEnvRetryWaitTimeInSecs = 3
|
||||
pluginName = "AzureCNI"
|
||||
azureVnetTelemetry = "azure-vnet-telemetry"
|
||||
configExtension = ".config"
|
||||
)
|
||||
|
||||
var version string
|
||||
|
@ -76,6 +83,32 @@ func printVersion() {
|
|||
fmt.Printf("Version %v\n", version)
|
||||
}
|
||||
|
||||
func setTelemetryDefaults(config *telemetry.TelemetryConfig) {
|
||||
if config.ReportToHostIntervalInSeconds == 0 {
|
||||
config.ReportToHostIntervalInSeconds = defaultReportToHostIntervalInSecs
|
||||
}
|
||||
|
||||
if config.RefreshTimeoutInSecs == 0 {
|
||||
config.RefreshTimeoutInSecs = defaultRefreshTimeoutInSecs
|
||||
}
|
||||
|
||||
if config.BatchIntervalInSecs == 0 {
|
||||
config.BatchIntervalInSecs = defaultBatchIntervalInSecs
|
||||
}
|
||||
|
||||
if config.BatchSizeInBytes == 0 {
|
||||
config.BatchSizeInBytes = defaultBatchSizeInBytes
|
||||
}
|
||||
|
||||
if config.GetEnvRetryCount == 0 {
|
||||
config.GetEnvRetryCount = defaultGetEnvRetryCount
|
||||
}
|
||||
|
||||
if config.GetEnvRetryWaitTimeInSecs == 0 {
|
||||
config.GetEnvRetryWaitTimeInSecs = defaultGetEnvRetryWaitTimeInSecs
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
var tb *telemetry.TelemetryBuffer
|
||||
var config telemetry.TelemetryConfig
|
||||
|
@ -123,6 +156,10 @@ func main() {
|
|||
|
||||
log.Logf("read config returned %+v", config)
|
||||
|
||||
setTelemetryDefaults(&config)
|
||||
|
||||
log.Logf("Config after setting defaults %+v", config)
|
||||
|
||||
// Cleaning up orphan socket if present
|
||||
tbtemp := telemetry.NewTelemetryBuffer("")
|
||||
tbtemp.Cleanup(telemetry.FdName)
|
||||
|
@ -131,7 +168,7 @@ func main() {
|
|||
tb = telemetry.NewTelemetryBuffer("")
|
||||
|
||||
log.Logf("[Telemetry] Starting telemetry server")
|
||||
err = tb.StartServer()
|
||||
err = tb.StartServer(config.DisableTelemetryToNetAgent)
|
||||
if err == nil || tb.FdExists {
|
||||
break
|
||||
}
|
||||
|
@ -141,11 +178,23 @@ func main() {
|
|||
time.Sleep(time.Millisecond * 200)
|
||||
}
|
||||
|
||||
if config.ReportToHostIntervalInSeconds == 0 {
|
||||
config.ReportToHostIntervalInSeconds = reportToHostIntervalInSeconds
|
||||
aiConfig := aitelemetry.AIConfig{
|
||||
AppName: pluginName,
|
||||
AppVersion: version,
|
||||
BatchSize: config.BatchSizeInBytes,
|
||||
BatchInterval: config.BatchIntervalInSecs,
|
||||
RefreshTimeout: config.RefreshTimeoutInSecs,
|
||||
DisableMetadataRefreshThread: config.DisableMetadataThread,
|
||||
DebugMode: config.DebugMode,
|
||||
GetEnvRetryCount: config.GetEnvRetryCount,
|
||||
GetEnvRetryWaitTimeInSecs: config.GetEnvRetryWaitTimeInSecs,
|
||||
}
|
||||
|
||||
err = telemetry.CreateAITelemetryHandle(aiConfig, config.DisableAll, config.DisableTrace, config.DisableMetric)
|
||||
log.Printf("[Telemetry] AI Handle creation status:%v", err)
|
||||
log.Logf("[Telemetry] Report to host for an interval of %d seconds", config.ReportToHostIntervalInSeconds)
|
||||
tb.BufferAndPushData(config.ReportToHostIntervalInSeconds * time.Second)
|
||||
telemetry.CloseAITelemetryHandle()
|
||||
|
||||
log.Close()
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Azure/azure-container-networking/log"
|
||||
|
@ -21,8 +22,9 @@ import (
|
|||
|
||||
const (
|
||||
metadataURL = "http://169.254.169.254/metadata/instance?api-version=2017-08-01&format=json"
|
||||
httpConnectionTimeout = 10
|
||||
headerTimeout = 20
|
||||
azCloudUrl = "http://169.254.169.254/metadata/instance/compute/azEnvironment?api-version=2018-10-01&format=text"
|
||||
httpConnectionTimeout = 7
|
||||
headerTimeout = 7
|
||||
)
|
||||
|
||||
// XmlDocument - Azure host agent XML document format.
|
||||
|
@ -288,3 +290,36 @@ func SaveHostMetadata(metadata Metadata, fileName string) error {
|
|||
|
||||
return err
|
||||
}
|
||||
|
||||
func GetAzureCloud(url string) (string, error) {
|
||||
if url == "" {
|
||||
url = azCloudUrl
|
||||
}
|
||||
|
||||
log.Printf("GetAzureCloud querying url: %s", url)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
req.Header.Set("Metadata", "True")
|
||||
|
||||
client := InitHttpClient(httpConnectionTimeout, headerTimeout)
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("Bad http status:%v", resp.Status)
|
||||
}
|
||||
|
||||
bodyBytes, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(bodyBytes)), nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright Microsoft. All rights reserved.
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
|
||||
"github.com/Azure/azure-container-networking/aitelemetry"
|
||||
"github.com/Azure/azure-container-networking/log"
|
||||
)
|
||||
|
||||
var (
|
||||
aiMetadata string
|
||||
th aitelemetry.TelemetryHandle
|
||||
gDisableTrace bool
|
||||
gDisableMetric bool
|
||||
)
|
||||
|
||||
const (
|
||||
// Wait time for AI to gracefully close AI telemetry session
|
||||
waitTimeInSecs = 10
|
||||
)
|
||||
|
||||
func CreateAITelemetryHandle(aiConfig aitelemetry.AIConfig, disableAll, disableMetric, disableTrace bool) error {
|
||||
var err error
|
||||
|
||||
if disableAll {
|
||||
log.Printf("Telemetry is disabled")
|
||||
return fmt.Errorf("Telmetry disabled")
|
||||
}
|
||||
|
||||
th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
gDisableMetric = disableMetric
|
||||
gDisableTrace = disableTrace
|
||||
return nil
|
||||
}
|
||||
|
||||
func SendAITelemetry(cnireport CNIReport) {
|
||||
if th == nil || gDisableTrace {
|
||||
return
|
||||
}
|
||||
|
||||
var msg string
|
||||
if cnireport.ErrorMessage != "" {
|
||||
msg = cnireport.ErrorMessage
|
||||
} else {
|
||||
msg = cnireport.EventMessage
|
||||
}
|
||||
|
||||
report := aitelemetry.Report{
|
||||
Message: "CNI:" + msg,
|
||||
Context: cnireport.ContainerName,
|
||||
CustomDimensions: make(map[string]string),
|
||||
}
|
||||
|
||||
report.CustomDimensions[ContextStr] = cnireport.Context
|
||||
report.CustomDimensions[SubContextStr] = cnireport.SubContext
|
||||
report.CustomDimensions[VMUptimeStr] = cnireport.VMUptime
|
||||
report.CustomDimensions[OperationTypeStr] = cnireport.OperationType
|
||||
report.CustomDimensions[VersionStr] = cnireport.Version
|
||||
|
||||
th.TrackLog(report)
|
||||
}
|
||||
|
||||
func SendAIMetric(aiMetric AIMetric) {
|
||||
if th == nil || gDisableMetric {
|
||||
return
|
||||
}
|
||||
|
||||
aiMetric.Metric.CustomDimensions[OSTypeStr] = runtime.GOOS
|
||||
th.TrackMetric(aiMetric.Metric)
|
||||
}
|
||||
|
||||
func CloseAITelemetryHandle() {
|
||||
if th != nil {
|
||||
th.Close(waitTimeInSecs)
|
||||
}
|
||||
}
|
|
@ -1,3 +1,8 @@
|
|||
{
|
||||
"reportToHostIntervalInSeconds": 30
|
||||
}
|
||||
"reportToHostIntervalInSeconds": 30,
|
||||
"BatchSizeInBytes":16384,
|
||||
"BatchIntervalInSecs":15,
|
||||
"RefreshTimeoutInSecs": 15,
|
||||
"DisableAll": false,
|
||||
"DebugMode":false
|
||||
}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright Microsoft. All rights reserved.
|
||||
|
||||
package telemetry
|
||||
|
||||
// Metric names.
const (
	CNIExecutimeMetricStr  = "CNIExecutionTimeMs"
	CNIAddTimeMetricStr    = "CNIAddTimeMs"
	CNIDelTimeMetricStr    = "CNIDelTimeMs"
	CNIUpdateTimeMetricStr = "CNIUpdateTimeMs"
)

// Custom dimension names.
const (
	ContextStr        = "Context"
	SubContextStr     = "SubContext"
	VMUptimeStr       = "VMUptime"
	OperationTypeStr  = "OperationType"
	VersionStr        = "Version"
	StatusStr         = "Status"
	CNIModeStr        = "CNIMode"
	CNINetworkModeStr = "CNINetworkMode"
	OSTypeStr         = "OSType"
)

// Custom dimension values.
const (
	SucceededStr     = "Succeeded"
	FailedStr        = "Failed"
	SingleTenancyStr = "SingleTenancy"
	MultiTenancyStr  = "MultiTenancy"
)
|
|
@ -14,6 +14,7 @@ import (
|
|||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/Azure/azure-container-networking/aitelemetry"
|
||||
"github.com/Azure/azure-container-networking/common"
|
||||
"github.com/Azure/azure-container-networking/log"
|
||||
"github.com/Azure/azure-container-networking/platform"
|
||||
|
@ -100,6 +101,10 @@ type CNIReport struct {
|
|||
Metadata common.Metadata `json:"compute"`
|
||||
}
|
||||
|
||||
type AIMetric struct {
|
||||
Metric aitelemetry.Metric
|
||||
}
|
||||
|
||||
// Azure CNS Telemetry Report structure.
|
||||
type CNSReport struct {
|
||||
IsNewInstance bool
|
||||
|
@ -372,6 +377,7 @@ func (reportMgr *ReportManager) ReportToBytes() ([]byte, error) {
|
|||
case *NPMReport:
|
||||
case *DNCReport:
|
||||
case *CNSReport:
|
||||
case *AIMetric:
|
||||
default:
|
||||
err = fmt.Errorf("[Telemetry] Invalid report type")
|
||||
}
|
||||
|
@ -383,3 +389,24 @@ func (reportMgr *ReportManager) ReportToBytes() ([]byte, error) {
|
|||
report, err = json.Marshal(reportMgr.Report)
|
||||
return report, err
|
||||
}
|
||||
|
||||
// This function for sending CNI metrics to telemetry service
|
||||
func SendCNIMetric(cniMetric *AIMetric, tb *TelemetryBuffer) error {
|
||||
var (
|
||||
err error
|
||||
report []byte
|
||||
)
|
||||
|
||||
if tb != nil && tb.Connected {
|
||||
reportMgr := &ReportManager{Report: cniMetric}
|
||||
report, err = reportMgr.ReportToBytes()
|
||||
if err == nil {
|
||||
// If write fails, try to re-establish connections as server/client
|
||||
if _, err = tb.Write(report); err != nil {
|
||||
tb.Cancel()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ func TestMain(m *testing.M) {
|
|||
reportManager.Report = &CNIReport{}
|
||||
|
||||
tb = NewTelemetryBuffer(hostAgentUrl)
|
||||
err = tb.StartServer()
|
||||
err = tb.StartServer(false)
|
||||
if err == nil {
|
||||
go tb.BufferAndPushData(0)
|
||||
}
|
||||
|
@ -186,13 +186,6 @@ func TestSendTelemetry(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestReceiveTelemetryData(t *testing.T) {
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
if len(tb.buffer.CNIReports) != 1 {
|
||||
t.Errorf("buffer doesn't contain CNI report")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCloseTelemetryConnection(t *testing.T) {
|
||||
tb.Cancel()
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
|
@ -204,7 +197,7 @@ func TestCloseTelemetryConnection(t *testing.T) {
|
|||
func TestServerCloseTelemetryConnection(t *testing.T) {
|
||||
// create server telemetrybuffer and start server
|
||||
tb = NewTelemetryBuffer(hostAgentUrl)
|
||||
err := tb.StartServer()
|
||||
err := tb.StartServer(false)
|
||||
if err == nil {
|
||||
go tb.BufferAndPushData(0)
|
||||
}
|
||||
|
@ -235,7 +228,7 @@ func TestServerCloseTelemetryConnection(t *testing.T) {
|
|||
func TestClientCloseTelemetryConnection(t *testing.T) {
|
||||
// create server telemetrybuffer and start server
|
||||
tb = NewTelemetryBuffer(hostAgentUrl)
|
||||
err := tb.StartServer()
|
||||
err := tb.StartServer(false)
|
||||
if err == nil {
|
||||
go tb.BufferAndPushData(0)
|
||||
}
|
||||
|
|
|
@ -28,6 +28,17 @@ import (
|
|||
// TelemetryConfig holds the options the telemetry service reads from its
// config file. Numeric options left at zero are unset.
type TelemetryConfig struct {
	// ReportToHostIntervalInSeconds is a count of seconds stored in a
	// time.Duration field.
	ReportToHostIntervalInSeconds time.Duration `json:"reportToHostIntervalInSeconds"`

	// Feature switches.
	DisableAll                 bool
	DisableTrace               bool
	DisableMetric              bool
	DisableMetadataThread      bool
	DebugMode                  bool
	DisableTelemetryToNetAgent bool

	// Timing and batching options.
	RefreshTimeoutInSecs int
	BatchIntervalInSecs  int
	BatchSizeInBytes     int

	// Retry options.
	GetEnvRetryCount          int
	GetEnvRetryWaitTimeInSecs int
}
|
||||
|
||||
// FdName - file descriptor name
|
||||
|
@ -50,7 +61,10 @@ const (
|
|||
cni = "CNI"
|
||||
)
|
||||
|
||||
var payloadSize uint16 = 0
|
||||
var (
|
||||
payloadSize uint16 = 0
|
||||
disableTelemetryToNetAgent bool
|
||||
)
|
||||
|
||||
// TelemetryBuffer object
|
||||
type TelemetryBuffer struct {
|
||||
|
@ -104,7 +118,8 @@ func remove(s []net.Conn, i int) []net.Conn {
|
|||
}
|
||||
|
||||
// Starts Telemetry server listening on unix domain socket
|
||||
func (tb *TelemetryBuffer) StartServer() error {
|
||||
func (tb *TelemetryBuffer) StartServer(disableNetAgentChannel bool) error {
|
||||
disableTelemetryToNetAgent = disableNetAgentChannel
|
||||
err := tb.Listen(FdName)
|
||||
if err != nil {
|
||||
tb.FdExists = strings.Contains(err.Error(), "in use") || strings.Contains(err.Error(), "Access is denied")
|
||||
|
@ -136,6 +151,10 @@ func (tb *TelemetryBuffer) StartServer() error {
|
|||
var cniReport CNIReport
|
||||
json.Unmarshal([]byte(reportStr), &cniReport)
|
||||
tb.data <- cniReport
|
||||
} else if _, ok := tmp["Metric"]; ok {
|
||||
var aiMetric AIMetric
|
||||
json.Unmarshal([]byte(reportStr), &aiMetric)
|
||||
tb.data <- aiMetric
|
||||
} else if _, ok := tmp["Allocations"]; ok {
|
||||
var dncReport DNCReport
|
||||
json.Unmarshal([]byte(reportStr), &dncReport)
|
||||
|
@ -279,6 +298,10 @@ func (tb *TelemetryBuffer) Close() {
|
|||
|
||||
// sendToHost - send buffer to host
|
||||
func (tb *TelemetryBuffer) sendToHost() error {
|
||||
if disableTelemetryToNetAgent {
|
||||
return nil
|
||||
}
|
||||
|
||||
buf := Buffer{
|
||||
DNCReports: make([]DNCReport, 0),
|
||||
CNIReports: make([]CNIReport, 0),
|
||||
|
@ -443,7 +466,13 @@ func (buf *Buffer) push(x interface{}) {
|
|||
}
|
||||
cniReport := x.(CNIReport)
|
||||
cniReport.Metadata = metadata
|
||||
SendAITelemetry(cniReport)
|
||||
buf.CNIReports = append(buf.CNIReports, cniReport)
|
||||
|
||||
case AIMetric:
|
||||
aiMetric := x.(AIMetric)
|
||||
SendAIMetric(aiMetric)
|
||||
|
||||
case NPMReport:
|
||||
if len(buf.NPMReports) >= MaxNumReports {
|
||||
return
|
||||
|
|
Загрузка…
Ссылка в новой задаче