Update metrics name and refactor (#451)

* Update metrics name and refactor

* update go mod

* add imds op views and update buckets

* update measurement

* update metrics readme

* update bucket size

* add unit tests
This commit is contained in:
Anish Ramasekar 2019-12-09 23:08:04 -08:00 коммит произвёл Krishnakumar R
Родитель f6130135fc
Коммит b64f812db1
18 изменённых файлов: 325 добавлений и 250 удалений

Просмотреть файл

@ -125,11 +125,10 @@ func main() {
probes.InitAndStart(httpProbePort, &micClient.SyncLoopStarted, &mic.Log{})
// Register and expose metrics views
metricErr := metrics.RegisterAndExport(prometheusPort, &mic.Log{})
if metricErr != nil {
glog.Fatalf("Could not register and export metrics: %+v", metricErr)
if err = metrics.RegisterAndExport(prometheusPort, &mic.Log{}); err != nil {
glog.Fatalf("Could not register and export metrics: %+v", err)
}
// Starts the leader election loop
micClient.Run()
glog.Info("AAD Pod identity controller initialized!!")

Просмотреть файл

@ -99,10 +99,8 @@ func main() {
probes.InitAndStart(*httpProbePort, &s.Initialized, logger)
// Register and expose metrics views
metricErr := metrics.RegisterAndExport(*prometheusPort, logger)
if metricErr != nil {
log.Fatalf("Could not register and export metrics: %+v", metricErr)
if err = metrics.RegisterAndExport(*prometheusPort, logger); err != nil {
log.Fatalf("Could not register and export metrics: %+v", err)
}
if err := s.Run(); err != nil {

Просмотреть файл

@ -33,21 +33,21 @@ Histogram that tracks the duration (in seconds) it takes for Assigned identity d
Counter that tracks the cumulative number of assigned identity deletion operations.
**5. aadpodidentity_nodemanagedidentity_operations_duration_seconds**
**5. aadpodidentity_nmi_operations_duration_seconds**
Histogram that tracks the latency (in seconds) of Node Managed Identity operations to complete. Broken down by operation type, status code.
Histogram that tracks the latency (in seconds) of NMI operations to complete. Broken down by operation type, status code.
**6. aadpodidentity_managedidentitycontroller_cycle_duration_seconds**
**6. aadpodidentity_mic_cycle_duration_seconds**
Histogram that tracks the duration (in seconds) it takes for a single cycle in Managed Identity Controller.
Histogram that tracks the duration (in seconds) it takes for a single cycle in MIC.
**7. aadpodidentity_managedidentitycontroller_cycle_count**
**7. aadpodidentity_mic_cycle_count**
Counter that tracks the number of cycles executed in Managed Identity Controller.
Counter that tracks the number of cycles executed in MIC.
**8. aadpodidentity_managedidentitycontroller_new_leader_election_count**
**8. aadpodidentity_mic_new_leader_election_count**
Counter that tracks the cumulative number of new leader election in Managed Identity Controller.
Counter that tracks the cumulative number of new leader election in MIC.
**9. aadpodidentity_cloud_provider_operations_errors_count**
@ -60,3 +60,11 @@ Histogram that tracks the duration (in seconds) it takes for cloud provider oper
**11. aadpodidentity_kubernetes_api_operations_errors_count**
Counter that tracks the cumulative number of kubernetes api operations errors. Broken down by operation type.
**12. aadpodidentity_imds_operations_errors_count**
Counter that tracks the cumulative number of imds token operation errors. Broken down by operation type.
**13. aadpodidentity_imds_operations_duration_seconds**
Histogram that tracks the duration (in seconds) it takes for imds token operations. Broken down by operation type.

4
go.mod
Просмотреть файл

@ -6,7 +6,6 @@ require (
contrib.go.opencensus.io/exporter/ocagent v0.5.0 // indirect
contrib.go.opencensus.io/exporter/prometheus v0.1.0
github.com/Azure/azure-sdk-for-go v31.0.0+incompatible
github.com/Azure/go-autorest v12.2.0+incompatible
github.com/Azure/go-autorest/autorest v0.3.0
github.com/Azure/go-autorest/autorest/adal v0.1.0
github.com/Azure/go-autorest/autorest/azure/auth v0.1.0
@ -27,14 +26,11 @@ require (
github.com/onsi/ginkgo v1.7.0
github.com/onsi/gomega v1.4.3
github.com/pkg/errors v0.8.0
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 // indirect
github.com/sirupsen/logrus v1.2.0
github.com/spf13/pflag v1.0.1
go.opencensus.io v0.22.0
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect
gopkg.in/airbrake/gobrake.v2 v2.0.9 // indirect
gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.2.1
k8s.io/api v0.0.0-20180501062503-590a9173e3b6

25
go.sum
Просмотреть файл

@ -6,16 +6,8 @@ contrib.go.opencensus.io/exporter/ocagent v0.5.0 h1:TKXjQSRS0/cCDrP7KvkgU6SmILtF
contrib.go.opencensus.io/exporter/ocagent v0.5.0/go.mod h1:ImxhfLRpxoYiSq891pBrLVhN+qmP8BTVvdH2YLs7Gl0=
contrib.go.opencensus.io/exporter/prometheus v0.1.0 h1:SByaIoWwNgMdPSgl5sMqM2KDE5H/ukPWBRo314xiDvg=
contrib.go.opencensus.io/exporter/prometheus v0.1.0/go.mod h1:cGFniUXGZlKRjzOyuZJ6mgB+PgBcCIa79kEKR8YCW+A=
github.com/Azure/azure-sdk-for-go v16.2.1+incompatible h1:KnPIugL51v3N3WwvaSmZbxukD1WuWXOiE9fRdu32f2I=
github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/azure-sdk-for-go v31.0.0+incompatible h1:18nT+M3yxnWcO66yoJyomlCoKMu578UHh0DjJBA5c1M=
github.com/Azure/azure-sdk-for-go v31.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/go-autorest v10.8.1+incompatible h1:u0jVQf+a6k6x8A+sT60l6EY9XZu+kHdnZVPAYqpVRo0=
github.com/Azure/go-autorest v10.8.1+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest v11.4.0+incompatible h1:z3Yr6KYqs0nhSNwqGXEBpWK977hxVqsLv2n9PVYcixY=
github.com/Azure/go-autorest v11.4.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest v12.2.0+incompatible h1:2Fxszbg492oAJrcvJlgyVaTqnQYRkxmEK6VPCLLVpBI=
github.com/Azure/go-autorest v12.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest/autorest v0.1.0/go.mod h1:AKyIcETwSUFxIcs/Wnq/C+kwCtlEYGUVd7FPNb2slmg=
github.com/Azure/go-autorest/autorest v0.3.0 h1:yOmXNB2qa2Kx40wMZB19YyafzjCHacXPk8u0neqa+M0=
github.com/Azure/go-autorest/autorest v0.3.0/go.mod h1:AKyIcETwSUFxIcs/Wnq/C+kwCtlEYGUVd7FPNb2slmg=
@ -56,8 +48,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dimchansky/utfbom v0.0.0-20170328061312-6c6132ff69f0 h1:ef7gXyTQd1fTBuM2Y8XjpeMIquTI/vN7uaXZABxfyE4=
github.com/dimchansky/utfbom v0.0.0-20170328061312-6c6132ff69f0/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4=
github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
@ -70,8 +60,6 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.0.0 h1:2jyBKDKU/8v3v2xVR2PtiWQviFUyiaGk2rpfyFT8rTM=
github.com/gogo/protobuf v1.0.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.0 h1:xU6/SpYbvkNYiptHJYEDRseDLvYE7wSqhYYNy0QSUzI=
github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@ -98,8 +86,6 @@ github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/grpc-ecosystem/grpc-gateway v1.8.5 h1:2+KSC78XiO6Qy0hIjfc1OD9H+hsaJdJlb8Kqsd41CTE=
github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
github.com/hashicorp/golang-lru v0.0.0-20180201235237-0fb14efe8c47 h1:UnszMmmmm5vLwWzDjTFVIkfhvWF1NdrmChl8L2NUDCw=
github.com/hashicorp/golang-lru v0.0.0-20180201235237-0fb14efe8c47/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=
@ -138,8 +124,6 @@ github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.4.2 h1:3mYCb7aPxS/RU7TI1y4rkEn1oKmPRjNJLNEXgw7MH2I=
github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw=
@ -164,8 +148,6 @@ github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1 h1:/K3IL0Z1quvmJ
github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/sirupsen/logrus v1.0.5 h1:8c8b5uO0zS4X6RPl/sd1ENwSkIc0/H2PaHxE3udaE8I=
github.com/sirupsen/logrus v1.0.5/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/spf13/pflag v1.0.1 h1:aCvUg6QPl3ibpQUxyLkrEkCHtPqYJL4x9AuhqVqFis4=
@ -181,8 +163,6 @@ go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0 h1:C9hSCOW830chIVkdja34wa6Ky+IzWllkUinR+BtRZd4=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
golang.org/x/crypto v0.0.0-20180505025534-4ec37c66abab h1:w4c/LoOA2vE8SYwh8wEEQVRUwpph7TtcjH7AtZvOjy0=
golang.org/x/crypto v0.0.0-20180505025534-4ec37c66abab/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793 h1:u+LnwYTOOW7Ukr/fppxEb1Nwz0AtPflrblfvUudpo+I=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
@ -246,6 +226,7 @@ google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMt
google.golang.org/api v0.4.0 h1:KKgc1aqhV8wDPbDzlDtpvyjZFY3vjz85FP7p4wcQUyI=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19 h1:Lj2SnHtxkRGJDqnGaSjo+CCdIieEnwVazbOXILwQemk=
@ -258,8 +239,6 @@ google.golang.org/grpc v1.19.1 h1:TrBcJ1yqAl1G++wO39nD/qtgpsW9/1+QGrluyMGEYgM=
google.golang.org/grpc v1.19.1/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
gopkg.in/airbrake/gobrake.v2 v2.0.9 h1:7z2uVWwn7oVeeugY1DtlPAy5H+KYgB1KeKTnqjNatLo=
gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@ -267,8 +246,6 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2 h1:OAj3g0cR6Dx/R07QgQe8wkA9RNjB2u4i700xBkIT4e0=
gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=

Просмотреть файл

@ -11,9 +11,6 @@ import (
const (
activeDirectoryEndpoint = "https://login.microsoftonline.com/"
adalTokenFromMSIOperationName = "adal_token_msi"
adalTokenFromMSIWithUserAssignedIDOperationName = "adal_token_msi_userassignedid"
adalTokenOperationName = "adal_token"
)
var reporter *metrics.Reporter
@ -21,36 +18,51 @@ var reporter *metrics.Reporter
// GetServicePrincipalTokenFromMSI return the token for the assigned user
func GetServicePrincipalTokenFromMSI(resource string) (*adal.Token, error) {
begin := time.Now()
var err error
defer func() {
if err != nil {
reporter.ReportIMDSOperationError(metrics.AdalTokenFromMSIOperationName)
return
}
reporter.ReportIMDSOperationDuration(metrics.AdalTokenFromMSIOperationName, time.Since(begin))
}()
// Get the MSI endpoint accoriding with the OS (Linux/Windows)
msiEndpoint, err := adal.GetMSIVMEndpoint()
if err != nil {
recordError(adalTokenFromMSIOperationName)
return nil, fmt.Errorf("Failed to get the MSI endpoint. Error: %v", err)
}
// Set up the configuration of the service principal
spt, err := adal.NewServicePrincipalTokenFromMSI(msiEndpoint, resource)
if err != nil {
recordError(adalTokenFromMSIOperationName)
return nil, fmt.Errorf("Failed to acquire a token for MSI. Error: %v", err)
}
// Effectively acquire the token
// obtain a fresh token
err = spt.Refresh()
if err != nil {
recordError(adalTokenFromMSIOperationName)
return nil, err
}
token := spt.Token()
recordDuration(adalTokenFromMSIOperationName, time.Since(begin))
return &token, nil
}
// GetServicePrincipalTokenFromMSIWithUserAssignedID return the token for the assigned user
func GetServicePrincipalTokenFromMSIWithUserAssignedID(clientID, resource string) (*adal.Token, error) {
begin := time.Now()
var err error
defer func() {
if err != nil {
reporter.ReportIMDSOperationError(metrics.AdalTokenFromMSIWithUserAssignedIDOperationName)
return
}
reporter.ReportIMDSOperationDuration(metrics.AdalTokenFromMSIWithUserAssignedIDOperationName, time.Since(begin))
}()
// Get the MSI endpoint accoriding with the OS (Linux/Windows)
msiEndpoint, err := adal.GetMSIVMEndpoint()
if err != nil {
recordError(adalTokenFromMSIWithUserAssignedIDOperationName)
return nil, fmt.Errorf("Failed to get the MSI endpoint. Error: %v", err)
}
// The ID of the user for whom the token is requested
@ -58,47 +70,45 @@ func GetServicePrincipalTokenFromMSIWithUserAssignedID(clientID, resource string
// Set up the configuration of the service principal
spt, err := adal.NewServicePrincipalTokenFromMSIWithUserAssignedID(msiEndpoint, resource, userAssignedID)
if err != nil {
recordError(adalTokenFromMSIWithUserAssignedIDOperationName)
return nil, fmt.Errorf("Failed to acquire a token using the MSI VM extension. Error: %v", err)
}
// Effectively acquire the token
// obtain a fresh token
err = spt.Refresh()
if err != nil {
recordError(adalTokenFromMSIWithUserAssignedIDOperationName)
return nil, err
}
token := spt.Token()
recordDuration(adalTokenFromMSIWithUserAssignedIDOperationName, time.Since(begin))
return &token, nil
}
// GetServicePrincipalToken return the token for the assigned user
func GetServicePrincipalToken(tenantID, clientID, secret, resource string) (*adal.Token, error) {
begin := time.Now()
var err error
defer func() {
if err != nil {
reporter.ReportIMDSOperationError(metrics.AdalTokenOperationName)
return
}
reporter.ReportIMDSOperationDuration(metrics.AdalTokenOperationName, time.Since(begin))
}()
oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, tenantID)
if err != nil {
recordError(adalTokenOperationName)
return nil, fmt.Errorf("creating the OAuth config: %v", err)
}
spt, err := adal.NewServicePrincipalToken(
*oauthConfig,
clientID,
secret,
resource,
)
spt, err := adal.NewServicePrincipalToken(*oauthConfig, clientID, secret, resource)
if err != nil {
recordError(adalTokenOperationName)
return nil, err
}
// Evectively acqurie the token
// obtain a fresh token
err = spt.Refresh()
if err != nil {
recordError(adalTokenOperationName)
return nil, err
}
token := spt.Token()
recordDuration(adalTokenOperationName, time.Since(begin))
return &token, nil
}
@ -114,21 +124,3 @@ func init() {
func InitReporter(reporterInstance *metrics.Reporter) {
reporter = reporterInstance
}
// recordError records the error in appropriate metric
func recordError(operation string) {
if reporter != nil {
reporter.ReportOperation(
operation,
metrics.CloudProviderOperationsErrorsCountM.M(1))
}
}
// recordDuration records the duration in appropriate metric
func recordDuration(operation string, duration time.Duration) {
if reporter != nil {
reporter.ReportOperation(
operation,
metrics.CloudProviderOperationsDurationM.M(duration.Seconds()))
}
}

Просмотреть файл

@ -2,10 +2,17 @@ package auth
import (
"testing"
"github.com/Azure/aad-pod-identity/pkg/metrics"
)
func TestGetServicePrincipalToken(t *testing.T) {
_, err := GetServicePrincipalToken("tid", "cid", "", "")
reporter, err := metrics.NewReporter()
if err != nil {
t.Fatalf("expected nil error, got: %+v", err)
}
InitReporter(reporter)
_, err = GetServicePrincipalToken("tid", "cid", "", "")
if err == nil {
t.Fatal("should be error with empty secret")
}

Просмотреть файл

@ -11,7 +11,6 @@ import (
"time"
config "github.com/Azure/aad-pod-identity/pkg/config"
"github.com/Azure/aad-pod-identity/pkg/metrics"
"github.com/Azure/aad-pod-identity/pkg/utils"
"github.com/Azure/aad-pod-identity/version"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-04-01/compute"
@ -338,22 +337,3 @@ func ParseResourceID(resourceID string) (azure.Resource, error) {
return result, nil
}
// recordError records the error in appropriate metric
func recordError(reporter *metrics.Reporter, operation string) {
if reporter != nil {
reporter.ReportOperation(
operation,
metrics.CloudProviderOperationsErrorsCountM.M(1))
}
}
// recordDuration records the duration
func recordDuration(reporter *metrics.Reporter, operation string, duration time.Duration) {
if reporter != nil {
reporter.ReportOperation(
operation,
metrics.CloudProviderOperationsDurationM.M(duration.Seconds()))
}
}

Просмотреть файл

@ -15,22 +15,19 @@ import (
"github.com/golang/glog"
)
const (
getVmOperationName = "vm_get"
putVmOperationName = "vm_create_or_update"
)
// VMClient client for VirtualMachines
type VMClient struct {
client compute.VirtualMachinesClient
reporter *metrics.Reporter
}
// VMClientInt is the interface used by "cloudprovider" for interacting with Azure vmas
type VMClientInt interface {
CreateOrUpdate(rg string, nodeName string, vm compute.VirtualMachine) error
Get(rgName string, nodeName string) (compute.VirtualMachine, error)
}
// NewVirtualMachinesClient creates a new vm client.
func NewVirtualMachinesClient(config config.AzureConfig, spt *adal.ServicePrincipalToken) (c *VMClient, e error) {
client := compute.NewVirtualMachinesClient(config.SubscriptionID)
@ -56,42 +53,60 @@ func NewVirtualMachinesClient(config config.AzureConfig, spt *adal.ServicePrinci
}, nil
}
// CreateOrUpdate creates a new vm, or if the vm already exists it updates the existing one.
// This is used by "cloudprovider" to *update* add/remove identities from an already existing vm.
func (c *VMClient) CreateOrUpdate(rg string, nodeName string, vm compute.VirtualMachine) error {
// Set the read-only property of extension to null.
vm.Resources = nil
ctx := context.Background()
begin := time.Now()
var err error
defer func() {
if err != nil {
c.reporter.ReportCloudProviderOperationError(metrics.PutVMOperationName)
return
}
c.reporter.ReportCloudProviderOperationDuration(metrics.PutVMOperationName, time.Since(begin))
}()
future, err := c.client.CreateOrUpdate(ctx, rg, nodeName, vm)
if err != nil {
glog.Error(err)
recordError(c.reporter, putVmOperationName)
return err
}
err = future.WaitForCompletionRef(ctx, c.client.Client)
if err != nil {
glog.Error(err)
recordError(c.reporter, putVmOperationName)
return err
}
recordDuration(c.reporter, putVmOperationName, time.Since(begin))
stats.UpdateCount(stats.TotalPutCalls, 1)
stats.Update(stats.CloudPut, time.Since(begin))
return nil
}
// Get gets the passed in vm.
func (c *VMClient) Get(rgName string, nodeName string) (compute.VirtualMachine, error) {
ctx := context.Background()
beginGetTime := time.Now()
begin := time.Now()
var err error
defer func() {
if err != nil {
c.reporter.ReportCloudProviderOperationError(metrics.GetVMOperationName)
return
}
c.reporter.ReportCloudProviderOperationDuration(metrics.GetVMOperationName, time.Since(begin))
}()
vm, err := c.client.Get(ctx, rgName, nodeName, "")
if err != nil {
glog.Error(err)
recordError(c.reporter, getVmOperationName)
return vm, err
}
recordDuration(c.reporter, getVmOperationName, time.Since(beginGetTime))
stats.UpdateCount(stats.TotalGetCalls, 1)
stats.Update(stats.CloudGet, time.Since(beginGetTime))
stats.Update(stats.CloudGet, time.Since(begin))
return vm, nil
}

Просмотреть файл

@ -15,11 +15,6 @@ import (
"github.com/golang/glog"
)
const (
getVmssOperationName = "vmss_get"
putVmssOperationName = "vmss_create_or_update"
)
// VMSSClient is used to interact with Azure virtual machine scale sets.
type VMSSClient struct {
client compute.VirtualMachineScaleSetsClient
@ -47,11 +42,11 @@ func NewVMSSClient(config config.AzureConfig, spt *adal.ServicePrincipalToken) (
client.AddToUserAgent(version.GetUserAgent("MIC", version.MICVersion))
reporter, err := metrics.NewReporter()
if err != nil {
glog.Errorf("New reporter error: %+v", err)
return nil, err
}
return &VMSSClient{
client: client,
reporter: reporter,
@ -63,23 +58,29 @@ func NewVMSSClient(config config.AzureConfig, spt *adal.ServicePrincipalToken) (
func (c *VMSSClient) CreateOrUpdate(rg string, vmssName string, vm compute.VirtualMachineScaleSet) error {
// Set the read-only property of extension to null.
//vm.Resources = nil
ctx := context.Background()
begin := time.Now()
var err error
defer func() {
if err != nil {
c.reporter.ReportCloudProviderOperationError(metrics.PutVmssOperationName)
return
}
c.reporter.ReportCloudProviderOperationDuration(metrics.PutVmssOperationName, time.Since(begin))
}()
future, err := c.client.CreateOrUpdate(ctx, rg, vmssName, vm)
if err != nil {
glog.Error(err)
recordError(c.reporter, putVmssOperationName)
return err
}
err = future.WaitForCompletionRef(ctx, c.client.Client)
if err != nil {
glog.Error(err)
recordError(c.reporter, putVmssOperationName)
return err
}
recordDuration(c.reporter, putVmssOperationName, time.Since(begin))
stats.UpdateCount(stats.TotalPutCalls, 1)
stats.Update(stats.CloudPut, time.Since(begin))
return nil
@ -88,16 +89,22 @@ func (c *VMSSClient) CreateOrUpdate(rg string, vmssName string, vm compute.Virtu
// Get gets the passed in vmss.
func (c *VMSSClient) Get(rgName string, vmssName string) (ret compute.VirtualMachineScaleSet, err error) {
ctx := context.Background()
beginGetTime := time.Now()
begin := time.Now()
defer func() {
if err != nil {
c.reporter.ReportCloudProviderOperationError(metrics.GetVmssOperationName)
return
}
c.reporter.ReportCloudProviderOperationDuration(metrics.GetVmssOperationName, time.Since(begin))
}()
vm, err := c.client.Get(ctx, rgName, vmssName)
if err != nil {
glog.Error(err)
recordError(c.reporter, getVmssOperationName)
return vm, err
}
recordDuration(c.reporter, getVmssOperationName, time.Since(beginGetTime))
stats.UpdateCount(stats.TotalGetCalls, 1)
stats.Update(stats.CloudGet, time.Since(beginGetTime))
stats.Update(stats.CloudGet, time.Since(begin))
return vm, nil
}

Просмотреть файл

@ -315,46 +315,54 @@ func (c *Client) syncCache(exit <-chan struct{}, initial bool, cacheSyncs ...cac
}
// RemoveAssignedIdentity removes the assigned identity
func (c *Client) RemoveAssignedIdentity(assignedIdentity *aadpodid.AzureAssignedIdentity) error {
func (c *Client) RemoveAssignedIdentity(assignedIdentity *aadpodid.AzureAssignedIdentity) (err error) {
glog.V(6).Infof("Deletion of assigned id named: %s", assignedIdentity.Name)
begin := time.Now()
err := c.rest.Delete().Namespace(assignedIdentity.Namespace).Resource("azureassignedidentities").Name(assignedIdentity.Name).Do().Error()
glog.V(5).Infof("Deletion %s took: %v", assignedIdentity.Name, time.Since(begin))
stats.Update(stats.AssignedIDDel, time.Since(begin))
defer func() {
if err != nil {
c.reporter.ReportKubernetesAPIOperationError(metrics.AssignedIdentityDeletionOperationName)
return
}
c.reporter.Report(
metrics.AssignedIdentityDeletionCountM.M(1),
metrics.AssignedIdentityDeletionDurationM.M(metrics.SinceInSeconds(begin)))
if err != nil {
recordError(c.reporter, "assigned_identity_deletion")
}
}()
err = c.rest.Delete().Namespace(assignedIdentity.Namespace).Resource("azureassignedidentities").Name(assignedIdentity.Name).Do().Error()
glog.V(5).Infof("Deletion %s took: %v", assignedIdentity.Name, time.Since(begin))
stats.Update(stats.AssignedIDDel, time.Since(begin))
return err
}
// CreateAssignedIdentity creates new assigned identity
func (c *Client) CreateAssignedIdentity(assignedIdentity *aadpodid.AzureAssignedIdentity) error {
func (c *Client) CreateAssignedIdentity(assignedIdentity *aadpodid.AzureAssignedIdentity) (err error) {
glog.Infof("Got assigned id %s to assign", assignedIdentity.Name)
begin := time.Now()
// Create a new AzureAssignedIdentity which maps the relationship between
// id and pod
var res aadpodid.AzureAssignedIdentity
// TODO: Ensure that the status reflects the corresponding
err := c.rest.Post().Namespace(assignedIdentity.Namespace).Resource("azureassignedidentities").Body(assignedIdentity).Do().Into(&res)
defer func() {
if err != nil {
c.reporter.ReportKubernetesAPIOperationError(metrics.AssignedIdentityAdditionOperationName)
return
}
c.reporter.Report(
metrics.AssignedIdentityAdditionCountM.M(1),
metrics.AssignedIdentityAdditionDurationM.M(metrics.SinceInSeconds(begin)))
}()
// Create a new AzureAssignedIdentity which maps the relationship between id and pod
var res aadpodid.AzureAssignedIdentity
// TODO: Ensure that the status reflects the corresponding
err = c.rest.Post().Namespace(assignedIdentity.Namespace).Resource("azureassignedidentities").Body(assignedIdentity).Do().Into(&res)
if err != nil {
glog.Error(err)
recordError(c.reporter, "assigned_identity_addition")
return err
}
glog.V(5).Infof("Time take to create %s: %v", assignedIdentity.Name, time.Since(begin))
stats.Update(stats.AssignedIDAdd, time.Since(begin))
//TODO: Update the status of the assign identity to indicate that the node assignment got done.
return nil
}
@ -527,9 +535,15 @@ type patchStatusOps struct {
}
// UpdateAzureAssignedIdentityStatus updates the status field in AzureAssignedIdentity to indicate current status
func (c *Client) UpdateAzureAssignedIdentityStatus(assignedIdentity *aadpodid.AzureAssignedIdentity, status string) error {
func (c *Client) UpdateAzureAssignedIdentityStatus(assignedIdentity *aadpodid.AzureAssignedIdentity, status string) (err error) {
glog.Infof("Updating assigned identity %s/%s status to %s", assignedIdentity.Namespace, assignedIdentity.Name, status)
defer func() {
if err != nil {
c.reporter.ReportKubernetesAPIOperationError(metrics.UpdateAzureAssignedIdentityStatusOperationName)
}
}()
ops := make([]patchStatusOps, 1)
ops[0].Op = "replace"
ops[0].Path = "/Status/status"
@ -550,16 +564,5 @@ func (c *Client) UpdateAzureAssignedIdentityStatus(assignedIdentity *aadpodid.Az
Do().
Error()
glog.V(5).Infof("Patch of %s took: %v", assignedIdentity.Name, time.Since(begin))
if err != nil {
recordError(c.reporter, "update_azure_assigned_identity_status")
}
return err
}
// recordError records the error in appropriate metric
func recordError(r *metrics.Reporter, operation string) {
if r != nil {
r.ReportOperation(operation, metrics.KubernetesAPIOperationsErrorsCountM.M(1))
}
}

Просмотреть файл

@ -28,8 +28,6 @@ import (
const (
getPodListRetries = 4
getPodListSleepTimeMilliseconds = 300
getPodListOperationName = "get_pod_list"
getSecretOperationName = "get_secret"
)
// Client api client
@ -72,9 +70,7 @@ func NewKubeClient(log inlog.Logger, nodeName string, scale bool) (Client, error
if err != nil {
return nil, err
}
reporter, err := metrics.NewReporter()
if err != nil {
return nil, err
}
@ -186,7 +182,8 @@ func (c *KubeClient) getPodListRetry(podip string, retries int, sleeptime time.D
if err == nil {
return podList, nil
}
recordError(c.reporter, getPodListOperationName)
c.reporter.ReportKubernetesAPIOperationError(metrics.GetPodListOperationName)
if i >= retries {
break
}
@ -230,7 +227,7 @@ func (c *KubeClient) ListPodIdentityExceptions(ns string) (*[]aadpodid.AzurePodI
func (c *KubeClient) GetSecret(secretRef *v1.SecretReference) (*v1.Secret, error) {
secret, err := c.ClientSet.CoreV1().Secrets(secretRef.Namespace).Get(secretRef.Name, metav1.GetOptions{})
if err != nil {
recordError(c.reporter, getSecretOperationName)
c.reporter.ReportKubernetesAPIOperationError(metrics.GetSecretOperationName)
return nil, err
}
return secret, nil

Просмотреть файл

@ -2,6 +2,7 @@ package metrics
import (
"context"
"sync"
"time"
log "github.com/Azure/aad-pod-identity/pkg/logger"
@ -16,18 +17,44 @@ const (
assignedIdentityAdditionCountName = "assigned_identity_addition_count"
assignedIdentityDeletionDurationName = "assigned_identity_deletion_duration_seconds"
assignedIdentityDeletionCountName = "assigned_identity_deletion_count"
nodeManagedIdentityOperationsDurationName = "nodemanagedidentity_operations_duration_seconds"
managedIdentityControllerCycleDurationName = "managedidentitycontroller_cycle_duration_seconds"
managedIdentityControllerCycleCountName = "managedidentitycontroller_cycle_count"
managedIdentityControllerNewLeaderElectionCountName = "managedidentitycontroller_new_leader_election_count"
nmiOperationsDurationName = "nmi_operations_duration_seconds"
micCycleDurationName = "mic_cycle_duration_seconds"
micCycleCountName = "mic_cycle_count"
micNewLeaderElectionCountName = "mic_new_leader_election_count"
cloudProviderOperationsErrorsCountName = "cloud_provider_operations_errors_count"
cloudProviderOperationsDurationName = "cloud_provider_operations_duration_seconds"
kubernetesAPIOperationsErrorsCountName = "kubernetes_api_operations_errors_count"
imdsOperationsErrorsCountName = "imds_operations_errors_count"
imdsOperationsDurationName = "imds_operations_duration_seconds"
// AdalTokenFromMSIOperationName ...
AdalTokenFromMSIOperationName = "adal_token_msi"
// AdalTokenFromMSIWithUserAssignedIDOperationName ...
AdalTokenFromMSIWithUserAssignedIDOperationName = "adal_token_msi_userassignedid"
// AdalTokenOperationName ...
AdalTokenOperationName = "adal_token"
// GetVmssOperationName ...
GetVmssOperationName = "vmss_get"
// PutVmssOperationName ...
PutVmssOperationName = "vmss_create_or_update"
// GetVMOperationName ...
GetVMOperationName = "vm_get"
// PutVMOperationName ...
PutVMOperationName = "vm_create_or_update"
// AssignedIdentityDeletionOperationName ...
AssignedIdentityDeletionOperationName = "assigned_identity_deletion"
// AssignedIdentityAdditionOperationName ...
AssignedIdentityAdditionOperationName = "assigned_identity_addition"
// UpdateAzureAssignedIdentityStatusOperationName ...
UpdateAzureAssignedIdentityStatusOperationName = "update_azure_assigned_identity_status"
// GetPodListOperationName
GetPodListOperationName = "get_pod_list"
// GetSecretOperationName
GetSecretOperationName = "get_secret"
)
// The following variables are measures
var (
// AssignedIdentityAdditionDurationM is a measure that tracks the duration in seconds of assigned_identity_addition operations.
AssignedIdentityAdditionDurationM = stats.Float64(
assignedIdentityAdditionDurationName,
@ -51,29 +78,28 @@ var (
"Total number of assigned identity deletion operations",
stats.UnitDimensionless)
// NodeManagedIdentityOperationsDurationM is a measure that tracks the duration in seconds of nodemanagedidentity operations.
NodeManagedIdentityOperationsDurationM = stats.Float64(
nodeManagedIdentityOperationsDurationName,
"Duration in seconds of node managed identity operations",
stats.UnitMilliseconds)
// "operation_type", "status_code"
// ManagedIdentityControllerCycleDurationM is a measure that tracks the duration in seconds of single cycle in Managed Identity Controller.
ManagedIdentityControllerCycleDurationM = stats.Float64(
managedIdentityControllerCycleDurationName,
"Duration in seconds of single cycle in managed identity controller",
// NMIOperationsDurationM is a measure that tracks the duration in seconds of nmi operations.
NMIOperationsDurationM = stats.Float64(
nmiOperationsDurationName,
"Duration in seconds for nmi operations",
stats.UnitMilliseconds)
// ManagedIdentityControllerCycleCountM is a measure that tracks the cumulative number of cycles executed in managed identity controller.
ManagedIdentityControllerCycleCountM = stats.Int64(
managedIdentityControllerCycleCountName,
"Total number of cycles executed in managed identity controller",
// MICCycleDurationM is a measure that tracks the duration in seconds for single mic sync cycle.
MICCycleDurationM = stats.Float64(
micCycleDurationName,
"Duration in seconds for single mic sync cycle",
stats.UnitMilliseconds)
// MICCycleCountM is a measure that tracks the cumulative number of cycles executed in mic.
MICCycleCountM = stats.Int64(
micCycleCountName,
"Total number of cycles executed in mic",
stats.UnitDimensionless)
// ManagedIdentityControllerCycleCountM is a measure that tracks the cumulative number of new leader election in managed identity controller.
ManagedIdentityControllerNewLeaderElectionCountM = stats.Int64(
managedIdentityControllerNewLeaderElectionCountName,
"Total number of new leader election in managed identity controller",
// MICNewLeaderElectionCountM is a measure that tracks the cumulative number of new leader election in mic.
MICNewLeaderElectionCountM = stats.Int64(
micNewLeaderElectionCountName,
"Total number of new leader election in mic",
stats.UnitDimensionless)
// CloudProviderOperationsErrorsCountM is a measure that tracks the cumulative number of errors in cloud provider operations.
@ -81,21 +107,30 @@ var (
cloudProviderOperationsErrorsCountName,
"Total number of errors in cloud provider operations",
stats.UnitDimensionless)
// operation_type
// CloudProviderOperationsDurationM is a measure that tracks the duration in seconds of CloudProviderOperations operations.
CloudProviderOperationsDurationM = stats.Float64(
cloudProviderOperationsDurationName,
"Duration in seconds of cloudprovider operations",
stats.UnitMilliseconds)
// operation_type
// KubernetesAPIOperationsErrorsCountM is a measure that tracks the cumulative number of errors in cloud provider operations.
KubernetesAPIOperationsErrorsCountM = stats.Int64(
kubernetesAPIOperationsErrorsCountName,
"Total number of errors in kubernetes api operations",
stats.UnitDimensionless)
// operation_type
// ImdsOperationsErrorsCountM is a measure that tracks the cumulative number of errors in imds operations.
ImdsOperationsErrorsCountM = stats.Int64(
imdsOperationsErrorsCountName,
"Total number of errors in imds token operations",
stats.UnitDimensionless)
// ImdsOperationsDurationM is a measure that tracks the duration in seconds of imds operations.
ImdsOperationsDurationM = stats.Float64(
imdsOperationsDurationName,
"Duration in seconds of imds token operations",
stats.UnitMilliseconds)
)
var (
@ -114,7 +149,6 @@ func SinceInSeconds(start time.Time) float64 {
// registerViews register views to be collected by exporter
func registerViews() error {
views := []*view.View{
&view.View{
Description: AssignedIdentityAdditionDurationM.Description(),
@ -137,24 +171,24 @@ func registerViews() error {
Aggregation: view.Count(),
},
&view.View{
Description: NodeManagedIdentityOperationsDurationM.Description(),
Measure: NodeManagedIdentityOperationsDurationM,
Aggregation: view.Distribution(0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 10),
Description: NMIOperationsDurationM.Description(),
Measure: NMIOperationsDurationM,
Aggregation: view.Distribution(0.5, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100),
TagKeys: []tag.Key{operationTypeKey, statusCodeKey, namespaceKey, resourceKey},
},
&view.View{
Description: ManagedIdentityControllerCycleDurationM.Description(),
Measure: ManagedIdentityControllerCycleDurationM,
Aggregation: view.Distribution(0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 10),
Description: MICCycleDurationM.Description(),
Measure: MICCycleDurationM,
Aggregation: view.Distribution(0.5, 1, 5, 10, 30, 60, 120, 300, 600, 900, 1200),
},
&view.View{
Description: ManagedIdentityControllerCycleCountM.Description(),
Measure: ManagedIdentityControllerCycleCountM,
Description: MICCycleCountM.Description(),
Measure: MICCycleCountM,
Aggregation: view.Count(),
},
&view.View{
Description: ManagedIdentityControllerNewLeaderElectionCountM.Description(),
Measure: ManagedIdentityControllerNewLeaderElectionCountM,
Description: MICNewLeaderElectionCountM.Description(),
Measure: MICNewLeaderElectionCountM,
Aggregation: view.Count(),
},
&view.View{
@ -166,7 +200,7 @@ func registerViews() error {
&view.View{
Description: CloudProviderOperationsDurationM.Description(),
Measure: CloudProviderOperationsDurationM,
Aggregation: view.Distribution(0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 10),
Aggregation: view.Distribution(0.5, 1, 5, 10, 30, 60, 120, 300, 600, 900, 1200),
TagKeys: []tag.Key{operationTypeKey},
},
&view.View{
@ -175,6 +209,18 @@ func registerViews() error {
Aggregation: view.Count(),
TagKeys: []tag.Key{operationTypeKey},
},
&view.View{
Description: ImdsOperationsErrorsCountM.Description(),
Measure: ImdsOperationsErrorsCountM,
Aggregation: view.Count(),
TagKeys: []tag.Key{operationTypeKey},
},
&view.View{
Description: ImdsOperationsDurationM.Description(),
Measure: ImdsOperationsDurationM,
Aggregation: view.Distribution(0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 10),
TagKeys: []tag.Key{operationTypeKey},
},
}
err := view.Register(views...)
return err
@ -187,6 +233,10 @@ func record(ctx context.Context, ms ...stats.Measurement) {
// Reporter is stats reporter in the context
type Reporter struct {
// adding mutex lock to ensure thread safety
// TODO (aramase) remove this lock after confirming opencensus report
// call is thread-safe
mu sync.Mutex
ctx context.Context
}
@ -198,16 +248,21 @@ func NewReporter() (*Reporter, error) {
if err != nil {
return nil, err
}
return &Reporter{ctx: ctx}, nil
return &Reporter{ctx: ctx, mu: sync.Mutex{}}, nil
}
// Report records the given measure
func (r *Reporter) Report(ms ...stats.Measurement) {
r.mu.Lock()
record(r.ctx, ms...)
r.mu.Unlock()
}
// ReportOperationAndStatus records given measurements by operation type, status code for the given namespace and resource.
func (r *Reporter) ReportOperationAndStatus(operationType string, statusCode string, namespace string, resource string, ms ...stats.Measurement) error {
func (r *Reporter) ReportOperationAndStatus(operationType, statusCode, namespace, resource string, ms ...stats.Measurement) error {
r.mu.Lock()
defer r.mu.Unlock()
ctx, err := tag.New(
r.ctx,
tag.Insert(operationTypeKey, operationType),
@ -224,6 +279,9 @@ func (r *Reporter) ReportOperationAndStatus(operationType string, statusCode str
// ReportOperation records given measurement by operation type.
func (r *Reporter) ReportOperation(operationType string, measurement stats.Measurement) error {
r.mu.Lock()
defer r.mu.Unlock()
ctx, err := tag.New(
r.ctx,
tag.Insert(operationTypeKey, operationType),
@ -252,3 +310,28 @@ func RegisterAndExport(port string, log log.Logger) error {
log.Infof("Registered and exported metrics on port %s", port)
return nil
}
// ReportIMDSOperationError reports IMDS error count
func (r *Reporter) ReportIMDSOperationError(operation string) error {
return r.ReportOperation(operation, ImdsOperationsErrorsCountM.M(1))
}
// ReportIMDSOperationDuration reports IMDS operation duration
func (r *Reporter) ReportIMDSOperationDuration(operation string, duration time.Duration) error {
return r.ReportOperation(operation, ImdsOperationsDurationM.M(duration.Seconds()))
}
// ReportCloudProviderOperationError reports cloud provider operation error count
func (r *Reporter) ReportCloudProviderOperationError(operation string) error {
return r.ReportOperation(operation, CloudProviderOperationsErrorsCountM.M(1))
}
// ReportCloudProviderOperationDuration reports cloud provider operation duration
func (r *Reporter) ReportCloudProviderOperationDuration(operation string, duration time.Duration) error {
return r.ReportOperation(operation, CloudProviderOperationsDurationM.M(duration.Seconds()))
}
// ReportKubernetesAPIOperationError reports kubernetes operation error count
func (r *Reporter) ReportKubernetesAPIOperationError(operation string) error {
return r.ReportOperation(operation, KubernetesAPIOperationsErrorsCountM.M(1))
}

Просмотреть файл

@ -16,12 +16,12 @@ func TestBasicAndDurationReport(t *testing.T) {
testCounterMetric(t, reporter, AssignedIdentityAdditionCountM)
testCounterMetric(t, reporter, AssignedIdentityDeletionCountM)
testCounterMetric(t, reporter, ManagedIdentityControllerCycleCountM)
testCounterMetric(t, reporter, ManagedIdentityControllerNewLeaderElectionCountM)
testCounterMetric(t, reporter, MICCycleCountM)
testCounterMetric(t, reporter, MICNewLeaderElectionCountM)
testCounterMetric(t, reporter, CloudProviderOperationsErrorsCountM)
testCounterMetric(t, reporter, KubernetesAPIOperationsErrorsCountM)
testOperationDurationMetric(t, reporter, CloudProviderOperationsDurationM)
testOperationDurationMetric(t, reporter, NodeManagedIdentityOperationsDurationM)
testOperationDurationMetric(t, reporter, NMIOperationsDurationM)
}
// testOperationDurationMetric tests the duration metric and related tags

Просмотреть файл

@ -163,9 +163,7 @@ func NewMICClient(cloudconfig string, config *rest.Config, isNamespaced bool, sy
func (c *Client) Run() {
glog.Infof("Initiating MIC Leader election")
// counter to track number of mic election
if c.Reporter != nil {
c.Reporter.Report(metrics.ManagedIdentityControllerNewLeaderElectionCountM.M(1))
}
c.Reporter.Report(metrics.MICNewLeaderElectionCountM.M(1))
c.leaderElector.Run()
}
@ -370,15 +368,13 @@ func (c *Client) Sync(exit <-chan struct{}) {
glog.Infof("Total work cycles: %d, out of which work was done in: %d.", totalSyncCycles, totalWorkDoneCycles)
stats.Put(stats.Total, time.Since(begin))
if c.Reporter != nil {
c.Reporter.Report(
metrics.ManagedIdentityControllerCycleCountM.M(1),
metrics.ManagedIdentityControllerCycleDurationM.M(metrics.SinceInSeconds(begin)))
}
metrics.MICCycleCountM.M(1),
metrics.MICCycleDurationM.M(metrics.SinceInSeconds(begin)))
stats.PrintSync()
if workDone {
// We need to synchornize the cache inorder to get the latest updates. Sync cache has a bug in the current go client which caused thread leak.
// We need to synchronize the cache inorder to get the latest updates. Sync cache has a bug in the current go client which caused thread leak.
// Updating of go client has issues with case sensitivity. Avoid this issue by sleping for 500 milliseconds to reduce the chance
// of cache misses for assignedidentities updated in the previous cycle.
time.Sleep(time.Millisecond * 200)

Просмотреть файл

@ -10,6 +10,7 @@ import (
aadpodid "github.com/Azure/aad-pod-identity/pkg/apis/aadpodidentity/v1"
"github.com/Azure/aad-pod-identity/pkg/config"
"github.com/Azure/aad-pod-identity/pkg/metrics"
"github.com/golang/glog"
@ -604,6 +605,8 @@ func NewMICTestClient(eventCh chan aadpodid.EventType,
createDeleteBatch int64,
immutableUserMSIs map[string]bool) *TestMICClient {
reporter, _ := metrics.NewReporter()
realMICClient := &Client{
CloudClient: cpClient,
CRDClient: crdClient,
@ -615,6 +618,7 @@ func NewMICTestClient(eventCh chan aadpodid.EventType,
IsNamespaced: isNamespaced,
createDeleteBatch: createDeleteBatch,
ImmutableUserMSIsMap: immutableUserMSIs,
Reporter: reporter,
}
return &TestMICClient{

Просмотреть файл

@ -140,7 +140,7 @@ loop:
}
}
type appHandler func(*log.Entry, http.ResponseWriter, *http.Request)
type appHandler func(*log.Entry, http.ResponseWriter, *http.Request) string
type responseWriter struct {
http.ResponseWriter
@ -189,27 +189,34 @@ func (fn appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
}
}()
rw := newResponseWriter(w)
fn(logger, rw, r)
ns := fn(logger, rw, r)
latency := time.Since(start)
logger.Infof("Status (%d) took %d ns", rw.statusCode, latency.Nanoseconds())
namespace, _ := parseRequestHeader(r)
_, resource := parseRequestClientIDAndResource(r)
if appHandlerReporter != nil {
appHandlerReporter.ReportOperationAndStatus(
r.URL.Path,
strconv.Itoa(rw.statusCode),
namespace,
ns,
resource,
metrics.NodeManagedIdentityOperationsDurationM.M(metrics.SinceInSeconds(start)))
metrics.NMIOperationsDurationM.M(metrics.SinceInSeconds(start)))
}
}
func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) {
func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) (ns string) {
hostIP := parseRemoteAddr(r.RemoteAddr)
rqClientID, rqResource := parseRequestClientIDAndResource(r)
podns, podname := parseRequestHeader(r)
if podns == "" || podname == "" {
logger.Errorf("missing podname and podns from request")
http.Error(w, "missing 'podname' and 'podns' from request header", http.StatusBadRequest)
return
}
// set the ns so it can be used for metrics
ns = podns
if hostIP != localhost {
msg := "request remote address is not from a host"
logger.Error(msg)
@ -221,12 +228,6 @@ func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.R
http.Error(w, "parameter resource cannot be empty", http.StatusBadRequest)
return
}
podns, podname := parseRequestHeader(r)
if podns == "" || podname == "" {
logger.Errorf("missing podname and podns from request")
http.Error(w, "missing 'podname' and 'podns' from request header", http.StatusBadRequest)
return
}
podIDs, identityInCreatedStateFound, err := s.listPodIDsWithRetry(r.Context(), s.KubeClient, logger, podns, podname, rqClientID)
if err != nil {
msg := fmt.Sprintf("no AzureAssignedIdentity found for pod:%s/%s in assigned state", podns, podname)
@ -270,6 +271,7 @@ func (s *Server) hostHandler(logger *log.Entry, w http.ResponseWriter, r *http.R
return
}
w.Write(response)
return
}
// msiResponse marshals in a format that matches the underlying
@ -336,7 +338,7 @@ func (s *Server) getTokenForExceptedPod(logger *log.Entry, rqClientID, rqResourc
// AAD using adal
// if the requests contains client id it validates it against the admin
// configured id.
func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) {
func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) (ns string) {
podIP := parseRemoteAddr(r.RemoteAddr)
rqClientID, rqResource := parseRequestClientIDAndResource(r)
@ -357,6 +359,8 @@ func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Re
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
// set ns for using in metrics
ns = podns
exceptionList, err := s.KubeClient.ListPodIdentityExceptions(podns)
if err != nil {
logger.Errorf("getting list of azurepodidentityexceptions failed with error: %+v", err)
@ -398,6 +402,7 @@ func (s *Server) msiHandler(logger *log.Entry, w http.ResponseWriter, r *http.Re
return
}
w.Write(response)
return
}
func getTokenForMatchingID(kubeClient k8s.Client, logger *log.Entry, rqClientID string, rqResource string, podIDs []aadpodid.AzureIdentity) (token *adal.Token, clientID string, err error) {
@ -467,7 +472,7 @@ func parseRequestClientIDAndResource(r *http.Request) (clientID string, resource
}
// defaultPathHandler creates a new request and returns the response body and code
func (s *Server) defaultPathHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) {
func (s *Server) defaultPathHandler(logger *log.Entry, w http.ResponseWriter, r *http.Request) (ns string) {
client := &http.Client{}
req, err := http.NewRequest(r.Method, r.URL.String(), r.Body)
if err != nil || req == nil {
@ -499,6 +504,7 @@ func (s *Server) defaultPathHandler(logger *log.Entry, w http.ResponseWriter, r
http.Error(w, err.Error(), http.StatusInternalServerError)
}
w.Write(body)
return
}
// forbiddenHandler responds to any request with HTTP 403 Forbidden

Просмотреть файл

@ -5,7 +5,9 @@ import (
"testing"
aadpodid "github.com/Azure/aad-pod-identity/pkg/apis/aadpodidentity/v1"
auth "github.com/Azure/aad-pod-identity/pkg/auth"
"github.com/Azure/aad-pod-identity/pkg/k8s"
"github.com/Azure/aad-pod-identity/pkg/metrics"
log "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -14,6 +16,11 @@ import (
func TestGetTokenForMatchingIDBySP(t *testing.T) {
fakeClient := fake.NewSimpleClientset()
reporter, err := metrics.NewReporter()
if err != nil {
t.Fatalf("expected nil error, got: %+v", err)
}
auth.InitReporter(reporter)
secret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "clientSecret"}, Data: make(map[string][]byte)}
val, _ := base64.StdEncoding.DecodeString("YWJjZA==")