From a64934fa569b231ea9542bae70a9b63bc0a9f204 Mon Sep 17 00:00:00 2001 From: Quentin Smith Date: Wed, 21 Sep 2016 13:55:38 -0400 Subject: [PATCH] devapp: add median close time per create time plot Also implements ymin, ymax for log scale. Change-Id: I872184bb5b3dbaee685c068bc3381e075fecf146 Reviewed-on: https://go-review.googlesource.com/29530 Reviewed-by: Brad Fitzpatrick --- devapp/static/stats.html | 2 + devapp/static/svg.html | 12 +++-- devapp/stats.go | 104 ++++++++++++++++++++++++++++++++++----- godash/stats/issues.go | 19 ++++++- 4 files changed, 120 insertions(+), 17 deletions(-) diff --git a/devapp/static/stats.html b/devapp/static/stats.html index 38fa38ca..447b4743 100644 --- a/devapp/static/stats.html +++ b/devapp/static/stats.html @@ -10,5 +10,7 @@

Age of open issues

+

Median close time for issues opened per day

+ diff --git a/devapp/static/svg.html b/devapp/static/svg.html index 851ffca5..33c37d02 100644 --- a/devapp/static/svg.html +++ b/devapp/static/svg.html @@ -7,9 +7,10 @@
  • xscale
    • xscale=log - log scale
    • -
    • xscale=lin - linear scale. Also supports xmin and xmax parameters.
    • +
    • xscale=lin - linear scale.
  • -
  • yscale - same as xscale but for y axis
  • +
  • x{min,max} - set the minimum and maximum of the x scale. These only take effect if xscale is also supplied.
  • +
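  • y{scale,min,max} - same as x* but for the y axis. When yscale is omitted, the y axis uses a linear scale that includes 0, and ymin/ymax still apply.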
  • pivot - predefined graphs
    • pivot=opencount - plot number of open issues over time. With group=release plots the number of open issues by release over time.
    • @@ -23,7 +24,7 @@
    • column - column to bucket issues by
        -
      • column={Created,Closed,Updated}{,Day,Month,Year} - time, day, month, or year the issue was created, closed, or updated
      • +
      • column={Created,Closed,Updated}{,Day,Week,Month,Year} - time, day, week, month, or year the issue was created, closed, or updated
      • column=UpdateAge - time since issue was last updated
    • @@ -33,6 +34,11 @@
    • agg=ecdf - CDF of values
    • agg=bin - automatically chosen histogram bins
    • agg=density - best fit PDF of values
    • +
    • agg=percentile - plots the minimum, 25th percentile, median, 75th percentile, and maximum of a second column (currently hardcoded to Open) over a moving window (defaulting to 30 days) +
        +
      • window=24h - width of the moving window, in Go time.ParseDuration syntax (e.g. 720h for 30 days; there is no day unit)
      • +
      +
  • diff --git a/devapp/stats.go b/devapp/stats.go index d433414f..13f3781f 100644 --- a/devapp/stats.go +++ b/devapp/stats.go @@ -7,6 +7,7 @@ package devapp import ( "encoding/json" "fmt" + "image/color" "math" "net/http" "regexp" @@ -17,9 +18,11 @@ import ( "golang.org/x/build/godash" gdstats "golang.org/x/build/godash/stats" + "github.com/aclements/go-gg/generic/slice" "github.com/aclements/go-gg/gg" "github.com/aclements/go-gg/ggstat" "github.com/aclements/go-gg/table" + "github.com/aclements/go-moremath/stats" "github.com/kylelemons/godebug/pretty" "golang.org/x/net/context" "google.golang.org/appengine" @@ -223,6 +226,38 @@ func (o openCount) F(input table.Grouping) table.Grouping { }) } +// windowedPercentiles computes the 0, 25, 50, 75, and 100th +// percentile of the values in column Y over the range (X[i]-Window, +// X[i]). +type windowedPercentiles struct { + Window time.Duration + // X must name a time.Time column, Y must name a time.Duration column. + X, Y string +} + +// TODO: This ought to be able to operate on any float64-convertible +// column, but MapCols doesn't use slice.Convert. 
+func (p windowedPercentiles) F(input table.Grouping) table.Grouping { + return table.MapCols(input, func(xs []time.Time, ys []time.Duration, outMin []time.Duration, out25 []time.Duration, out50 []time.Duration, out75 []time.Duration, outMax []time.Duration, points []int) { + var ysFloat []float64 + slice.Convert(&ysFloat, ys) + for i, x := range xs { + start := x.Add(-p.Window) + iStart := sort.Search(len(xs), func(j int) bool { return xs[j].After(start) }) + + data := ysFloat[iStart : i+1] + points[i] = len(data) // XXX + + s := stats.Sample{Xs: data}.Copy().Sort() + + min, max := s.Bounds() + outMin[i], outMax[i] = time.Duration(min), time.Duration(max) + p25, p50, p75 := s.Percentile(.25), s.Percentile(.5), s.Percentile(.75) + out25[i], out50[i], out75[i] = time.Duration(p25), time.Duration(p50), time.Duration(p75) + } + }, p.X, p.Y)("min "+p.Y, "p25 "+p.Y, "median "+p.Y, "p75 "+p.Y, "max "+p.Y, "points "+p.Y) +} + func argtoi(req *http.Request, arg string) (int, bool, error) { val := req.Form.Get(arg) if val != "" { @@ -239,14 +274,23 @@ func plot(w http.ResponseWriter, req *http.Request, stats table.Grouping) error plot := gg.NewPlot(stats) plot.Stat(releaseFilter{}) for _, aes := range []string{"x", "y"} { + var s gg.ContinuousScaler switch scale := req.Form.Get(aes + "scale"); scale { case "log": - ls := gg.NewLogScaler(10) + s = gg.NewLogScaler(10) // Our plots tend to go to 0, which makes log scales unhappy. 
- ls.SetMin(1) - plot.SetScale(aes, ls) + s.SetMin(1) case "lin": - s := gg.NewLinearScaler() + s = gg.NewLinearScaler() + case "": + if aes == "y" { + s = gg.NewLinearScaler() + s.Include(0) + } + default: + return fmt.Errorf("unknown %sscale %q", aes, scale) + } + if s != nil { max, ok, err := argtoi(req, aes+"max") if err != nil { return err @@ -260,14 +304,6 @@ func plot(w http.ResponseWriter, req *http.Request, stats table.Grouping) error s.SetMin(min) } plot.SetScale(aes, s) - case "": - if aes == "y" { - s := gg.NewLinearScaler() - s.Include(0) - plot.SetScale(aes, s) - } - default: - return fmt.Errorf("unknown %sscale %q", aes, scale) } } switch pivot := req.Form.Get("pivot"); pivot { @@ -368,10 +404,54 @@ func plot(w http.ResponseWriter, req *http.Request, stats table.Grouping) error X: column, Y: "probability density", }) + case "percentile": + window := 30 * 24 * time.Hour + if win := req.Form.Get("window"); win != "" { + var err error + window, err = time.ParseDuration(win) + if err != nil { + return err + } + } + plot.Stat(windowedPercentiles{ + Window: window, + X: column, + Y: "Open", + }) + // plot.Stat(ggstat.Agg(column)(ggstat.AggMin("Open"), ggstat.AggMax("Open"), ggstat.AggPercentile("median", .5, "Open"), ggstat.AggPercentile("p25", .25, "Open"), ggstat.AggPercentile("p75", .75, "Open"))) + /* + plot.Add(gg.LayerPaths{ + X: column, + Y: "points Open", + }) + */ + plot.Add(gg.LayerArea{ + X: column, + Upper: "max Open", + Lower: "min Open", + Fill: plot.Const(color.Gray{192}), + }) + plot.Add(gg.LayerArea{ + X: column, + Upper: "p75 Open", + Lower: "p25 Open", + Fill: plot.Const(color.Gray{128}), + }) + plot.Add(gg.LayerPaths{ + X: column, + Y: "median Open", + }) + default: + return fmt.Errorf("unknown agg %q", agg) } default: return fmt.Errorf("unknown pivot %q", pivot) } + if req.Form.Get("raw") != "" { + w.Header().Set("Content-Type", "text/plain") + table.Fprint(w, plot.Data()) + return nil + } w.Header().Set("Content-Type", 
"image/svg+xml") plot.WriteSVG(w, 1200, 600) return nil diff --git a/godash/stats/issues.go b/godash/stats/issues.go index 92004fab..80f02358 100644 --- a/godash/stats/issues.go +++ b/godash/stats/issues.go @@ -11,6 +11,20 @@ import ( "golang.org/x/build/godash" ) +func truncateWeek(t time.Time) time.Time { + year, month, day := t.Date() + loc := t.Location() + _, week1 := t.ISOWeek() + for { + day-- + tnew := time.Date(year, month, day, 0, 0, 0, 0, loc) + if _, week2 := tnew.ISOWeek(); week1 != week2 { + return t + } + t = tnew + } +} + // IssueStats prepares a table.Grouping with information about the issues found in s, which can be used for later plotting. func IssueStats(s *godash.Stats) table.Grouping { var nums []int @@ -23,15 +37,16 @@ func IssueStats(s *godash.Stats) table.Grouping { tb.Add("Number", nums) g := table.Grouping(tb.Done()) for _, in := range []string{"Created", "Closed", "Updated"} { - g = table.MapCols(g, func(in []time.Time, outD, outM, outY []time.Time) { + g = table.MapCols(g, func(in []time.Time, outD, outW, outM, outY []time.Time) { for i, t := range in { year, month, day := t.Date() loc := t.Location() outD[i] = time.Date(year, month, day, 0, 0, 0, 0, loc) + outW[i] = truncateWeek(t) outM[i] = time.Date(year, month, 1, 0, 0, 0, 0, loc) outY[i] = time.Date(year, time.January, 1, 0, 0, 0, 0, loc) } - }, in)(in+"Day", in+"Month", in+"Year") + }, in)(in+"Day", in+"Week", in+"Month", in+"Year") } g = table.MapCols(g, func(created, updated, closed []time.Time, open, updateAge []time.Duration) { for i := range created {