* go modules enabled
* go -> go/vt/orchestrator
* go/cmd -> go/cmd
* vendor -> external: golib, raft, zk
* imports fixed (example sketch below)
* lint checks are failing
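
For example, the import rewrite follows this pattern (a representative sketch; the upstream paths are assumed from the openark/orchestrator origin referenced in the source headers):

package orchestrator // hypothetical example file

import (
	// was: "github.com/openark/golib/log" (vendored dependency, now under external/)
	"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
	// was: "github.com/openark/orchestrator/go/inst"
	"vitess.io/vitess/go/vt/orchestrator/inst"
)

// reference the imports so this example compiles standalone
var _ = log.Info
var _ = inst.InstanceKey{}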

Signed-off-by: Sugu Sougoumarane <ssougou@gmail.com>
Sugu Sougoumarane 2020-08-13 16:24:41 -07:00
Parent 3a640b0685
Commit a478fe1a88
174 changed files with 49776 additions and 3 deletions

23
go.mod

@@ -9,17 +9,21 @@ require (
github.com/GeertJohan/go.rice v1.0.0
github.com/PuerkitoBio/goquery v1.5.1
github.com/TylerBrock/colorjson v0.0.0-20180527164720-95ec53f28296
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 // indirect
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878
github.com/aws/aws-sdk-go v1.28.8
github.com/buger/jsonparser v0.0.0-20200322175846-f7e751efca13
github.com/cespare/xxhash/v2 v2.1.1
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd // indirect
github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 // indirect
github.com/coreos/bbolt v1.3.2 // indirect
github.com/coreos/etcd v3.3.10+incompatible
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
github.com/corpix/uarand v0.1.1 // indirect
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432
github.com/evanphx/json-patch v4.5.0+incompatible
github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab
github.com/go-sql-driver/mysql v1.5.0
github.com/gogo/protobuf v1.3.1
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
@@ -34,11 +38,12 @@ require (
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
github.com/hashicorp/consul/api v1.5.0
github.com/hashicorp/go-immutable-radix v1.1.0 // indirect
github.com/hashicorp/go-msgpack v0.5.5 // indirect
github.com/hashicorp/go-msgpack v0.5.5
github.com/hashicorp/go-sockaddr v1.0.2 // indirect
github.com/hashicorp/go-uuid v1.0.2 // indirect
github.com/hashicorp/golang-lru v0.5.3 // indirect
github.com/hashicorp/serf v0.9.2 // indirect
github.com/howeyc/gopass v0.0.0-20190910152052-7cb4b85ec19c
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428
github.com/imdario/mergo v0.3.6 // indirect
github.com/klauspost/compress v1.4.1 // indirect
@@ -48,22 +53,32 @@ require (
github.com/krishicks/yaml-patch v0.0.10
github.com/magiconair/properties v1.8.1
github.com/manifoldco/promptui v0.7.0
github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8
github.com/martini-contrib/gzip v0.0.0-20151124214156-6c035326b43f
github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11
github.com/mattn/go-sqlite3 v1.14.0
github.com/minio/minio-go v0.0.0-20190131015406-c8a261de75c1
github.com/mitchellh/go-ps v1.0.0 // indirect
github.com/mitchellh/go-testing-interface v1.14.0 // indirect
github.com/mitchellh/mapstructure v1.2.3 // indirect
github.com/montanaflynn/stats v0.6.3
github.com/olekukonko/tablewriter v0.0.5-0.20200416053754-163badb3bac6
github.com/onsi/ginkgo v1.10.3 // indirect
github.com/onsi/gomega v1.7.1 // indirect
github.com/opentracing-contrib/go-grpc v0.0.0-20180928155321-4b5a12d3ff02
github.com/opentracing/opentracing-go v1.1.0
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pborman/uuid v1.2.0
github.com/philhofer/fwd v1.0.0 // indirect
github.com/pires/go-proxyproto v0.0.0-20191211124218-517ecdf5bb2b
github.com/pkg/errors v0.8.1
github.com/prometheus/client_golang v1.4.1
github.com/prometheus/common v0.9.1
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e
github.com/satori/go.uuid v1.2.0 // indirect
github.com/sjmudd/stopwatch v0.0.0-20170613150411-f380bf8a9be1
github.com/smartystreets/goconvey v1.6.4 // indirect
github.com/spf13/cobra v0.0.5
github.com/stretchr/testify v1.4.0
@@ -78,7 +93,7 @@ require (
github.com/z-division/go-zookeeper v0.0.0-20190128072838-6d7457066b9b
golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975
golang.org/x/lint v0.0.0-20190409202823-959b441ac422
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
golang.org/x/text v0.3.2
@@ -89,8 +104,10 @@ require (
google.golang.org/grpc v1.24.0
gopkg.in/DataDog/dd-trace-go.v1 v1.17.0
gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d // indirect
gopkg.in/gcfg.v1 v1.2.3
gopkg.in/ini.v1 v1.51.0 // indirect
gopkg.in/ldap.v2 v2.5.0
gopkg.in/warnings.v0 v0.1.2 // indirect
gotest.tools v2.2.0+incompatible
honnef.co/go/tools v0.0.1-2019.2.3
k8s.io/apiextensions-apiserver v0.17.3

38
go.sum

@@ -70,6 +70,7 @@ github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5z
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e h1:QEF07wC0T1rKkctt1RINW/+RMTVmiwxETico2l3gxJA=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6 h1:G1bPvciwNyF7IUmKXNt9Ak3m6u9DE1rF+RmtIkBpVdA=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM=
@@ -110,6 +111,8 @@ github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:z
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w=
github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI=
github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA=
github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 h1:sDMmm+q/3+BukdIpxwO365v/Rbspp2Nt5XntgQRXq8Q=
github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM=
github.com/coreos/bbolt v1.3.2 h1:wZwiHHUieZCquLkDL0B8UhzreNWsPHooDAG3q34zk0s=
github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
github.com/coreos/etcd v3.3.10+incompatible h1:jFneRYjIvLMLhDLCzuTuU4rSJUjRplcJQ7pD7MnhC04=
@@ -132,6 +135,8 @@ github.com/corpix/uarand v0.1.1 h1:RMr1TWc9F4n5jiPDzFHtmaUXLKLNUFK0SgCLo4BhX/U=
github.com/corpix/uarand v0.1.1/go.mod h1:SFKZvkcRoLqVRFZ4u25xPmp6m9ktANfbpXZ7SJ0/FNU=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432 h1:M5QgkYacWj0Xs8MhpIK/5uwU02icXpEoSo9sM2aRCps=
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432/go.mod h1:xwIwAxMvYnVrGJPe2FKx5prTrnAjGOD8zvDOnxnrrkM=
github.com/daaku/go.zipexe v1.0.0 h1:VSOgZtH418pH9L16hC/JrgSNJbbAL26pj7lmD1+CGdY=
github.com/daaku/go.zipexe v1.0.0/go.mod h1:z8IiR6TsVLEYKwXAoE/I+8ys/sDkgTzSL0CLnGVd57E=
github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -168,6 +173,8 @@ github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab h1:xveKWz2iaueeTaUgdetzel+U7exyigDYBryyVfV/rZk=
github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8=
github.com/go-openapi/analysis v0.0.0-20180825180245-b006789cd277/go.mod h1:k70tL6pCuVxPJOHXQ+wIac1FUrvNkHolPie/cLEU6hI=
github.com/go-openapi/analysis v0.17.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik=
github.com/go-openapi/analysis v0.18.0/go.mod h1:IowGgpVeD0vNm45So8nr+IcQ3pxVtpRoBWb8PVZO0ik=
@@ -334,6 +341,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
github.com/hashicorp/serf v0.9.0/go.mod h1:YL0HO+FifKOW2u1ke99DGVu1zhcpZzNwrLIqBC7vbYU=
github.com/hashicorp/serf v0.9.2 h1:yJoyfZXo4Pk2p/M/viW+YLibBFiIbKoP79gu7kDAFP0=
github.com/hashicorp/serf v0.9.2/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
github.com/howeyc/gopass v0.0.0-20190910152052-7cb4b85ec19c h1:aY2hhxLhjEAbfXOx2nRJxCXezC6CO2V/yN+OCr1srtk=
github.com/howeyc/gopass v0.0.0-20190910152052-7cb4b85ec19c/go.mod h1:lADxMC39cJJqL93Duh1xhAs4I2Zs8mKS89XWXFGp9cs=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428 h1:Mo9W14pwbO9VfRe+ygqZ8dFbPpoIK1HFrG/zjTuQ+nc=
@@ -402,6 +411,12 @@ github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN
github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs=
github.com/manifoldco/promptui v0.7.0 h1:3l11YT8tm9MnwGFQ4kETwkzpAwY2Jt9lCrumCUW4+z4=
github.com/manifoldco/promptui v0.7.0/go.mod h1:n4zTdgP0vr0S3w7/O/g98U+e0gwLScEXGwov2nIKuGQ=
github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8 h1:1ded5x5QpCLsyTH5ct62Rh1RXPFnn0/dubCqAeh+stU=
github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8/go.mod h1:ahTFgV/NtzY/CALneRrC67m1dis5arHTQDfyIhKk69E=
github.com/martini-contrib/gzip v0.0.0-20151124214156-6c035326b43f h1:wVDxEVZP1eiPIlHVaafUAEUDtyl6ytjHv3egJVbyfOk=
github.com/martini-contrib/gzip v0.0.0-20151124214156-6c035326b43f/go.mod h1:jhUB0rZB2TPWqy0yGugKRRictO591eSO7If7O4MfCaA=
github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11 h1:YFh+sjyJTMQSYjKwM4dFKhJPJC/wfo98tPUc17HdoYw=
github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11/go.mod h1:Ah2dBMoxZEqk118as2T4u4fjfXarE0pPnMJaArZQZsI=
github.com/mattn/go-colorable v0.0.9 h1:UVL0vNpWh04HeJXV0KLcaT7r06gOH2l4OW6ddYRUIY4=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
@@ -422,6 +437,8 @@ github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzp
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54=
github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-sqlite3 v1.14.0 h1:mLyGNKR8+Vv9CAU7PphKa2hkEqxxhn8i32J6FPj1/QA=
github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/miekg/dns v1.0.14 h1:9jZdLNd/P4+SfEJ0TNyxYpsK8N4GtfylBLqtbYN1sbA=
@@ -453,6 +470,8 @@ github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lN
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/montanaflynn/stats v0.6.3 h1:F8446DrvIF5V5smZfZ8K9nrmmix0AFgevPdLruGOmzk=
github.com/montanaflynn/stats v0.6.3/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
@@ -479,9 +498,14 @@ github.com/opentracing-contrib/go-grpc v0.0.0-20180928155321-4b5a12d3ff02 h1:0R5
github.com/opentracing-contrib/go-grpc v0.0.0-20180928155321-4b5a12d3ff02/go.mod h1:JNdpVEzCpXBgIiv4ds+TzhN1hrtxq6ClLrTlT9OQRSc=
github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw=
github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/patrickmn/go-cache v1.0.0 h1:3gD5McaYs9CxjyK5AXGcq8gdeCARtd/9gJDUvVeaZ0Y=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
@@ -523,6 +547,8 @@ github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 h1:MkV+77GLUNo5oJ0jf870itWm3D0Sjh7+Za9gazKc5LQ=
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
@@ -530,6 +556,8 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/ryanuber/columnize v2.1.0+incompatible h1:j1Wcmh8OrK4Q7GXY+V7SVSY8nUWQxHW5TkBe7YUl+2s=
github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e h1:CGjiMQ0wMH4wtNWrlj6kiTbkPt2F3rbYnhGX6TWLfco=
github.com/samuel/go-zookeeper v0.0.0-20200724154423-2164a8ac840e/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
@@ -538,6 +566,8 @@ github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAm
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sjmudd/stopwatch v0.0.0-20170613150411-f380bf8a9be1 h1:acClJNSOjUrAUKW+ZneCZymCFDWtSaJG5YQl8FoOlyI=
github.com/sjmudd/stopwatch v0.0.0-20170613150411-f380bf8a9be1/go.mod h1:Pgf1sZ2KrHK8vdRTV5UHGp80LT7HMUKuNAiKC402abY=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304 h1:Jpy1PXuP99tXNrhbq2BaPz9B+jNAvH1JPQQpG/9GCXY=
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
@@ -685,6 +715,8 @@ golang.org/x/net v0.0.0-20191004110552-13f9640d40b9 h1:rjwSpXsdiK0dV8/Naq3kAw9ym
golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0=
@@ -731,6 +763,8 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae h1:/WDfKMnPU+m5M4xB+6x4kaepxRw6jWvR5iDRdvjHgy8=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -817,6 +851,8 @@ gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qS
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/gcfg.v1 v1.2.3 h1:m8OOJ4ccYHnx2f4gQwpno8nAX5OGOh7RLaaz0pj3Ogs=
gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/ini.v1 v1.41.0 h1:Ka3ViY6gNYSKiVy71zXBEqKplnV35ImDLVG+8uoIklE=
@@ -831,6 +867,8 @@ gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=

171
go/cmd/orchestrator/main.go Normal file

@@ -0,0 +1,171 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"flag"
"fmt"
"os"
"vitess.io/vitess/go/vt/orchestrator/app"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
var AppVersion, GitCommit string
// main is the application's entry point. It will spawn either a CLI or an HTTP interface.
func main() {
configFile := flag.String("config", "", "config file name")
command := flag.String("c", "", "command, required. See full list of commands via 'orchestrator -c help'")
strict := flag.Bool("strict", false, "strict mode (more checks, slower)")
instance := flag.String("i", "", "instance, host_fqdn[:port] (e.g. db.company.com:3306, db.company.com)")
sibling := flag.String("s", "", "sibling instance, host_fqdn[:port]")
destination := flag.String("d", "", "destination instance, host_fqdn[:port] (synonym to -s)")
owner := flag.String("owner", "", "operation owner")
reason := flag.String("reason", "", "operation reason")
duration := flag.String("duration", "", "maintenance duration (format: 59s, 59m, 23h, 6d, 4w)")
pattern := flag.String("pattern", "", "regular expression pattern")
clusterAlias := flag.String("alias", "", "cluster alias")
pool := flag.String("pool", "", "Pool logical name (applies for pool-related commands)")
hostnameFlag := flag.String("hostname", "", "Hostname/fqdn/CNAME/VIP (applies for hostname/resolve related commands)")
discovery := flag.Bool("discovery", true, "auto discovery mode")
quiet := flag.Bool("quiet", false, "quiet")
verbose := flag.Bool("verbose", false, "verbose")
debug := flag.Bool("debug", false, "debug mode (very verbose)")
stack := flag.Bool("stack", false, "add stack trace upon error")
config.RuntimeCLIFlags.SkipBinlogSearch = flag.Bool("skip-binlog-search", false, "when matching via Pseudo-GTID, only use relay logs. This can save the hassle of searching for a non-existent pseudo-GTID entry, for example in servers with replication filters.")
config.RuntimeCLIFlags.SkipUnresolve = flag.Bool("skip-unresolve", false, "Do not unresolve a host name")
config.RuntimeCLIFlags.SkipUnresolveCheck = flag.Bool("skip-unresolve-check", false, "Skip/ignore checking an unresolve mapping (via hostname_unresolve table) resolves back to same hostname")
config.RuntimeCLIFlags.Noop = flag.Bool("noop", false, "Dry run; do not perform destructive operations")
config.RuntimeCLIFlags.BinlogFile = flag.String("binlog", "", "Binary log file name")
config.RuntimeCLIFlags.Statement = flag.String("statement", "", "Statement/hint")
config.RuntimeCLIFlags.GrabElection = flag.Bool("grab-election", false, "Grab leadership (only applies to continuous mode)")
config.RuntimeCLIFlags.PromotionRule = flag.String("promotion-rule", "prefer", "Promotion rule for register-candidate (prefer|neutral|prefer_not|must_not)")
config.RuntimeCLIFlags.Version = flag.Bool("version", false, "Print version and exit")
config.RuntimeCLIFlags.SkipContinuousRegistration = flag.Bool("skip-continuous-registration", false, "Skip CLI commands performing continuous registration (to reduce orchestrator backend db load)")
config.RuntimeCLIFlags.EnableDatabaseUpdate = flag.Bool("enable-database-update", false, "Enable database update, overrides SkipOrchestratorDatabaseUpdate")
config.RuntimeCLIFlags.IgnoreRaftSetup = flag.Bool("ignore-raft-setup", false, "Override RaftEnabled for CLI invocation (CLI by default not allowed for raft setups). NOTE: operations by CLI invocation may not reflect in all raft nodes.")
config.RuntimeCLIFlags.Tag = flag.String("tag", "", "tag to add ('tagname' or 'tagname=tagvalue') or to search ('tagname' or 'tagname=tagvalue' or comma separated 'tag0,tag1=val1,tag2' for intersection of all)")
flag.Parse()
if *destination != "" && *sibling != "" {
log.Fatalf("-s and -d are synonyms, yet both were specified. You're probably doing the wrong thing.")
}
switch *config.RuntimeCLIFlags.PromotionRule {
case "prefer", "neutral", "prefer_not", "must_not":
{
// OK
}
default:
{
log.Fatalf("-promotion-rule only supports prefer|neutral|prefer_not|must_not")
}
}
if *destination == "" {
*destination = *sibling
}
log.SetLevel(log.ERROR)
if *verbose {
log.SetLevel(log.INFO)
}
if *debug {
log.SetLevel(log.DEBUG)
}
if *stack {
log.SetPrintStackTrace(*stack)
}
if *config.RuntimeCLIFlags.Version {
fmt.Println(AppVersion)
fmt.Println(GitCommit)
return
}
startText := "starting orchestrator"
if AppVersion != "" {
startText += ", version: " + AppVersion
}
if GitCommit != "" {
startText += ", git commit: " + GitCommit
}
log.Info(startText)
if len(*configFile) > 0 {
config.ForceRead(*configFile)
} else {
config.Read("/etc/orchestrator.conf.json", "conf/orchestrator.conf.json", "orchestrator.conf.json")
}
if *config.RuntimeCLIFlags.EnableDatabaseUpdate {
config.Config.SkipOrchestratorDatabaseUpdate = false
}
if config.Config.Debug {
log.SetLevel(log.DEBUG)
}
if *quiet {
// Override!!
log.SetLevel(log.ERROR)
}
if config.Config.EnableSyslog {
log.EnableSyslogWriter("orchestrator")
log.SetSyslogLevel(log.INFO)
}
if config.Config.AuditToSyslog {
inst.EnableAuditSyslog()
}
config.RuntimeCLIFlags.ConfiguredVersion = AppVersion
config.MarkConfigurationLoaded()
if len(flag.Args()) == 0 && *command == "" {
// No command, no argument: just prompt
fmt.Println(app.AppPrompt)
return
}
helpTopic := ""
if flag.Arg(0) == "help" {
if flag.Arg(1) != "" {
helpTopic = flag.Arg(1)
}
if helpTopic == "" {
helpTopic = *command
}
if helpTopic == "" {
// hacky way to make the CLI kick in as if the user typed `orchestrator -c help cli`
*command = "help"
flag.Args()[0] = "cli"
}
}
switch {
case helpTopic != "":
app.HelpCommand(helpTopic)
case len(flag.Args()) == 0 || flag.Arg(0) == "cli":
app.CliWrapper(*command, *strict, *instance, *destination, *owner, *reason, *duration, *pattern, *clusterAlias, *pool, *hostnameFlag)
case flag.Arg(0) == "http":
app.Http(*discovery)
default:
fmt.Fprintln(os.Stderr, `Usage:
orchestrator --options... [cli|http]
See complete list of commands:
orchestrator -c help
Full blown documentation:
orchestrator`)
os.Exit(1)
}
}


@@ -0,0 +1,82 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package agent
import "vitess.io/vitess/go/vt/orchestrator/inst"
// LogicalVolume describes an LVM volume
type LogicalVolume struct {
Name string
GroupName string
Path string
IsSnapshot bool
SnapshotPercent float64
}
// Mount describes a file system mount point
type Mount struct {
Path string
Device string
LVPath string
FileSystem string
IsMounted bool
DiskUsage int64
MySQLDataPath string
MySQLDiskUsage int64
}
// Agent presents the data of an agent
type Agent struct {
Hostname string
Port int
Token string
LastSubmitted string
AvailableLocalSnapshots []string
AvailableSnapshots []string
LogicalVolumes []LogicalVolume
MountPoint Mount
MySQLRunning bool
MySQLDiskUsage int64
MySQLPort int64
MySQLDatadirDiskFree int64
MySQLErrorLogTail []string
}
// SeedOperation represents the high-level data & state of a seed operation
type SeedOperation struct {
SeedId int64
TargetHostname string
SourceHostname string
StartTimestamp string
EndTimestamp string
IsComplete bool
IsSuccessful bool
}
// SeedOperationState represents a single state (step) in a seed operation
type SeedOperationState struct {
SeedStateId int64
SeedId int64
StateTimestamp string
Action string
ErrorMessage string
}
// GetInstance builds an instance key for a given agent
func (this *Agent) GetInstance() *inst.InstanceKey {
return &inst.InstanceKey{Hostname: this.Hostname, Port: int(this.MySQLPort)}
}
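
A minimal usage sketch of these types (hostname and ports invented for illustration):

func exampleAgentKey() *inst.InstanceKey {
	a := Agent{Hostname: "db1.example.com", Port: 3002, MySQLPort: 3306}
	// The key identifies the agent's MySQL instance: hostname plus MySQL port,
	// not the agent's own HTTP port.
	return a.GetInstance()
}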


@@ -0,0 +1,944 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package agent
import (
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net"
"net/http"
"strings"
"sync"
"time"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
type httpMethodFunc func(uri string) (resp *http.Response, err error)
var SeededAgents chan *Agent = make(chan *Agent)
var httpClient *http.Client
var httpClientMutex = &sync.Mutex{}
// InitHttpClient gets called once, and initializes httpClient according to config.Config
func InitHttpClient() {
httpClientMutex.Lock()
defer httpClientMutex.Unlock()
if httpClient != nil {
return
}
httpTimeout := time.Duration(config.AgentHttpTimeoutSeconds) * time.Second
dialTimeout := func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, httpTimeout)
}
httpTransport := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: config.Config.AgentSSLSkipVerify},
Dial: dialTimeout,
ResponseHeaderTimeout: httpTimeout,
}
httpClient = &http.Client{Transport: httpTransport}
}
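
The nil check under a mutex is a hand-rolled "do once"; a sync.Once expresses the same guarantee. An equivalent sketch, for illustration only (the transport options mirror the function above):

var httpClientOnce sync.Once

func initHttpClientOnce() {
	httpClientOnce.Do(func() {
		httpTimeout := time.Duration(config.AgentHttpTimeoutSeconds) * time.Second
		httpClient = &http.Client{Transport: &http.Transport{
			TLSClientConfig:       &tls.Config{InsecureSkipVerify: config.Config.AgentSSLSkipVerify},
			ResponseHeaderTimeout: httpTimeout,
		}}
	})
}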
// httpGet is a convenience method for getting an http response from a URL, optionally skipping SSL cert verification
func httpGet(url string) (resp *http.Response, err error) {
return httpClient.Get(url)
}
// httpPost is a convenience method for posting text data
func httpPost(url string, bodyType string, content string) (resp *http.Response, err error) {
return httpClient.Post(url, bodyType, strings.NewReader(content))
}
// auditAgentOperation creates and writes a new audit entry for a given agent
func auditAgentOperation(auditType string, agent *Agent, message string) error {
instanceKey := &inst.InstanceKey{}
if agent != nil {
instanceKey = &inst.InstanceKey{Hostname: agent.Hostname, Port: int(agent.MySQLPort)}
}
return inst.AuditOperation(auditType, instanceKey, message)
}
// readResponse returns the body of an HTTP response
func readResponse(res *http.Response, err error) ([]byte, error) {
if err != nil {
return nil, err
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, err
}
// res.Status is text like "500 Internal Server Error"; compare the numeric status code
if res.StatusCode == http.StatusInternalServerError {
return body, errors.New("Response Status 500")
}
return body, nil
}
// SubmitAgent submits a new agent for listing
func SubmitAgent(hostname string, port int, token string) (string, error) {
_, err := db.ExecOrchestrator(`
replace
into host_agent (
hostname, port, token, last_submitted, count_mysql_snapshots
) VALUES (
?, ?, ?, NOW(), 0
)
`,
hostname,
port,
token,
)
if err != nil {
return "", log.Errore(err)
}
// Try to discover topology instances when an agent submits
go DiscoverAgentInstance(hostname, port)
return hostname, err
}
// DiscoverAgentInstance tries to discover the MySQL instance behind an agent, if a MySQL port is available
func DiscoverAgentInstance(hostname string, port int) error {
agent, err := GetAgent(hostname)
if err != nil {
log.Errorf("Couldn't get agent for %s: %v", hostname, err)
return err
}
instanceKey := agent.GetInstance()
instance, err := inst.ReadTopologyInstance(instanceKey)
if err != nil {
log.Errorf("Failed to read topology for %v. err=%+v", instanceKey, err)
return err
}
if instance == nil {
// ReadTopologyInstance returned neither an instance nor an error; report a real error
return log.Errorf("Failed to read topology for %v", instanceKey)
}
log.Infof("Discovered Agent Instance: %v", instance.Key)
return nil
}
// ForgetLongUnseenAgents will remove entries of all agents that have long since been last seen.
func ForgetLongUnseenAgents() error {
_, err := db.ExecOrchestrator(`
delete
from host_agent
where
last_submitted < NOW() - interval ? hour`,
config.Config.UnseenAgentForgetHours,
)
return err
}
// ReadOutdatedAgentsHosts returns agents that need to be updated
func ReadOutdatedAgentsHosts() ([]string, error) {
res := []string{}
query := `
select
hostname
from
host_agent
where
IFNULL(last_checked < now() - interval ? minute, 1)
`
err := db.QueryOrchestrator(query, sqlutils.Args(config.Config.AgentPollMinutes), func(m sqlutils.RowMap) error {
hostname := m.GetString("hostname")
res = append(res, hostname)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// ReadAgents returns a list of all known agents
func ReadAgents() ([]Agent, error) {
res := []Agent{}
query := `
select
hostname,
port,
token,
last_submitted,
mysql_port
from
host_agent
order by
hostname
`
err := db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error {
agent := Agent{}
agent.Hostname = m.GetString("hostname")
agent.Port = m.GetInt("port")
agent.MySQLPort = m.GetInt64("mysql_port")
agent.Token = ""
agent.LastSubmitted = m.GetString("last_submitted")
res = append(res, agent)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// readAgentBasicInfo returns the basic data for an agent directly from backend table (no agent access)
func readAgentBasicInfo(hostname string) (Agent, string, error) {
agent := Agent{}
token := ""
query := `
select
hostname,
port,
token,
last_submitted,
mysql_port
from
host_agent
where
hostname = ?
`
err := db.QueryOrchestrator(query, sqlutils.Args(hostname), func(m sqlutils.RowMap) error {
agent.Hostname = m.GetString("hostname")
agent.Port = m.GetInt("port")
agent.LastSubmitted = m.GetString("last_submitted")
agent.MySQLPort = m.GetInt64("mysql_port")
token = m.GetString("token")
return nil
})
if err != nil {
return agent, "", err
}
if token == "" {
return agent, "", log.Errorf("Cannot get agent/token: %s", hostname)
}
return agent, token, nil
}
// UpdateAgentLastChecked updates the last_checked timestamp in the orchestrator backend database
// for a given agent
func UpdateAgentLastChecked(hostname string) error {
_, err := db.ExecOrchestrator(`
update
host_agent
set
last_checked = NOW()
where
hostname = ?`,
hostname,
)
if err != nil {
return log.Errore(err)
}
return nil
}
// UpdateAgentInfo updates some agent state in backend table
func UpdateAgentInfo(hostname string, agent Agent) error {
_, err := db.ExecOrchestrator(`
update
host_agent
set
last_seen = NOW(),
mysql_port = ?,
count_mysql_snapshots = ?
where
hostname = ?`,
agent.MySQLPort,
len(agent.LogicalVolumes),
hostname,
)
if err != nil {
return log.Errore(err)
}
return nil
}
// baseAgentUri returns the base URI for accessing an agent
func baseAgentUri(agentHostname string, agentPort int) string {
protocol := "http"
if config.Config.AgentsUseSSL {
protocol = "https"
}
uri := fmt.Sprintf("%s://%s:%d/api", protocol, agentHostname, agentPort)
log.Debugf("orchestrator-agent uri: %s", uri)
return uri
}
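
For a hypothetical agent host, the helper yields, e.g.:

func exampleBaseAgentUri() {
	// hostname and port invented for illustration
	uri := baseAgentUri("db1.example.com", 3002)
	fmt.Println(uri) // http://db1.example.com:3002/api ("https" when AgentsUseSSL is set)
}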
// GetAgent gets a single agent status from the agent service. This involves multiple HTTP requests.
func GetAgent(hostname string) (Agent, error) {
agent, token, err := readAgentBasicInfo(hostname)
if err != nil {
return agent, log.Errore(err)
}
// All seems to be in order. Now make some inquiries from orchestrator-agent service:
{
uri := baseAgentUri(agent.Hostname, agent.Port)
log.Debugf("orchestrator-agent uri: %s", uri)
{
availableLocalSnapshotsUri := fmt.Sprintf("%s/available-snapshots-local?token=%s", uri, token)
body, err := readResponse(httpGet(availableLocalSnapshotsUri))
if err == nil {
err = json.Unmarshal(body, &agent.AvailableLocalSnapshots)
}
if err != nil {
log.Errore(err)
}
}
{
availableSnapshotsUri := fmt.Sprintf("%s/available-snapshots?token=%s", uri, token)
body, err := readResponse(httpGet(availableSnapshotsUri))
if err == nil {
err = json.Unmarshal(body, &agent.AvailableSnapshots)
}
if err != nil {
log.Errore(err)
}
}
{
lvSnapshotsUri := fmt.Sprintf("%s/lvs-snapshots?token=%s", uri, token)
body, err := readResponse(httpGet(lvSnapshotsUri))
if err == nil {
err = json.Unmarshal(body, &agent.LogicalVolumes)
}
if err != nil {
log.Errore(err)
}
}
{
mountUri := fmt.Sprintf("%s/mount?token=%s", uri, token)
body, err := readResponse(httpGet(mountUri))
if err == nil {
err = json.Unmarshal(body, &agent.MountPoint)
}
if err != nil {
log.Errore(err)
}
}
{
mySQLRunningUri := fmt.Sprintf("%s/mysql-status?token=%s", uri, token)
body, err := readResponse(httpGet(mySQLRunningUri))
if err == nil {
err = json.Unmarshal(body, &agent.MySQLRunning)
}
// Actually an error is OK here since "status" returns with non-zero exit code when MySQL not running
}
{
mySQLRunningUri := fmt.Sprintf("%s/mysql-port?token=%s", uri, token)
body, err := readResponse(httpGet(mySQLRunningUri))
if err == nil {
err = json.Unmarshal(body, &agent.MySQLPort)
}
if err != nil {
log.Errore(err)
}
}
{
mySQLDiskUsageUri := fmt.Sprintf("%s/mysql-du?token=%s", uri, token)
body, err := readResponse(httpGet(mySQLDiskUsageUri))
if err == nil {
err = json.Unmarshal(body, &agent.MySQLDiskUsage)
}
if err != nil {
log.Errore(err)
}
}
{
mySQLDatadirDiskFreeUri := fmt.Sprintf("%s/mysql-datadir-available-space?token=%s", uri, token)
body, err := readResponse(httpGet(mySQLDatadirDiskFreeUri))
if err == nil {
err = json.Unmarshal(body, &agent.MySQLDatadirDiskFree)
}
if err != nil {
log.Errore(err)
}
}
{
errorLogTailUri := fmt.Sprintf("%s/mysql-error-log-tail?token=%s", uri, token)
body, err := readResponse(httpGet(errorLogTailUri))
if err == nil {
err = json.Unmarshal(body, &agent.MySQLErrorLogTail)
}
if err != nil {
log.Errore(err)
}
}
}
return agent, err
}
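
Since the per-endpoint errors above are only logged, callers receive a best-effort, possibly partially populated Agent. A usage sketch (hostname invented):

func exampleGetAgent() {
	agent, err := GetAgent("db1.example.com")
	if err != nil {
		return // only the backend lookup in readAgentBasicInfo is fatal here
	}
	fmt.Printf("MySQL running: %v, datadir free: %d bytes\n",
		agent.MySQLRunning, agent.MySQLDatadirDiskFree)
}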
// executeAgentCommandWithMethodFunc requests an agent to execute a command via HTTP api, either GET or POST,
// with specific http method implementation by the caller
func executeAgentCommandWithMethodFunc(hostname string, command string, methodFunc httpMethodFunc, onResponse *func([]byte)) (Agent, error) {
agent, token, err := readAgentBasicInfo(hostname)
if err != nil {
return agent, err
}
// All seems to be in order. Now make some inquiries from orchestrator-agent service:
uri := baseAgentUri(agent.Hostname, agent.Port)
var fullCommand string
if strings.Contains(command, "?") {
fullCommand = fmt.Sprintf("%s&token=%s", command, token)
} else {
fullCommand = fmt.Sprintf("%s?token=%s", command, token)
}
log.Debugf("orchestrator-agent command: %s", fullCommand)
agentCommandUri := fmt.Sprintf("%s/%s", uri, fullCommand)
body, err := readResponse(methodFunc(agentCommandUri))
if err != nil {
return agent, log.Errore(err)
}
if onResponse != nil {
(*onResponse)(body)
}
auditAgentOperation("agent-command", &agent, command)
return agent, err
}
// executeAgentCommand requests an agent to execute a command via HTTP api
func executeAgentCommand(hostname string, command string, onResponse *func([]byte)) (Agent, error) {
httpFunc := func(uri string) (resp *http.Response, err error) {
return httpGet(uri)
}
return executeAgentCommandWithMethodFunc(hostname, command, httpFunc, onResponse)
}
// executeAgentPostCommand requests an agent to execute a command via HTTP POST
func executeAgentPostCommand(hostname string, command string, content string, onResponse *func([]byte)) (Agent, error) {
httpFunc := func(uri string) (resp *http.Response, err error) {
return httpPost(uri, "text/plain", content)
}
return executeAgentCommandWithMethodFunc(hostname, command, httpFunc, onResponse)
}
// Unmount unmounts the designated snapshot mount point
func Unmount(hostname string) (Agent, error) {
return executeAgentCommand(hostname, "umount", nil)
}
// MountLV requests an agent to mount the given volume on the designated mount point
func MountLV(hostname string, lv string) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("mountlv?lv=%s", lv), nil)
}
// RemoveLV requests an agent to remove a snapshot
func RemoveLV(hostname string, lv string) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("removelv?lv=%s", lv), nil)
}
// CreateSnapshot requests an agent to create a new snapshot -- a DIY implementation
func CreateSnapshot(hostname string) (Agent, error) {
return executeAgentCommand(hostname, "create-snapshot", nil)
}
// deleteMySQLDatadir requests an agent to purge the MySQL data directory (step before seed)
func deleteMySQLDatadir(hostname string) (Agent, error) {
return executeAgentCommand(hostname, "delete-mysql-datadir", nil)
}
// MySQLStop requests an agent to stop MySQL service
func MySQLStop(hostname string) (Agent, error) {
return executeAgentCommand(hostname, "mysql-stop", nil)
}
// MySQLStart requests an agent to start the MySQL service
func MySQLStart(hostname string) (Agent, error) {
return executeAgentCommand(hostname, "mysql-start", nil)
}
// ReceiveMySQLSeedData requests an agent to start listening for incoming seed data
func ReceiveMySQLSeedData(hostname string, seedId int64) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("receive-mysql-seed-data/%d", seedId), nil)
}
// SendMySQLSeedData requests an agent to start sending seed data
func SendMySQLSeedData(hostname string, targetHostname string, seedId int64) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("send-mysql-seed-data/%s/%d", targetHostname, seedId), nil)
}
// AbortSeedCommand requests an agent to abort seed send/receive (depending on the agent's role)
func AbortSeedCommand(hostname string, seedId int64) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("abort-seed/%d", seedId), nil)
}
func CustomCommand(hostname string, cmd string) (output string, err error) {
onResponse := func(body []byte) {
output = string(body)
log.Debugf("output: %v", output)
}
_, err = executeAgentCommand(hostname, fmt.Sprintf("custom-commands/%s", cmd), &onResponse)
return output, err
}
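
A usage sketch for CustomCommand (the command name is hypothetical; it must be defined in the agent's custom-commands configuration):

func exampleCustomCommand() {
	output, err := CustomCommand("db1.example.com", "du-mysql") // hypothetical command name
	if err != nil {
		log.Errore(err)
		return
	}
	log.Debugf("custom command output: %s", output)
}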
// seedCommandCompleted checks an agent to see if it thinks a seed was completed.
func seedCommandCompleted(hostname string, seedId int64) (Agent, bool, error) {
result := false
onResponse := func(body []byte) {
json.Unmarshal(body, &result)
}
agent, err := executeAgentCommand(hostname, fmt.Sprintf("seed-command-completed/%d", seedId), &onResponse)
return agent, result, err
}
// seedCommandSucceeded checks an agent to see if it thinks a seed was successful.
func seedCommandSucceeded(hostname string, seedId int64) (Agent, bool, error) {
result := false
onResponse := func(body []byte) {
json.Unmarshal(body, &result)
}
agent, err := executeAgentCommand(hostname, fmt.Sprintf("seed-command-succeeded/%d", seedId), &onResponse)
return agent, result, err
}
// AbortSeed will contact agents associated with a seed and request abort.
func AbortSeed(seedId int64) error {
seedOperations, err := AgentSeedDetails(seedId)
if err != nil {
return log.Errore(err)
}
for _, seedOperation := range seedOperations {
AbortSeedCommand(seedOperation.TargetHostname, seedId)
AbortSeedCommand(seedOperation.SourceHostname, seedId)
}
updateSeedComplete(seedId, errors.New("Aborted"))
return nil
}
// PostCopy will request an agent to invoke post-copy commands
func PostCopy(hostname, sourceHostname string) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("post-copy/?sourceHost=%s", sourceHostname), nil)
}
// SubmitSeedEntry submits a new seed operation entry, returning its unique ID
func SubmitSeedEntry(targetHostname string, sourceHostname string) (int64, error) {
res, err := db.ExecOrchestrator(`
insert
into agent_seed (
target_hostname, source_hostname, start_timestamp
) VALUES (
?, ?, NOW()
)
`,
targetHostname,
sourceHostname,
)
if err != nil {
return 0, log.Errore(err)
}
id, err := res.LastInsertId()
return id, err
}
// updateSeedComplete updates the seed entry, marking it complete and recording success or failure
func updateSeedComplete(seedId int64, seedError error) error {
_, err := db.ExecOrchestrator(`
update
agent_seed
set end_timestamp = NOW(),
is_complete = 1,
is_successful = ?
where
agent_seed_id = ?
`,
(seedError == nil),
seedId,
)
if err != nil {
return log.Errore(err)
}
return nil
}
// submitSeedStateEntry submits a seed state: a single step in the overall seed process
func submitSeedStateEntry(seedId int64, action string, errorMessage string) (int64, error) {
res, err := db.ExecOrchestrator(`
insert
into agent_seed_state (
agent_seed_id, state_timestamp, state_action, error_message
) VALUES (
?, NOW(), ?, ?
)
`,
seedId,
action,
errorMessage,
)
if err != nil {
return 0, log.Errore(err)
}
id, err := res.LastInsertId()
return id, err
}
// updateSeedStateEntry updates seed step state
func updateSeedStateEntry(seedStateId int64, reason error) error {
_, err := db.ExecOrchestrator(`
update
agent_seed_state
set error_message = ?
where
agent_seed_state_id = ?
`,
reason.Error(),
seedStateId,
)
if err != nil {
return log.Errore(err)
}
return reason
}
// FailStaleSeeds marks as failed seeds where no progress has been seen recently
func FailStaleSeeds() error {
_, err := db.ExecOrchestrator(`
update
agent_seed
set
is_complete=1,
is_successful=0
where
is_complete=0
and (
select
max(state_timestamp) as last_state_timestamp
from
agent_seed_state
where
agent_seed.agent_seed_id = agent_seed_state.agent_seed_id
) < now() - interval ? minute`,
config.Config.StaleSeedFailMinutes,
)
return err
}
// executeSeed is *the* function for taking a seed. It is a complex operation of testing, preparing, re-testing
// agents on both sides, initiating data transfer, following up, awaiting completion, diagnosing errors, cleaning up.
func executeSeed(seedId int64, targetHostname string, sourceHostname string) error {
var err error
var seedStateId int64
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("getting target agent info for %s", targetHostname), "")
targetAgent, err := GetAgent(targetHostname)
SeededAgents <- &targetAgent
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("getting source agent info for %s", sourceHostname), "")
sourceAgent, err := GetAgent(sourceHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Checking MySQL status on target %s", targetHostname), "")
if targetAgent.MySQLRunning {
return updateSeedStateEntry(seedStateId, errors.New("MySQL is running on target host. Cowardly refusing to proceed. Please stop the MySQL service"))
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Looking up available snapshots on source %s", sourceHostname), "")
if len(sourceAgent.LogicalVolumes) == 0 {
return updateSeedStateEntry(seedStateId, errors.New("No logical volumes found on source host"))
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Checking mount point on source %s", sourceHostname), "")
if sourceAgent.MountPoint.IsMounted {
return updateSeedStateEntry(seedStateId, errors.New("Volume already mounted on source host; please unmount"))
}
seedFromLogicalVolume := sourceAgent.LogicalVolumes[0]
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("%s Mounting logical volume: %s", sourceHostname, seedFromLogicalVolume.Path), "")
_, err = MountLV(sourceHostname, seedFromLogicalVolume.Path)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
sourceAgent, err = GetAgent(sourceHostname)
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("MySQL data volume on source host %s is %d bytes", sourceHostname, sourceAgent.MountPoint.MySQLDiskUsage), "")
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Erasing MySQL data on %s", targetHostname), "")
_, err = deleteMySQLDatadir(targetHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Acquiring target host datadir free space on %s", targetHostname), "")
targetAgent, err = GetAgent(targetHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
if sourceAgent.MountPoint.MySQLDiskUsage > targetAgent.MySQLDatadirDiskFree {
Unmount(sourceHostname)
return updateSeedStateEntry(seedStateId, fmt.Errorf("Not enough disk space on target host %s. Required: %d, available: %d. Bailing out.", targetHostname, sourceAgent.MountPoint.MySQLDiskUsage, targetAgent.MySQLDatadirDiskFree))
}
// ...
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("%s will now receive data in background", targetHostname), "")
ReceiveMySQLSeedData(targetHostname, seedId)
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Waiting %d seconds for %s to start listening for incoming data", config.Config.SeedWaitSecondsBeforeSend, targetHostname), "")
time.Sleep(time.Duration(config.Config.SeedWaitSecondsBeforeSend) * time.Second)
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("%s will now send data to %s in background", sourceHostname, targetHostname), "")
SendMySQLSeedData(sourceHostname, targetHostname, seedId)
copyComplete := false
numStaleIterations := 0
var bytesCopied int64 = 0
for !copyComplete {
targetAgentPoll, err := GetAgent(targetHostname)
if err != nil {
return log.Errore(err)
}
if targetAgentPoll.MySQLDiskUsage == bytesCopied {
numStaleIterations++
}
bytesCopied = targetAgentPoll.MySQLDiskUsage
copyFailed := false
if _, commandCompleted, _ := seedCommandCompleted(targetHostname, seedId); commandCompleted {
copyComplete = true
if _, commandSucceeded, _ := seedCommandSucceeded(targetHostname, seedId); !commandSucceeded {
// failed.
copyFailed = true
}
}
if numStaleIterations > 10 {
copyFailed = true
}
if copyFailed {
AbortSeedCommand(sourceHostname, seedId)
AbortSeedCommand(targetHostname, seedId)
Unmount(sourceHostname)
return updateSeedStateEntry(seedStateId, errors.New("10 iterations have passed without progress. Bailing out."))
}
var copyPct int64 = 0
if sourceAgent.MountPoint.MySQLDiskUsage > 0 {
copyPct = 100 * bytesCopied / sourceAgent.MountPoint.MySQLDiskUsage
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Copied %d/%d bytes (%d%%)", bytesCopied, sourceAgent.MountPoint.MySQLDiskUsage, copyPct), "")
if !copyComplete {
time.Sleep(30 * time.Second)
}
}
// Cleanup:
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Executing post-copy command on %s", targetHostname), "")
_, err = PostCopy(targetHostname, sourceHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("%s Unmounting logical volume: %s", sourceHostname, seedFromLogicalVolume.Path), "")
_, err = Unmount(sourceHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Starting MySQL on target: %s", targetHostname), "")
_, err = MySQLStart(targetHostname)
if err != nil {
return updateSeedStateEntry(seedStateId, err)
}
seedStateId, _ = submitSeedStateEntry(seedId, fmt.Sprintf("Submitting MySQL instance for discovery: %s", targetHostname), "")
SeededAgents <- &targetAgent
seedStateId, _ = submitSeedStateEntry(seedId, "Done", "")
return nil
}
// Seed is the entry point for making a seed
func Seed(targetHostname string, sourceHostname string) (int64, error) {
if targetHostname == sourceHostname {
return 0, log.Errorf("Cannot seed %s onto itself", targetHostname)
}
seedId, err := SubmitSeedEntry(targetHostname, sourceHostname)
if err != nil {
return 0, log.Errore(err)
}
go func() {
err := executeSeed(seedId, targetHostname, sourceHostname)
updateSeedComplete(seedId, err)
}()
return seedId, nil
}
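
Because executeSeed runs in a goroutine, callers poll the backend tables for progress. A sketch of driving a seed end to end (hostnames invented):

func exampleSeed() error {
	seedId, err := Seed("target.example.com", "source.example.com")
	if err != nil {
		return err
	}
	for {
		// Newest state first (ordered by agent_seed_state_id desc).
		if states, err := ReadSeedStates(seedId); err == nil && len(states) > 0 {
			log.Debugf("seed %d: %s", seedId, states[0].Action)
		}
		ops, err := AgentSeedDetails(seedId)
		if err != nil {
			return err
		}
		if len(ops) > 0 && ops[0].IsComplete {
			return nil
		}
		time.Sleep(10 * time.Second)
	}
}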
// readSeeds reads seeds from the backend table
func readSeeds(whereCondition string, args []interface{}, limit string) ([]SeedOperation, error) {
res := []SeedOperation{}
query := fmt.Sprintf(`
select
agent_seed_id,
target_hostname,
source_hostname,
start_timestamp,
end_timestamp,
is_complete,
is_successful
from
agent_seed
%s
order by
agent_seed_id desc
%s
`, whereCondition, limit)
err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error {
seedOperation := SeedOperation{}
seedOperation.SeedId = m.GetInt64("agent_seed_id")
seedOperation.TargetHostname = m.GetString("target_hostname")
seedOperation.SourceHostname = m.GetString("source_hostname")
seedOperation.StartTimestamp = m.GetString("start_timestamp")
seedOperation.EndTimestamp = m.GetString("end_timestamp")
seedOperation.IsComplete = m.GetBool("is_complete")
seedOperation.IsSuccessful = m.GetBool("is_successful")
res = append(res, seedOperation)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// ReadActiveSeedsForHost reads active seeds where host participates either as source or target
func ReadActiveSeedsForHost(hostname string) ([]SeedOperation, error) {
whereCondition := `
where
is_complete = 0
and (
target_hostname = ?
or source_hostname = ?
)
`
return readSeeds(whereCondition, sqlutils.Args(hostname, hostname), "")
}
// ReadRecentCompletedSeedsForHost reads recently completed seeds where host participates either as source or target
func ReadRecentCompletedSeedsForHost(hostname string) ([]SeedOperation, error) {
whereCondition := `
where
is_complete = 1
and (
target_hostname = ?
or source_hostname = ?
)
`
return readSeeds(whereCondition, sqlutils.Args(hostname, hostname), "limit 10")
}
// AgentSeedDetails reads details from backend table
func AgentSeedDetails(seedId int64) ([]SeedOperation, error) {
whereCondition := `
where
agent_seed_id = ?
`
return readSeeds(whereCondition, sqlutils.Args(seedId), "")
}
// ReadRecentSeeds reads seeds from backend table.
func ReadRecentSeeds() ([]SeedOperation, error) {
return readSeeds(``, sqlutils.Args(), "limit 100")
}
// ReadSeedStates reads the states for a given seed operation
func ReadSeedStates(seedId int64) ([]SeedOperationState, error) {
res := []SeedOperationState{}
query := `
select
agent_seed_state_id,
agent_seed_id,
state_timestamp,
state_action,
error_message
from
agent_seed_state
where
agent_seed_id = ?
order by
agent_seed_state_id desc
`
err := db.QueryOrchestrator(query, sqlutils.Args(seedId), func(m sqlutils.RowMap) error {
seedState := SeedOperationState{}
seedState.SeedStateId = m.GetInt64("agent_seed_state_id")
seedState.SeedId = m.GetInt64("agent_seed_id")
seedState.StateTimestamp = m.GetString("state_timestamp")
seedState.Action = m.GetString("state_action")
seedState.ErrorMessage = m.GetString("error_message")
res = append(res, seedState)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
func RelaylogContentsTail(hostname string, startCoordinates *inst.BinlogCoordinates, onResponse *func([]byte)) (Agent, error) {
return executeAgentCommand(hostname, fmt.Sprintf("mysql-relaylog-contents-tail/%s/%d", startCoordinates.LogFile, startCoordinates.LogPos), onResponse)
}
func ApplyRelaylogContents(hostname string, content string) (Agent, error) {
return executeAgentPostCommand(hostname, "apply-relaylog-contents", content, nil)
}


@@ -0,0 +1,78 @@
/*
Copyright 2017 GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package agent
import (
"encoding/json"
"fmt"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
func SyncReplicaRelayLogs(instance, otherInstance *inst.Instance) (*inst.Instance, error) {
var err error
var found bool
var nextCoordinates *inst.BinlogCoordinates
var content string
onResponse := func(contentBytes []byte) {
json.Unmarshal(contentBytes, &content)
}
log.Debugf("SyncReplicaRelayLogs: verifying replication is stopped")
if !instance.ReplicationThreadsStopped() {
return instance, log.Errorf("SyncReplicaRelayLogs: replication on %+v must not run", instance.Key)
}
if !otherInstance.ReplicationThreadsStopped() {
return instance, log.Errorf("SyncReplicaRelayLogs: replication on %+v must not run", otherInstance.Key)
}
log.Debugf("SyncReplicaRelayLogs: correlating coordinates of %+v on %+v", instance.Key, otherInstance.Key)
_, _, nextCoordinates, found, err = inst.CorrelateRelaylogCoordinates(instance, nil, otherInstance)
if err != nil {
goto Cleanup
}
if !found {
goto Cleanup
}
log.Debugf("SyncReplicaRelayLogs: correlated next-coordinates are %+v", *nextCoordinates)
InitHttpClient()
if _, err := RelaylogContentsTail(otherInstance.Key.Hostname, nextCoordinates, &onResponse); err != nil {
goto Cleanup
}
log.Debugf("SyncReplicaRelayLogs: got content (%d bytes)", len(content))
if _, err := ApplyRelaylogContents(instance.Key.Hostname, content); err != nil {
goto Cleanup
}
log.Debugf("SyncReplicaRelayLogs: applied content (%d bytes)", len(content))
instance, err = inst.ChangeMasterTo(&instance.Key, &otherInstance.MasterKey, &otherInstance.ExecBinlogCoordinates, false, inst.GTIDHintNeutral)
if err != nil {
goto Cleanup
}
Cleanup:
if err != nil {
return instance, log.Errore(err)
}
// and we're done (pending deferred functions)
inst.AuditOperation("align-via-relaylogs", &instance.Key, fmt.Sprintf("aligned %+v by relaylogs from %+v", instance.Key, otherInstance.Key))
return instance, err
}
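// A minimal usage sketch, assuming both instances were previously read via
// inst.ReadTopologyInstance and their replication threads are already stopped
// (the hostnames below are hypothetical):
//
//	replicaKey := &inst.InstanceKey{Hostname: "replica.example.com", Port: 3306}
//	otherKey := &inst.InstanceKey{Hostname: "other.example.com", Port: 3306}
//	replica, err := inst.ReadTopologyInstance(replicaKey)
//	if err != nil {
//		log.Fatale(err)
//	}
//	other, err := inst.ReadTopologyInstance(otherKey)
//	if err != nil {
//		log.Fatale(err)
//	}
//	if _, err := SyncReplicaRelayLogs(replica, other); err != nil {
//		log.Fatale(err)
//	}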

Diff not shown because of its large size.


@@ -0,0 +1,37 @@
package app
import (
"testing"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
func init() {
config.Config.HostnameResolveMethod = "none"
config.MarkConfigurationLoaded()
log.SetLevel(log.ERROR)
}
func TestHelp(t *testing.T) {
Cli("help", false, "localhost:9999", "localhost:9999", "orc", "no-reason", "1m", ".", "no-alias", "no-pool", "")
test.S(t).ExpectTrue(len(knownCommands) > 0)
}
func TestKnownCommands(t *testing.T) {
Cli("help", false, "localhost:9999", "localhost:9999", "orc", "no-reason", "1m", ".", "no-alias", "no-pool", "")
commandsMap := make(map[string]string)
for _, command := range knownCommands {
commandsMap[command.Command] = command.Section
}
test.S(t).ExpectEquals(commandsMap["no-such-command"], "")
test.S(t).ExpectEquals(commandsMap["relocate"], "Smart relocation")
test.S(t).ExpectEquals(commandsMap["relocate-slaves"], "")
test.S(t).ExpectEquals(commandsMap["relocate-replicas"], "Smart relocation")
for _, synonym := range commandSynonyms {
test.S(t).ExpectNotEquals(commandsMap[synonym], "")
}
}


@@ -0,0 +1,907 @@
/*
Copyright 2016 GitHub Inc.
See https://github.com/openark/orchestrator/blob/master/LICENSE
*/
package app
import (
"fmt"
"strings"
)
const AppPrompt string = `
orchestrator [-c command] [-i instance] [-d destination] [--verbose|--debug] [... cli ] | http
Cheatsheet:
Run orchestrator in HTTP mode:
orchestrator --debug http
See all possible commands:
orchestrator help
Detailed help for a given command (e.g. "relocate")
orchestrator help relocate
`
var CommandHelp map[string]string
func init() {
CommandHelp = make(map[string]string)
CommandHelp["relocate"] = `
Relocate a replica beneath another (destination) instance. The choice of destination is almost arbitrary;
it must not be a child/descendant of the instance, but otherwise it can be anywhere, and can be a normal replica
or a binlog server. Orchestrator will choose the best course of action to relocate the replica.
No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
Examples:
orchestrator -c relocate -i replica.to.relocate.com -d instance.that.becomes.its.master
orchestrator -c relocate -d destination.instance.that.becomes.its.master
-i not given, implicitly assumed local hostname
(this command was previously named "relocate-below")
`
CommandHelp["relocate-replicas"] = `
Relocates all or part of the replicas of a given instance under another (destination) instance. This is
typically much faster than relocating replicas one by one.
Orchestrator chooses the best course of action to relocate the replicas. It may choose a multi-step operation.
Some replicas may succeed and some may fail the operation.
The instance (replicas' master) itself may be crashed or inaccessible. It is not contacted throughout the operation.
Examples:
orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master
orchestrator -c relocate-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter
only apply to those instances that match given regex
`
CommandHelp["move-up-replicas"] = `
Moves replicas of the given instance one level up the topology, making them siblings of given instance.
This is a (faster) shortcut to executing move-up on all replicas of given instance.
Examples:
orchestrator -c move-up-replicas -i replica.whose.subreplicas.will.move.up.com[:3306]
orchestrator -c move-up-replicas -i replica.whose.subreplicas.will.move.up.com[:3306] --pattern=regexp.filter
only apply to those instances that match given regex
`
CommandHelp["move-below"] = `
Moves a replica beneath its sibling. Both replicas must be actively replicating from the same master.
The sibling will become instance's master. No action taken when sibling cannot act as master
(e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
Example:
orchestrator -c move-below -i replica.to.move.com -d sibling.replica.under.which.to.move.com
orchestrator -c move-below -d sibling.replica.under.which.to.move.com
-i not given, implicitly assumed local hostname
`
CommandHelp["move-equivalent"] = `
Moves a replica beneath another server, based on previously recorded "equivalence coordinates". Such coordinates
are obtained whenever orchestrator issues a CHANGE MASTER TO. The "before" and "after" masters coordinates are
persisted. In such cases where the newly relocated replica is unable to replicate (e.g. firewall issues) it is then
easy to revert the relocation via "move-equivalent".
The command works if and only if orchestrator has an exact mapping between the replica's current replication coordinates
and some other coordinates.
Example:
orchestrator -c move-equivalent -i replica.to.revert.master.position.com -d master.to.move.to.com
`
CommandHelp["take-siblings"] = `
Turn all siblings of a replica into its sub-replicas. No action taken for siblings that cannot become
replicas of given instance (e.g. incompatible versions, binlog format etc.). This is a (faster) shortcut
to executing move-below for all siblings of the given instance. Example:
orchestrator -c take-siblings -i replica.whose.siblings.will.move.below.com
`
CommandHelp["take-master"] = `
Turn an instance into a master of its own master; essentially switch the two. Replicas of each of the two
involved instances are unaffected, and continue to replicate as they were.
The instance's master must itself be a replica. It does not necessarily have to be actively replicating.
orchestrator -c take-master -i replica.that.will.switch.places.with.its.master.com
`
CommandHelp["repoint"] = `
Make the given instance replicate from another instance without changing the binlog coordinates. There
are few sanity checks to this, and it is a risky operation. Use cases are: a rename of the master's
host, a corruption in relay-logs, move from beneath MaxScale & Binlog-server. Examples:
orchestrator -c repoint -i replica.to.operate.on.com -d new.master.com
orchestrator -c repoint -i replica.to.operate.on.com
The above will repoint the replica back to its existing master without change
orchestrator -c repoint
-i not given, implicitly assumed local hostname
`
CommandHelp["repoint-replicas"] = `
Repoint all replicas of given instance to replicate back from the instance. This is a convenience method
which implies a one-by-one "repoint" command on each replica.
orchestrator -c repoint-replicas -i instance.whose.replicas.will.be.repointed.com
orchestrator -c repoint-replicas
-i not given, implicitly assumed local hostname
`
CommandHelp["make-co-master"] = `
Create a master-master replication. Given instance is a replica which replicates directly from a master.
The master is then turned into a replica of the instance. The master is expected to not itself be a replica.
The read_only property of the replica is unaffected by this operation. Examples:
orchestrator -c make-co-master -i replica.to.turn.into.co.master.com
orchestrator -c make-co-master
-i not given, implicitly assumed local hostname
`
CommandHelp["get-candidate-replica"] = `
Information command suggesting the most up-to-date replica of a given instance, which can be promoted
as local master to its siblings. If replication is up and running, this command merely gives an
estimate, since replicas advance and progress continuously at different paces. If all replicas of given
instance have broken replication (e.g. because given instance is dead), then this command provides
a definitive candidate, which could act as a replacement master. See also regroup-replicas. Example:
orchestrator -c get-candidate-replica -i instance.with.replicas.one.of.which.may.be.candidate.com
`
CommandHelp["regroup-replicas-bls"] = `
Given an instance that has Binlog Servers for replicas, promote one such Binlog Server over its other
Binlog Server siblings.
Example:
orchestrator -c regroup-replicas-bls -i instance.with.binlog.server.replicas.com
--debug is your friend.
`
CommandHelp["move-gtid"] = `
Move a replica beneath another (destination) instance. Orchestrator will reject the operation if GTID is
not enabled on the replica, or is not supported by the would-be master.
You may try and move the replica under any other instance; there are no constraints on the family ties the
two may have, though you should be careful not to replicate from a descendant (making an
impossible loop).
Examples:
orchestrator -c move-gtid -i replica.to.move.com -d instance.that.becomes.its.master
orchestrator -c move-gtid -d destination.instance.that.becomes.its.master
-i not given, implicitly assumed local hostname
`
CommandHelp["move-replicas-gtid"] = `
Moves all replicas of a given instance under another (destination) instance using GTID. This is a (faster)
shortcut to moving each replica via "move-gtid".
Orchestrator will only move those replicas configured with GTID (either Oracle or MariaDB variants) and under the
condition the would-be master supports GTID.
Examples:
orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master
orchestrator -c move-replicas-gtid -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter
only apply to those instances that match given regex
`
CommandHelp["regroup-replicas-gtid"] = `
Given an instance (possibly a crashed one; it is never accessed), pick one of its replicas and make it
local master of its siblings, using GTID. The rules are similar to those in the "regroup-replicas" command.
Example:
orchestrator -c regroup-replicas-gtid -i instance.with.gtid.and.replicas.one.of.which.will.turn.local.master.if.possible
--debug is your friend.
`
CommandHelp["match"] = `
Matches a replica beneath another (destination) instance. The choice of destination is almost arbitrary;
it must not be a child/descendant of the instance. But otherwise they don't have to be direct siblings,
and in fact (if you know what you're doing), they don't actually have to belong to the same topology.
The operation expects the relocated instance to be "behind" the destination instance. It only finds out
whether this is the case by the end; the operation is cancelled in the event this is not the case.
No action taken when destination instance cannot act as master (e.g. has no binary logs, is of incompatible version, incompatible binlog format etc.)
Examples:
orchestrator -c match -i replica.to.relocate.com -d instance.that.becomes.its.master
orchestrator -c match -d destination.instance.that.becomes.its.master
-i not given, implicitly assumed local hostname
(this command was previously named "match-below")
`
CommandHelp["match-replicas"] = `
Matches all replicas of a given instance under another (destination) instance. This is a (faster) shortcut
to matching said replicas one by one under the destination instance. In fact, this bulk operation is highly
optimized and can execute orders of magnitude faster, depending on the number of replicas involved and their
respective position behind the instance (the more replicas, the more savings).
The instance itself may be crashed or inaccessible. It is not contacted throughout the operation. Examples:
orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master
orchestrator -c match-replicas -i instance.whose.replicas.will.relocate -d instance.that.becomes.their.master --pattern=regexp.filter
only apply to those instances that match given regex
(this command was previously named "multi-match-replicas")
`
CommandHelp["match-up"] = `
Transport the replica one level up the hierarchy, making it child of its grandparent. This is
similar in essence to move-up, only based on Pseudo-GTID. The master of the given instance
does not need to be alive or connected (and could in fact be crashed). It is never contacted.
Grandparent instance must be alive and accessible.
Examples:
orchestrator -c match-up -i replica.to.match.up.com:3306
orchestrator -c match-up
-i not given, implicitly assumed local hostname
`
CommandHelp["match-up-replicas"] = `
Matches replicas of the given instance one level up the topology, making them siblings of given instance.
This is a (faster) shortcut to executing match-up on all replicas of given instance. The instance need
not be alive / accessible / functional. It can be crashed.
Example:
orchestrator -c match-up-replicas -i replica.whose.subreplicas.will.match.up.com
orchestrator -c match-up-replicas -i replica.whose.subreplicas.will.match.up.com[:3306] --pattern=regexp.filter
only apply to those instances that match given regex
`
CommandHelp["rematch"] = `
Reconnect a replica onto its master, via Pseudo-GTID. The use case for this operation is a non-crash-safe
replication configuration (e.g. MySQL 5.5) with sync_binlog=1 and log_slave_updates. This operation
implies crash-safe-replication and makes it possible for the replica to reconnect. Example:
orchestrator -c rematch -i replica.to.rematch.under.its.master
`
CommandHelp["regroup-replicas"] = `
Given an instance (possibly a crashed one; it is never accessed), pick one of its replicas and make it
local master of its siblings, using Pseudo-GTID. It is uncertain that there *is* a replica that will be able to
become master to all its siblings. But if there is one, orchestrator will pick such one. There are many
constraints, most notably the replication positions of all replicas, whether they use log_slave_updates, and
otherwise version compatibilities etc.
As many replicas as can be regrouped under the promoted replica are operated on. The rest are untouched.
This command is useful in the event of a crash. For example, in the event that a master dies, this operation
can promote a candidate replacement and set up the remaining topology to correctly replicate from that
replacement replica. Example:
orchestrator -c regroup-replicas -i instance.with.replicas.one.of.which.will.turn.local.master.if.possible
--debug is your friend.
`
CommandHelp["enable-gtid"] = `
If possible, enable GTID replication. This works on Oracle (>= 5.6, gtid-mode=1) and MariaDB (>= 10.0).
Replication is stopped for a short duration so as to reconfigure as GTID. In case of error replication remains
stopped. Example:
orchestrator -c enable-gtid -i replica.compatible.with.gtid.com
`
CommandHelp["disable-gtid"] = `
Assuming replica replicates via GTID, disable GTID replication and resume standard file:pos replication. Example:
orchestrator -c disable-gtid -i replica.replicating.via.gtid.com
`
CommandHelp["reset-master-gtid-remove-own-uuid"] = `
Assuming GTID is enabled, reset master on the instance, removing GTID entries generated by the instance.
This operation is only allowed on Oracle-GTID enabled servers that have no replicas.
It is used for cleaning up the GTID mess incurred by mistakenly issuing queries on the replica (even such
queries as "FLUSH ENGINE LOGS" that happen to write to binary logs). Example:
orchestrator -c reset-master-gtid-remove-own-uuid -i replica.running.with.gtid.com
`
CommandHelp["stop-slave"] = `
Issues a STOP SLAVE; command. Example:
orchestrator -c stop-slave -i replica.to.be.stopped.com
`
CommandHelp["start-slave"] = `
Issues a START SLAVE; command. Example:
orchestrator -c start-slave -i replica.to.be.started.com
`
CommandHelp["restart-slave"] = `
Issues STOP SLAVE + START SLAVE; Example:
orchestrator -c restart-slave -i replica.to.be.started.com
`
CommandHelp["skip-query"] = `
On a replica whose replication has failed, skips a single query and attempts to resume replication.
Only applies when the replication seems to be broken on SQL thread (e.g. on duplicate
key error). Also works in GTID mode. Example:
orchestrator -c skip-query -i replica.with.broken.sql.thread.com
`
CommandHelp["reset-slave"] = `
Issues a RESET SLAVE command. Destructive to replication. Example:
orchestrator -c reset-slave -i replica.to.reset.com
`
CommandHelp["detach-replica"] = `
Stops replication and modifies binlog position into an impossible, yet reversible, value.
This effectively means the replication becomes broken. See reattach-replica. Example:
orchestrator -c detach-replica -i replica.whose.replication.will.break.com
Issuing this on an already detached replica will do nothing.
`
CommandHelp["reattach-replica"] = `
Undo a detach-replica operation. Reverses the binlog change into the original values, and
resumes replication. Example:
orchestrator -c reattach-replica -i detached.replica.whose.replication.will.amend.com
Issuing this on an attached (i.e. normal) replica will do nothing.
`
CommandHelp["detach-replica-master-host"] = `
Stops replication and modifies Master_Host into an impossible, yet reversible, value.
This effectively means the replication becomes broken. See reattach-replica-master-host. Example:
orchestrator -c detach-replica-master-host -i replica.whose.replication.will.break.com
Issuing this on an already detached replica will do nothing.
`
CommandHelp["reattach-replica-master-host"] = `
Undo a detach-replica-master-host operation. Reverses the hostname change into the original value, and
resumes replication. Example:
orchestrator -c reattach-replica-master-host -i detached.replica.whose.replication.will.amend.com
Issuing this on an attached (i.e. normal) replica will do nothing.
`
CommandHelp["restart-slave-statements"] = `
Prints a list of statements to execute to stop and then restore the replica to the same execution state.
Provide --statement for the injected statement.
This is useful for issuing a command that can only be executed while the replica is stopped, such as
any CHANGE MASTER TO statement.
Orchestrator will not execute given commands, only print them as courtesy. It may not have
the privileges to execute them in the first place. Example:
orchestrator -c restart-slave-statements -i some.replica.com -statement="change master to master_heartbeat_period=5"
`
CommandHelp["set-read-only"] = `
Turn an instance read-only, via SET GLOBAL read_only := 1. Examples:
orchestrator -c set-read-only -i instance.to.turn.read.only.com
orchestrator -c set-read-only
-i not given, implicitly assumed local hostname
`
CommandHelp["set-writeable"] = `
Turn an instance writeable, via SET GLOBAL read_only := 0. Example:
orchestrator -c set-writeable -i instance.to.turn.writeable.com
orchestrator -c set-writeable
-i not given, implicitly assumed local hostname
`
CommandHelp["flush-binary-logs"] = `
Flush binary logs on an instance. Examples:
orchestrator -c flush-binary-logs -i instance.with.binary.logs.com
orchestrator -c flush-binary-logs -i instance.with.binary.logs.com --binlog=mysql-bin.002048
Flushes binary logs until reaching given number. Fails when current number is larger than input
`
CommandHelp["purge-binary-logs"] = `
Purge binary logs on an instance. Examples:
orchestrator -c purge-binary-logs -i instance.with.binary.logs.com --binlog mysql-bin.002048
Purges binary logs until given log
`
CommandHelp["last-pseudo-gtid"] = `
Information command; an authoritative way of detecting whether a Pseudo-GTID event exists for an instance,
and if so, output the last Pseudo-GTID entry and its location. Example:
orchestrator -c last-pseudo-gtid -i instance.with.possible.pseudo-gtid.injection
`
CommandHelp["find-binlog-entry"] = `
Get binlog file:pos of entry given by --pattern (exact full match, not a regular expression) in a given instance.
This will search the instance's binary logs starting with most recent, and terminate as soon as an exact match is found.
The given input is not a regular expression. It must fully match the entry (not a substring).
This is most useful when looking for uniquely identifiable values, such as Pseudo-GTID. Example:
orchestrator -c find-binlog-entry -i instance.to.search.on.com --pattern "insert into my_data (my_column) values ('distinct_value_01234_56789')"
Prints out the binlog file:pos where the entry is found, or errors out if not found.
`
CommandHelp["correlate-binlog-pos"] = `
Given an instance (-i) and binlog coordinates (--binlog=file:pos), find the correlated coordinates in another instance (-d).
"Correlated coordinates" are those that present the same point-in-time of sequence of binary log events, untangling
the mess of different binlog file:pos coordinates on different servers.
This operation relies on Pseudo-GTID: your servers must have been pre-injected with Pseudo-GTID entries as these are
being used as binlog markers in the correlation process.
You must provide a valid file:pos in the binlogs of the source instance (-i), and in response get the correlated
coordinates in the binlogs of the destination instance (-d). This operation does not work on relay logs.
Example:
orchestrator -c correlate-binlog-pos -i instance.with.binary.log.com --binlog=mysql-bin.002366:14127 -d other.instance.with.binary.logs.com
Prints out correlated coordinates, e.g.: "mysql-bin.002302:14220", or errors out.
`
CommandHelp["submit-pool-instances"] = `
Submit a pool name with a list of instances in that pool. This removes any previous instances associated with
that pool. Expecting comma delimited list of instances
orchestrator -c submit-pool-instances --pool name_of_pool -i pooled.instance1.com,pooled.instance2.com:3306,pooled.instance3.com
`
CommandHelp["cluster-pool-instances"] = `
List all pools and their associated instances. Output is in tab delimited format, and lists:
cluster_name, cluster_alias, pool_name, pooled instance
Example:
orchestrator -c cluster-pool-instances
`
CommandHelp["which-heuristic-cluster-pool-instances"] = `
List instances belonging to a cluster, which are also in some pool or in a specific given pool.
Not all instances are listed: unreachable, downtimed instances are left out. Only those that should be
responsive and healthy are listed. This serves applications in getting information about instances
that could be queried (this complements a proxy behavior in providing the *list* of instances).
Examples:
orchestrator -c which-heuristic-cluster-pool-instances --alias mycluster
Get the instances of a specific cluster, no specific pool
orchestrator -c which-heuristic-cluster-pool-instances --alias mycluster --pool some_pool
Get the instances of a specific cluster and which belong to a given pool
orchestrator -c which-heuristic-cluster-pool-instances -i instance.belonging.to.a.cluster
Cluster inferred by given instance
orchestrator -c which-heuristic-cluster-pool-instances
Cluster inferred by local hostname
`
CommandHelp["find"] = `
Find instances whose hostname matches given regex pattern. Example:
orchestrator -c find -pattern "backup.*us-east"
`
CommandHelp["clusters"] = `
List all clusters known to orchestrator. A cluster (aka topology, aka chain) is identified by its
master (or one of its masters if more than one exists). Example:
orchestrator -c clusters
-i not given, implicitly assumed local hostname
`
CommandHelp["all-clusters-masters"] = `
List of writeable masters, one per cluster.
For most single-master topologies, this is trivially the master.
For active-active master-master topologies, this ensures only one of
the masters is returned. Example:
orchestrator -c all-clusters-masters
`
CommandHelp["topology"] = `
Show an ascii-graph of a replication topology, given a member of that topology. Example:
orchestrator -c topology -i instance.belonging.to.a.topology.com
orchestrator -c topology
-i not given, implicitly assumed local hostname
Instance must be already known to orchestrator. Topology is generated by orchestrator's mapping
and not from synchronous investigation of the instances. The generated topology may include
instances that are dead, or whose replication is broken.
`
CommandHelp["all-instances"] = `
List the complete known set of instances. Similar to '-c find -pattern "."' Example:
orchestrator -c all-instances
`
CommandHelp["which-instance"] = `
Output the fully-qualified hostname:port representation of the given instance, or error if unknown
to orchestrator. Examples:
orchestrator -c which-instance -i instance.to.check.com
orchestrator -c which-instance
-i not given, implicitly assumed local hostname
`
CommandHelp["which-cluster"] = `
Output the name of the cluster an instance belongs to, or error if unknown to orchestrator. Examples:
orchestrator -c which-cluster -i instance.to.check.com
orchestrator -c which-cluster
-i not given, implicitly assumed local hostname
`
CommandHelp["which-cluster-instances"] = `
Output the list of instances participating in same cluster as given instance; output is one line
per instance, in hostname:port format. Examples:
orchestrator -c which-cluster-instances -i instance.to.check.com
orchestrator -c which-cluster-instances
-i not given, implicitly assumed local hostname
orchestrator -c which-cluster-instances -alias some_alias
assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration)
`
CommandHelp["which-cluster-domain"] = `
Output the domain name of given cluster, indicated by instance or alias. This depends on
the DetectClusterDomainQuery configuration. Example:
orchestrator -c which-cluster-domain -i instance.to.check.com
orchestrator -c which-cluster-domain
-i not given, implicitly assumed local hostname
orchestrator -c which-cluster-domain -alias some_alias
assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration)
`
CommandHelp["which-heuristic-domain-instance"] = `
Returns the instance registered as the writer for a cluster's domain name.
Given a cluster, orchestrator looks for the domain name indicated by this cluster, and proceeds to search for
a stored key-value attribute for that domain name. This would be the writer host for the given domain.
See also set-heuristic-domain-instance, this is meant to be a temporary service mimicking in micro-scale a
service discovery functionality.
Example:
orchestrator -c which-heuristic-domain-instance -alias some_alias
Detects the domain name for given cluster, reads from key-value store the writer host associated with the domain name.
orchestrator -c which-heuristic-domain-instance -i instance.of.some.cluster
Cluster is inferred by a member instance (the instance is not necessarily the master)
`
CommandHelp["which-cluster-master"] = `
Output the name of the active master in a given cluster, indicated by instance or alias.
An "active" master is one that is writable and is not marked as downtimed due to a topology recovery.
Examples:
orchestrator -c which-cluster-master -i instance.to.check.com
orchestrator -c which-cluster-master
-i not given, implicitly assumed local hostname
orchestrator -c which-cluster-master -alias some_alias
assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration)
`
CommandHelp["which-cluster-osc-replicas"] = `
Output a list of replicas in the same cluster as given instance, that would serve as good candidates as control replicas
for a pt-online-schema-change operation.
Those replicas would be used for measuring replication delay so as to throttle the osc operation. Selected replicas will include,
where possible: intermediate masters, their replicas, 3rd level replicas, direct non-intermediate-master replicas.
orchestrator -c which-cluster-osc-replicas -i instance.to.check.com
orchestrator -c which-cluster-osc-replicas
-i not given, implicitly assumed local hostname
orchestrator -c which-cluster-osc-replicas -alias some_alias
assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration)
`
CommandHelp["which-lost-in-recovery"] = `
List instances marked as downtimed for being lost in a recovery process. The output of this command lists
"lost" instances that probably should be recycled.
The topology recovery process injects a magic hint when downtiming lost instances, that is picked up
by this command. Examples:
orchestrator -c which-lost-in-recovery
Lists all heuristically-recent known lost instances
`
CommandHelp["which-master"] = `
Output the fully-qualified hostname:port representation of a given instance's master. Examples:
orchestrator -c which-master -i a.known.replica.com
orchestrator -c which-master
-i not given, implicitly assumed local hostname
`
CommandHelp["which-replicas"] = `
Output the fully-qualified hostname:port list of replicas (one per line) of a given instance (or empty
list if instance is not a master to anyone). Examples:
orchestrator -c which-replicas -i a.known.instance.com
orchestrator -c which-replicas
-i not given, implicitly assumed local hostname
`
CommandHelp["get-cluster-heuristic-lag"] = `
For a given cluster (indicated by an instance or alias), output a heuristic "representative" lag of that cluster.
The output is obtained by examining the replicas that are member of "which-cluster-osc-replicas"-command, and
getting the maximum replica lag of those replicas. Recall that those replicas are a subset of the entire cluster,
and that they are being polled periodically. Hence the output of this command is not necessarily up-to-date
and does not represent all replicas in cluster. Examples:
orchestrator -c get-cluster-heuristic-lag -i instance.that.is.part.of.cluster.com
orchestrator -c get-cluster-heuristic-lag
-i not given, implicitly assumed local host, cluster implied
orchestrator -c get-cluster-heuristic-lag -alias some_alias
assuming some_alias is a known cluster alias (see ClusterNameToAlias or DetectClusterAliasQuery configuration)
`
CommandHelp["instance-status"] = `
Output short status on a given instance (name, replication status, notable configuration). Examples:
orchestrator -c instance-status -i instance.to.investigate.com
orchestrator -c instance-status
-i not given, implicitly assumed local hostname
`
CommandHelp["snapshot-topologies"] = `
Take a snapshot of existing topologies. This will record minimal replication topology data: the identity
of an instance, its master and its cluster.
Taking a snapshot later allows for reviewing changes in topologies. One might wish to invoke this command
on a daily basis, and later be able to solve questions like 'where was this instance replicating from before
we moved it?', 'which instances were replicating from this instance a week ago?' etc. Example:
orchestrator -c snapshot-topologies
`
CommandHelp["discover"] = `
Request that orchestrator contacts given instance, reads its status, and upserts it into
orchestrator's repository. Examples:
orchestrator -c discover -i instance.to.discover.com:3306
orchestrator -c discover -i cname.of.instance
orchestrator -c discover
-i not given, implicitly assumed local hostname
Orchestrator will resolve CNAMEs and VIPs.
`
CommandHelp["forget"] = `
Request that orchestrator remove given instance from its repository. If the instance is alive
and connected through replication to otherwise known and live instances, orchestrator will
re-discover it by nature of its discovery process. Instances are auto-removed via config's
UnseenAgentForgetHours. If you happen to know a machine is decommissioned, for example, it
can be nice to remove it from the repository before it auto-expires. Example:
orchestrator -c forget -i instance.to.forget.com
Orchestrator will *not* resolve CNAMEs and VIPs for given instance.
`
CommandHelp["begin-maintenance"] = `
Request a maintenance lock on an instance. Topology changes require placing locks on the minimal set of
affected instances, so as to avoid an incident of two uncoordinated operations on the same instance (leading
to possible chaos). Locks are placed in the backend database, and so multiple orchestrator instances are safe.
Operations automatically acquire locks and release them. This command manually acquires a lock, and will
block other operations on the instance until lock is released.
Note that orchestrator automatically assumes locks to be expired after MaintenanceExpireMinutes (hard coded value).
Examples:
orchestrator -c begin-maintenance -i instance.to.lock.com --duration=3h --reason="load testing; do not disturb"
accepted duration format: 10s, 30m, 24h, 3d, 4w
orchestrator -c begin-maintenance -i instance.to.lock.com --reason="load testing; do not disturb"
--duration not given; default to MaintenanceExpireMinutes (hard coded value)
`
CommandHelp["end-maintenance"] = `
Remove maintenance lock; such lock may have been gained by an explicit begin-maintenance command or implicitly
by a topology change. You should generally only remove locks you have placed manually; orchestrator will
automatically expire locks after MaintenanceExpireMinutes (hard coded value).
Example:
orchestrator -c end-maintenance -i locked.instance.com
`
CommandHelp["begin-downtime"] = `
Mark an instance as downtimed. A downtimed instance is assumed to be taken care of, and recovery-analysis does
not apply for such an instance. As result, no recommendation for recovery, and no automated-recovery are issued
on a downtimed instance.
Downtime is different from maintenance in that it places no lock (maintenance uses an exclusive lock on the instance).
It is OK to downtime an instance that is already downtimed -- the new begin-downtime command will override whatever
previous downtime attributes there were on the downtimed instance.
Note that orchestrator automatically assumes downtime to be expired after MaintenanceExpireMinutes (hard coded value).
Examples:
orchestrator -c begin-downtime -i instance.to.downtime.com --duration=3h --reason="dba handling; do not do recovery"
accepted duration format: 10s, 30m, 24h, 3d, 4w
orchestrator -c begin-downtime -i instance.to.lock.com --reason="dba handling; do not do recovery"
--duration not given; default to MaintenanceExpireMinutes (hard coded value)
`
CommandHelp["end-downtime"] = `
Indicate an instance is no longer downtimed. Typically you should not need to use this since
a downtime is always bounded by a duration and auto-expires. But you may use this to forcibly
indicate the active downtime should be expired now.
Example:
orchestrator -c end-downtime -i downtimed.instance.com
`
CommandHelp["recover"] = `
Do auto-recovery given a dead instance. Orchestrator chooses the best course of action.
The given instance must be acknowledged as dead and have replicas, or else there's nothing to do.
See "replication-analysis" command.
Orchestrator executes external processes as configured by *Processes variables.
--debug is your friend. Example:
orchestrator -c recover -i dead.instance.com --debug
`
CommandHelp["recover-lite"] = `
Do auto-recovery given a dead instance. Orchestrator chooses the best course of action, exactly
as in "-c recover". Orchestratir will *not* execute external processes.
orchestrator -c recover-lite -i dead.instance.com --debug
`
CommandHelp["force-master-failover"] = `
Forcibly begin a master failover process, even if orchestrator does not see anything wrong
in particular with the master.
- This will not work in a master-master configuration
- Orchestrator just treats this command as a DeadMaster failover scenario
- Orchestrator will issue all relevant pre-failover and post-failover external processes.
- Orchestrator will not attempt to recover/reconnect the old master
`
CommandHelp["force-master-takeover"] = `
Forcibly discard master and promote another (direct child) instance instead, even if everything is running well.
This allows for planned switchover.
NOTE:
- You must specify the instance to promote via "-d"
- Promoted instance must be a direct child of the existing master
- This will not work in a master-master configuration
- Orchestrator just treats this command as a DeadMaster failover scenario
- It is STRONGLY suggested that you first relocate everything below your chosen instance-to-promote.
It *is* a planned failover thing.
- Otherwise orchestrator will do its thing in moving instances around, hopefully promoting your requested
server on top.
- Orchestrator will issue all relevant pre-failover and post-failover external processes.
- In this command orchestrator will not issue 'SET GLOBAL read_only=1' on the existing master, nor will
it issue a 'FLUSH TABLES WITH READ LOCK'. Please see the 'graceful-master-takeover' command.
Examples:
orchestrator -c force-master-takeover -alias mycluster -d immediate.child.of.master.com
Indicate cluster by alias. Orchestrator automatically figures out the master
orchestrator -c force-master-takeover -i instance.in.relevant.cluster.com -d immediate.child.of.master.com
Indicate cluster by an instance. You don't strictly need to specify the master, orchestrator
will infer the master's identity.
`
CommandHelp["graceful-master-takeover"] = `
Gracefully discard master and promote another (direct child) instance instead, even if everything is running well.
This allows for planned switchover.
NOTE:
- Promoted instance must be a direct child of the existing master
- Promoted instance must be the *only* direct child of the existing master. It *is* a planned failover thing.
- Orchestrator will first issue a "set global read_only=1" on existing master
- It will promote candidate master to the binlog positions of the existing master after issuing the above
- There _could_ still be statements issued and executed on the existing master by SUPER users, but those are ignored.
- Orchestrator then proceeds to handle a DeadMaster failover scenario
- Orchestrator will issue all relevant pre-failover and post-failover external processes.
Examples:
orchestrator -c graceful-master-takeover -alias mycluster
Indicate cluster by alias. Orchestrator automatically figures out the master and verifies it has a single direct replica
orchestrator -c graceful-master-takeover -i instance.in.relevant.cluster.com
Indicate cluster by an instance. You don't strictly need to specify the master, orchestrator
will infer the master's identity.
`
CommandHelp["replication-analysis"] = `
Request an analysis of potential crash incidents in all known topologies.
Output format is not yet stabilized and may change in the future. Do not trust the output
for automated parsing. Use web API instead, at this time. Example:
orchestrator -c replication-analysis
`
CommandHelp["ack-cluster-recoveries"] = `
Acknowledge recoveries for a given cluster; this unblocks pending future recoveries.
Acknowledging a recovery requires a comment (supply via --reason). Acknowledgement clears the in-active-period
flag for affected recoveries, which in turn affects any blocking recoveries.
Multiple recoveries may be affected. Only unacknowledged recoveries will be affected.
Examples:
orchestrator -c ack-cluster-recoveries -i instance.in.a.cluster.com --reason="dba has taken taken necessary steps"
Cluster is indicated by any of its members. The recovery need not necessarily be on/to given instance.
orchestrator -c ack-cluster-recoveries -alias some_alias --reason="dba has taken taken necessary steps"
Cluster indicated by alias
`
CommandHelp["ack-instance-recoveries"] = `
Acknowledge recoveries for a given instance; this unblocks pending future recoveries.
Acknowledging a recovery requires a comment (supply via --reason). Acknowledgement clears the in-active-period
flag for affected recoveries, which in turn affects any blocking recoveries.
Multiple recoveries may be affected. Only unacknowledged recoveries will be affected.
Example:
orchestrator -c ack-cluster-recoveries -i instance.that.failed.com --reason="dba has taken taken necessary steps"
`
CommandHelp["register-candidate"] = `
Indicate that a specific instance is a preferred candidate for master promotion. Upon a dead master
recovery, orchestrator will do its best to promote instances that are marked as candidates. However
orchestrator cannot guarantee this will always work. Issues like version compatibilities, binlog format
etc. are limiting factors.
You will want to mark an instance as a candidate when: it is replicating directly from the master, has
binary logs and log_slave_updates enabled, uses the same binlog_format as its siblings and a version
compatible with its siblings. If you're using DataCenterPattern & PhysicalEnvironmentPattern (see configuration),
you would further wish to make sure you have a candidate in each data center.
Orchestrator first promotes the best-possible replica, and only then replaces it with your candidate,
and only if both are in the same datacenter and physical environment.
An instance needs to continuously be marked as candidate, so as to make sure orchestrator is not wasting
time with stale instances. Orchestrator periodically clears candidate-registration for instances that have
not been registered for over CandidateInstanceExpireMinutes (see config).
Example:
orchestrator -c register-candidate -i candidate.instance.com
orchestrator -c register-candidate
-i not given, implicitly assumed local hostname
`
CommandHelp["register-hostname-unresolve"] = `
Assigns the given instance a virtual (aka "unresolved") name. When moving replicas under an instance with assigned
"unresolve" name, orchestrator issues a CHANGE MASTER TO MASTER_HOST='<the unresovled name instead of the fqdn>' ...
This is useful in cases where your master is behind virtual IP (e.g. active/passive masters with shared storage or DRBD,
e.g. binlog servers sharing common VIP).
A "repoint" command is useful after "register-hostname-unresolve": you can repoint replicas of the instance to their exact
same location, and orchestrator will swap the fqdn of their master with the unresolved name.
Such registration must be periodic. Orchestrator automatically expires such registration after ExpiryHostnameResolvesMinutes.
Example:
orchestrator -c register-hostname-unresolve -i instance.fqdn.com --hostname=virtual.name.com
`
CommandHelp["deregister-hostname-unresolve"] = `
Explicitly deregister/disassociate a hostname from an "unresolved" name. Orchestrator merely removes the association, but does
not touch any replica at this point. A "repoint" command can be useful right after calling this command to change replica's master host
name (assumed to be an "unresolved" name, such as a VIP) with the real fqdn of the master host.
Example:
orchestrator -c deregister-hostname-unresolve -i instance.fqdn.com
`
CommandHelp["set-heuristic-domain-instance"] = `
This is a temporary (sync your watches, watch for next ice age) command which registers the cluster domain name of a given cluster
with the master/writer host for that cluster. It is a one-time-master-discovery operation.
At this time orchestrator may also act as a small & simple key-value store (recall the "temporary" indication).
Master failover operations will overwrite the domain instance identity. Orchestrator so turns into a mini master-discovery
service (I said "TEMPORARY"). Really there are other tools for the job. See also: which-heuristic-domain-instance
Example:
orchestrator -c set-heuristic-domain-instance --alias some_alias
Detects the domain name for given cluster, identifies the writer master of the cluster, associates the two in key-value store
orchestrator -c set-heuristic-domain-instance -i instance.of.some.cluster
Cluster is inferred by a member instance (the instance is not necessarily the master)
`
CommandHelp["continuous"] = `
Enter continuous mode, and actively poll for instances, diagnose problems, do maintenance etc.
This type of work is typically done in HTTP mode. However nothing prevents orchestrator from
doing it in command line. Invoking with "continuous" will run indefinitely. Example:
orchestrator -c continuous
`
CommandHelp["active-nodes"] = `
List orchestrator nodes or processes that are actively running or have most recently
executed. Output is in hostname:token format, where "token" is an internal unique identifier
of an orchestrator process. Example:
orchestrator -c active-nodes
`
CommandHelp["access-token"] = `
When running HTTP with "AuthenticationMethod" : "token", receive a new access token.
This token must be utilized within "AccessTokenUseExpirySeconds" and can then be used
until "AccessTokenExpiryMinutes" have passed.
In "token" authentication method a user is read-only unless able to provide with a fresh token.
A token may only be used once (two users must get two distinct tokens).
Submitting a token is done via "/web/access-token?publicToken=<received_token>". The token is then stored
in HTTP cookie.
orchestrator -c access-token
`
CommandHelp["reset-hostname-resolve-cache"] = `
Clear the hostname resolve cache; it will be refilled by following host discoveries
orchestrator -c reset-hostname-resolve-cache
`
CommandHelp["resolve"] = `
Utility command to resolve a CNAME and return resolved hostname name. Example:
orchestrator -c resolve -i cname.to.resolve
`
CommandHelp["redeploy-internal-db"] = `
Force internal schema migration to current backend structure. Orchestrator keeps track of the deployed
versions and will not reissue a migration for a version already deployed. Normally you should not use
this command, and it is provided mostly for building and testing purposes. Nonetheless it is safe to
use and at most it wastes some cycles.
`
for key := range CommandHelp {
CommandHelp[key] = strings.Trim(CommandHelp[key], "\n")
}
}
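// HelpCommand prints the help text of the given command to stdout.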
func HelpCommand(command string) {
fmt.Printf("%s:\n%s\n", command, CommandHelp[command])
}


@@ -0,0 +1,209 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package app
import (
"net"
nethttp "net/http"
"strings"
"time"
"vitess.io/vitess/go/vt/orchestrator/agent"
"vitess.io/vitess/go/vt/orchestrator/collection"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/http"
"vitess.io/vitess/go/vt/orchestrator/inst"
"vitess.io/vitess/go/vt/orchestrator/logic"
"vitess.io/vitess/go/vt/orchestrator/process"
"vitess.io/vitess/go/vt/orchestrator/ssl"
"github.com/go-martini/martini"
"github.com/martini-contrib/auth"
"github.com/martini-contrib/gzip"
"github.com/martini-contrib/render"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
const discoveryMetricsName = "DISCOVERY_METRICS"
var sslPEMPassword []byte
var agentSSLPEMPassword []byte
var discoveryMetrics *collection.Collection
// Http starts serving web/API requests; if ServeAgentsHttp is configured it also starts the agents listener
func Http(continuousDiscovery bool) {
promptForSSLPasswords()
process.ContinuousRegistration(process.OrchestratorExecutionHttpMode, "")
martini.Env = martini.Prod
if config.Config.ServeAgentsHttp {
go agentsHttp()
}
standardHttp(continuousDiscovery)
}
// Iterate over the private keys and get passwords for them
// Don't prompt for a password a second time if the files are the same
func promptForSSLPasswords() {
if ssl.IsEncryptedPEM(config.Config.SSLPrivateKeyFile) {
sslPEMPassword = ssl.GetPEMPassword(config.Config.SSLPrivateKeyFile)
}
if ssl.IsEncryptedPEM(config.Config.AgentSSLPrivateKeyFile) {
if config.Config.AgentSSLPrivateKeyFile == config.Config.SSLPrivateKeyFile {
agentSSLPEMPassword = sslPEMPassword
} else {
agentSSLPEMPassword = ssl.GetPEMPassword(config.Config.AgentSSLPrivateKeyFile)
}
}
}
// standardHttp starts serving HTTP or HTTPS (api/web) requests, to be used by normal clients
func standardHttp(continuousDiscovery bool) {
m := martini.Classic()
switch strings.ToLower(config.Config.AuthenticationMethod) {
case "basic":
{
if config.Config.HTTPAuthUser == "" {
// Still allowed; may be disallowed in future versions
log.Warning("AuthenticationMethod is configured as 'basic' but HTTPAuthUser undefined. Running without authentication.")
}
m.Use(auth.Basic(config.Config.HTTPAuthUser, config.Config.HTTPAuthPassword))
}
case "multi":
{
if config.Config.HTTPAuthUser == "" {
// Fatal: "multi" authentication requires HTTPAuthUser to be set
log.Fatal("AuthenticationMethod is configured as 'multi' but HTTPAuthUser undefined")
}
m.Use(auth.BasicFunc(func(username, password string) bool {
if username == "readonly" {
// Will be treated as "read-only"
return true
}
return auth.SecureCompare(username, config.Config.HTTPAuthUser) && auth.SecureCompare(password, config.Config.HTTPAuthPassword)
}))
}
default:
{
// We inject a dummy User object because we have function signatures with User argument in api.go
m.Map(auth.User(""))
}
}
m.Use(gzip.All())
// Render html templates from templates directory
m.Use(render.Renderer(render.Options{
Directory: "resources",
Layout: "templates/layout",
HTMLContentType: "text/html",
}))
m.Use(martini.Static("resources/public", martini.StaticOptions{Prefix: config.Config.URLPrefix}))
if config.Config.UseMutualTLS {
m.Use(ssl.VerifyOUs(config.Config.SSLValidOUs))
}
inst.SetMaintenanceOwner(process.ThisHostname)
if continuousDiscovery {
// start to expire metric collection info
discoveryMetrics = collection.CreateOrReturnCollection(discoveryMetricsName)
discoveryMetrics.SetExpirePeriod(time.Duration(config.Config.DiscoveryCollectionRetentionSeconds) * time.Second)
log.Info("Starting Discovery")
go logic.ContinuousDiscovery()
}
log.Info("Registering endpoints")
http.API.URLPrefix = config.Config.URLPrefix
http.Web.URLPrefix = config.Config.URLPrefix
http.API.RegisterRequests(m)
http.Web.RegisterRequests(m)
// Serve
if config.Config.ListenSocket != "" {
log.Infof("Starting HTTP listener on unix socket %v", config.Config.ListenSocket)
unixListener, err := net.Listen("unix", config.Config.ListenSocket)
if err != nil {
log.Fatale(err)
}
defer unixListener.Close()
if err := nethttp.Serve(unixListener, m); err != nil {
log.Fatale(err)
}
} else if config.Config.UseSSL {
log.Info("Starting HTTPS listener")
tlsConfig, err := ssl.NewTLSConfig(config.Config.SSLCAFile, config.Config.UseMutualTLS)
if err != nil {
log.Fatale(err)
}
tlsConfig.InsecureSkipVerify = config.Config.SSLSkipVerify
if err = ssl.AppendKeyPairWithPassword(tlsConfig, config.Config.SSLCertFile, config.Config.SSLPrivateKeyFile, sslPEMPassword); err != nil {
log.Fatale(err)
}
if err = ssl.ListenAndServeTLS(config.Config.ListenAddress, m, tlsConfig); err != nil {
log.Fatale(err)
}
} else {
log.Infof("Starting HTTP listener on %+v", config.Config.ListenAddress)
if err := nethttp.ListenAndServe(config.Config.ListenAddress, m); err != nil {
log.Fatale(err)
}
}
log.Info("Web server started")
}
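// A minimal sketch of the "multi" authentication behavior above, as a standalone
// martini app (the credentials and port below are hypothetical):
//
//	m := martini.Classic()
//	m.Use(auth.BasicFunc(func(username, password string) bool {
//		if username == "readonly" {
//			return true // any password accepted; treated as read-only
//		}
//		return auth.SecureCompare(username, "admin") && auth.SecureCompare(password, "secret")
//	}))
//	m.Get("/", func() string { return "authenticated" })
//	m.RunOnAddr(":3001")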
// agentsHttp starts serving agents HTTP or HTTPS API requests
func agentsHttp() {
m := martini.Classic()
m.Use(gzip.All())
m.Use(render.Renderer())
if config.Config.AgentsUseMutualTLS {
m.Use(ssl.VerifyOUs(config.Config.AgentSSLValidOUs))
}
log.Info("Starting agents listener")
agent.InitHttpClient()
go logic.ContinuousAgentsPoll()
http.AgentsAPI.URLPrefix = config.Config.URLPrefix
http.AgentsAPI.RegisterRequests(m)
// Serve
if config.Config.AgentsUseSSL {
log.Info("Starting agent HTTPS listener")
tlsConfig, err := ssl.NewTLSConfig(config.Config.AgentSSLCAFile, config.Config.AgentsUseMutualTLS)
if err != nil {
log.Fatale(err)
}
tlsConfig.InsecureSkipVerify = config.Config.AgentSSLSkipVerify
if err = ssl.AppendKeyPairWithPassword(tlsConfig, config.Config.AgentSSLCertFile, config.Config.AgentSSLPrivateKeyFile, agentSSLPEMPassword); err != nil {
log.Fatale(err)
}
if err = ssl.ListenAndServeTLS(config.Config.AgentsServerPort, m, tlsConfig); err != nil {
log.Fatale(err)
}
} else {
log.Info("Starting agent HTTP listener")
if err := nethttp.ListenAndServe(config.Config.AgentsServerPort, m); err != nil {
log.Fatale(err)
}
}
log.Info("Agent server started")
}
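// A minimal launcher sketch, assuming a valid configuration has been loaded
// (the listen address below is hypothetical):
//
//	package main
//
//	import (
//		"vitess.io/vitess/go/vt/orchestrator/app"
//		"vitess.io/vitess/go/vt/orchestrator/config"
//	)
//
//	func main() {
//		config.Config.ListenAddress = ":3000"
//		config.MarkConfigurationLoaded()
//		app.Http(true) // serve web/API and run continuous discovery
//	}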


@@ -0,0 +1,26 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package attributes
// HostAttributes presents attributes submitted by a host
type HostAttributes struct {
Hostname string
AttributeName string
AttributeValue string
SubmitTimestamp string
ExpireTimestamp string
}


@@ -0,0 +1,164 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package attributes
import (
"errors"
"fmt"
"io/ioutil"
"net/http"
"strings"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)
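// readResponse returns the body of an HTTP response, or an error if the response indicates an internal server error.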
func readResponse(res *http.Response, err error) ([]byte, error) {
if err != nil {
return nil, err
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, err
}
if res.Status == "500" {
return body, errors.New("Response Status 500")
}
return body, nil
}
// SetHostAttributes stores a single attribute value for a given host, replacing any previous value
func SetHostAttributes(hostname string, attributeName string, attributeValue string) error {
_, err := db.ExecOrchestrator(`
replace
into host_attributes (
hostname, attribute_name, attribute_value, submit_timestamp, expire_timestamp
) VALUES (
?, ?, ?, NOW(), NULL
)
`,
hostname,
attributeName,
attributeValue,
)
if err != nil {
return log.Errore(err)
}
return err
}
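// getHostAttributesByClause reads host attributes from the backend table, filtered by the given where clause and args.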
func getHostAttributesByClause(whereClause string, args []interface{}) ([]HostAttributes, error) {
res := []HostAttributes{}
query := fmt.Sprintf(`
select
hostname,
attribute_name,
attribute_value,
submit_timestamp,
ifnull(expire_timestamp, '') as expire_timestamp
from
host_attributes
%s
order by
hostname, attribute_name
`, whereClause)
err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error {
hostAttributes := HostAttributes{}
hostAttributes.Hostname = m.GetString("hostname")
hostAttributes.AttributeName = m.GetString("attribute_name")
hostAttributes.AttributeValue = m.GetString("attribute_value")
hostAttributes.SubmitTimestamp = m.GetString("submit_timestamp")
hostAttributes.ExpireTimestamp = m.GetString("expire_timestamp")
res = append(res, hostAttributes)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// GetHostAttributesByMatch returns host attributes whose hostname, attribute name and attribute value match the given regular expressions
func GetHostAttributesByMatch(hostnameMatch string, attributeNameMatch string, attributeValueMatch string) ([]HostAttributes, error) {
terms := []string{}
args := sqlutils.Args()
if hostnameMatch != "" {
terms = append(terms, ` hostname rlike ? `)
args = append(args, hostnameMatch)
}
if attributeNameMatch != "" {
terms = append(terms, ` attribute_name rlike ? `)
args = append(args, attributeNameMatch)
}
if attributeValueMatch != "" {
terms = append(terms, ` attribute_value rlike ? `)
args = append(args, attributeValueMatch)
}
if len(terms) == 0 {
return getHostAttributesByClause("", args)
}
whereCondition := fmt.Sprintf(" where %s ", strings.Join(terms, " and "))
return getHostAttributesByClause(whereCondition, args)
}
// GetHostAttribute expects to return a single attribute for a given hostname/attribute-name combination
// or error on empty result
func GetHostAttribute(hostname string, attributeName string) (string, error) {
whereClause := `where hostname=? and attribute_name=?`
attributes, err := getHostAttributesByClause(whereClause, sqlutils.Args(hostname, attributeName))
if err != nil {
return "", err
}
if len(attributes) == 0 {
return "", log.Errorf("No attribute found for %+v, %+v", hostname, attributeName)
}
return attributes[0].AttributeValue, nil
}
// SetGeneralAttribute sets an attribute not associated with a specific host. It's a plain key-value entry, stored under the "*" hostname.
func SetGeneralAttribute(attributeName string, attributeValue string) error {
if attributeName == "" {
return nil
}
return SetHostAttributes("*", attributeName, attributeValue)
}
// GetGeneralAttribute expects to return a single attribute value (not associated with a specific hostname)
func GetGeneralAttribute(attributeName string) (result string, err error) {
return GetHostAttribute("*", attributeName)
}
// GetHostAttributesByAttribute returns all host attributes with the given name whose value matches valueMatch (any value when valueMatch is empty)
func GetHostAttributesByAttribute(attributeName string, valueMatch string) ([]HostAttributes, error) {
if valueMatch == "" {
valueMatch = ".?"
}
whereClause := ` where attribute_name = ? and attribute_value rlike ?`
return getHostAttributesByClause(whereClause, sqlutils.Args(attributeName, valueMatch))
}
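
A minimal usage sketch for this package, assuming an orchestrator backend DB is configured and reachable; the hostname and attribute values below are illustrative:

package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/orchestrator/attributes"
)

func main() {
	// Upsert an attribute row; expire_timestamp is left NULL.
	if err := attributes.SetHostAttributes("db1.example.com", "role", "replica"); err != nil {
		panic(err)
	}
	// Read the single value back; errors on an empty result.
	role, err := attributes.GetHostAttribute("db1.example.com", "role")
	if err != nil {
		panic(err)
	}
	fmt.Println(role) // "replica"
}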


@ -0,0 +1,292 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Package collection holds routines for collecting "high frequency"
metrics and handling their auto-expiry based on a configured retention
time. This becomes more interesting as the number of MySQL servers
monitored by orchestrator increases.
Most monitoring systems look at different metrics over a period
like 1, 10, 30 or 60 seconds but even at second resolution orchestrator
may have polled a number of servers.
It can be helpful to collect the raw values, and then allow external
monitoring to pull via an http api call either pre-cooked aggregate
data or the raw data for custom analysis over the period requested.
This is expected to be used for the following types of metric:
* discovery metrics (time to poll a MySQL server and collect status)
* queue metrics (statistics within the discovery queue itself)
* query metrics (statistics on the number of queries made to the
backend MySQL database)
Orchestrator code can just add a new metric without worrying about
removing it later, and other code which serves API requests can
pull out the data when needed for the requested time period.
For current metrics two api urls have been provided: one provides
the raw data and the other one provides a single set of aggregate
data which is suitable for easy collection by monitoring systems.
Expiry is triggered by default if the collection is created via
CreateOrReturnCollection() and uses an expiry period of
DiscoveryCollectionRetentionSeconds. It can also be enabled by
calling StartAutoExpiration() after setting the required expire
period with SetExpirePeriod().
This will trigger periodic calls (every second) to ensure the removal
of metrics which have passed the time specified. Not enabling expiry
will mean data is collected but never freed which will make
orchestrator run out of memory eventually.
Current code uses DiscoveryCollectionRetentionSeconds as the
time to keep metric data.
*/
package collection
import (
"errors"
"sync"
"time"
// "vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/config"
)
// Metric is an interface containing a metric
type Metric interface {
When() time.Time // when the metric was taken
}
// Collection contains a collection of Metrics
type Collection struct {
sync.Mutex // for locking the structure
monitoring bool // am I monitoring the queue size?
collection []Metric
done chan struct{} // to indicate that we are finishing expiry processing
expirePeriod time.Duration // time to keep the collection information for
}
// hard-coded at every second
const defaultExpireTickerPeriod = time.Second
// namedCollection holds all named collections of metrics, which can
// then be accessed via an API call for monitoring.
var (
namedCollection map[string](*Collection)
namedCollectionLock sync.Mutex
)
func init() {
namedCollection = make(map[string](*Collection))
}
// StopMonitoring stops monitoring all the collections
func StopMonitoring() {
for _, q := range namedCollection {
q.StopAutoExpiration()
}
}
// CreateOrReturnCollection allows for creation of a new collection or
// returning a pointer to an existing one given the name. This allows access
// to the data structure from the api interface (http/api.go) and also when writing (inst).
func CreateOrReturnCollection(name string) *Collection {
namedCollectionLock.Lock()
defer namedCollectionLock.Unlock()
if q, found := namedCollection[name]; found {
return q
}
qmc := &Collection{
collection: nil,
done: make(chan struct{}),
// WARNING: use a different configuration name
expirePeriod: time.Duration(config.Config.DiscoveryCollectionRetentionSeconds) * time.Second,
}
go qmc.StartAutoExpiration()
namedCollection[name] = qmc
return qmc
}
// SetExpirePeriod determines after how long the collected data should be removed
func (c *Collection) SetExpirePeriod(duration time.Duration) {
c.Lock()
defer c.Unlock()
c.expirePeriod = duration
}
// ExpirePeriod returns the currently configured expiration period
func (c *Collection) ExpirePeriod() time.Duration {
c.Lock()
defer c.Unlock()
return c.expirePeriod
}
// StopAutoExpiration prepares to stop by terminating the auto-expiration process
func (c *Collection) StopAutoExpiration() {
if c == nil {
return
}
c.Lock()
if !c.monitoring {
c.Unlock()
return
}
c.monitoring = false
c.Unlock()
// no locking here deliberately
c.done <- struct{}{}
}
// StartAutoExpiration initiates the auto expiry procedure which
// periodically checks for metrics in the collection which need to
// be expired according to c.expirePeriod.
func (c *Collection) StartAutoExpiration() {
if c == nil {
return
}
c.Lock()
if c.monitoring {
c.Unlock()
return
}
c.monitoring = true
c.Unlock()
// log.Infof("StartAutoExpiration: %p with expirePeriod: %v", c, c.expirePeriod)
ticker := time.NewTicker(defaultExpireTickerPeriod)
for {
select {
case <-ticker.C: // do the periodic expiry
c.removeBefore(time.Now().Add(-c.expirePeriod))
case <-c.done: // stop the ticker and return
ticker.Stop()
return
}
}
}
// Metrics returns a slice containing all the metric values
func (c *Collection) Metrics() []Metric {
if c == nil {
return nil
}
c.Lock()
defer c.Unlock()
if len(c.collection) == 0 {
return nil // nothing to return
}
return c.collection
}
// Since returns the Metrics on or after the given time. We assume
// the metrics are stored in ascending time.
// Iterate backwards until we reach the first value before the given time
// or the start of the slice.
func (c *Collection) Since(t time.Time) ([]Metric, error) {
if c == nil {
return nil, errors.New("Collection.Since: c == nil")
}
c.Lock()
defer c.Unlock()
if len(c.collection) == 0 {
return nil, nil // nothing to return
}
last := len(c.collection)
first := last - 1
done := false
for !done {
if c.collection[first].When().After(t) || c.collection[first].When().Equal(t) {
if first == 0 {
break // as can't go lower
}
first--
} else {
if first != last {
first++ // go back one (except if we're already at the end)
}
break
}
}
return c.collection[first:last], nil
}
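// Worked example (assumed data): with metrics taken at times t1 < t2 < t3
// and a call Since(t2), the backwards scan stops at the metric taken at t1
// (the first value before t2), steps forward one, and returns the metrics
// taken at t2 and t3, i.e. c.collection[1:3].
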
// removeBefore is called by StartAutoExpiration and removes collection values
// before the given time.
func (c *Collection) removeBefore(t time.Time) error {
if c == nil {
return errors.New("Collection.removeBefore: c == nil")
}
c.Lock()
defer c.Unlock()
cLen := len(c.collection)
if cLen == 0 {
return nil // we have a collection but no data
}
// remove old data here.
first := 0
done := false
for !done {
if c.collection[first].When().Before(t) {
first++
if first == cLen {
break
}
} else {
first--
break
}
}
// get the interval we need.
if first == len(c.collection) {
c.collection = nil // remove all entries
} else if first != -1 {
c.collection = c.collection[first:]
}
return nil // no errors
}
// Append a new Metric to the existing collection
func (c *Collection) Append(m Metric) error {
if c == nil {
return errors.New("Collection.Append: c == nil")
}
c.Lock()
defer c.Unlock()
// we don't want to add nil metrics
if m == nil {
return errors.New("Collection.Append: m == nil")
}
c.collection = append(c.collection, m)
return nil
}
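
A minimal usage sketch for this package, assuming config.Config.DiscoveryCollectionRetentionSeconds carries its default; the metric type and collection name are illustrative:

package main

import (
	"fmt"
	"time"

	"vitess.io/vitess/go/vt/orchestrator/collection"
)

// pollMetric is a hypothetical Metric implementation recording when a poll happened.
type pollMetric struct {
	taken time.Time
}

func (m *pollMetric) When() time.Time { return m.taken }

func main() {
	// Auto-expiry starts automatically for collections created this way.
	c := collection.CreateOrReturnCollection("discovery")
	_ = c.Append(&pollMetric{taken: time.Now()})
	// Pull everything collected over the last minute.
	recent, err := c.Since(time.Now().Add(-time.Minute))
	if err != nil {
		panic(err)
	}
	fmt.Println(len(recent))
}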


@ -0,0 +1,104 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package collection
import (
"testing"
"time"
)
var randomString = []string{
"RANDOM_STRING",
"SOME_OTHER_STRING",
}
// some random base timestamp
var ts = time.Date(2016, 12, 27, 13, 36, 40, 0, time.Local)
// TestCreateOrReturn tests the creation of a named Collection
func TestCreateOrReturnCollection(t *testing.T) {
name := randomString[0]
// check we get the same reference with a single name
c1 := CreateOrReturnCollection(name)
if c1 == nil {
// should not be empty
t.Errorf("TestCreateOrReturn: c1 == nil, name=%s", name)
}
c2 := CreateOrReturnCollection(name)
if c2 == nil || c2 != c1 {
t.Errorf("TestCreateOrReturn: c2 == nil || c2 != c1")
// should not be empty, or different to c1
}
name = randomString[1]
// check we get a new reference and it's different to what we had before
c3 := CreateOrReturnCollection(name)
if c3 == nil || c3 == c1 {
// should not be empty, or same as c1
t.Errorf("TestCreateOrReturn: c3 == nil || c3 == c1")
}
c4 := CreateOrReturnCollection(name)
// check our reference matches c3 but not c2/c1
if c4 == nil || c4 != c3 || c4 == c2 {
t.Errorf("TestCreateOrReturn: c3 == nil || c4 != c3 || c4 == c2")
}
}
// TestExpirePeriod checks that the set expire period is returned
func TestExpirePeriod(t *testing.T) {
oneSecond := time.Second
twoSeconds := 2 * oneSecond
// create a new collection
c := &Collection{}
// check if we change it we get back the value we provided
c.SetExpirePeriod(oneSecond)
if c.ExpirePeriod() != oneSecond {
t.Errorf("TestExpirePeriod: did not get back oneSecond")
}
// change the period and check again
c.SetExpirePeriod(twoSeconds)
if c.ExpirePeriod() != twoSeconds {
t.Errorf("TestExpirePeriod: did not get back twoSeconds")
}
}
// dummy structure for testing
type testMetric struct {
}
func (tm *testMetric) When() time.Time {
return ts
}
// check that Append() works as expected
func TestAppend(t *testing.T) {
c := &Collection{}
if len(c.Metrics()) != 0 {
t.Errorf("TestAppend: len(Metrics) = %d, expecting %d", len(c.Metrics()), 0)
}
for _, v := range []int{1, 2, 3} {
tm := &testMetric{}
c.Append(tm)
if len(c.Metrics()) != v {
t.Errorf("TestExpirePeriod: len(Metrics) = %d, expecting %d", len(c.Metrics()), v)
}
}
}


@ -0,0 +1,37 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
// CLIFlags stores some command-line flags that are globally available throughout the process's lifetime
type CLIFlags struct {
Noop *bool
SkipUnresolve *bool
SkipUnresolveCheck *bool
BinlogFile *string
GrabElection *bool
Version *bool
Statement *string
PromotionRule *string
ConfiguredVersion string
SkipBinlogSearch *bool
SkipContinuousRegistration *bool
EnableDatabaseUpdate *bool
IgnoreRaftSetup *bool
Tag *string
}
// RuntimeCLIFlags is the global instance of CLIFlags, populated at process startup
var RuntimeCLIFlags CLIFlags
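
A sketch of how a process might wire these fields to the standard flag package; the flag names below are illustrative assumptions, not necessarily the ones orchestrator registers:

package main

import (
	"flag"

	"vitess.io/vitess/go/vt/orchestrator/config"
)

func main() {
	// Each CLIFlags field is a pointer, so it can be bound directly to a flag.
	config.RuntimeCLIFlags.Noop = flag.Bool("noop", false, "dry run; do not apply changes")
	config.RuntimeCLIFlags.GrabElection = flag.Bool("grab-election", false, "forcibly grab leadership")
	config.RuntimeCLIFlags.Tag = flag.String("tag", "", "tag value for tag-related commands")
	flag.Parse()
}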


@ -0,0 +1,677 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"encoding/json"
"fmt"
"net/url"
"os"
"regexp"
"strings"
"gopkg.in/gcfg.v1"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
var (
envVariableRegexp = regexp.MustCompile("[$][{](.*)[}]")
)
const (
LostInRecoveryDowntimeSeconds int = 60 * 60 * 24 * 365
DefaultStatusAPIEndpoint = "/api/status"
)
var configurationLoaded = make(chan bool)
const (
HealthPollSeconds = 1
RaftHealthPollSeconds = 10
RecoveryPollSeconds = 1
ActiveNodeExpireSeconds = 5
BinlogFileHistoryDays = 1
MaintenanceOwner = "orchestrator"
AuditPageSize = 20
MaintenancePurgeDays = 7
MySQLTopologyMaxPoolConnections = 3
MaintenanceExpireMinutes = 10
AgentHttpTimeoutSeconds = 60
PseudoGTIDCoordinatesHistoryHeuristicMinutes = 2
DebugMetricsIntervalSeconds = 10
PseudoGTIDSchema = "_pseudo_gtid_"
PseudoGTIDIntervalSeconds = 5
PseudoGTIDExpireMinutes = 60
StaleInstanceCoordinatesExpireSeconds = 60
CheckAutoPseudoGTIDGrantsIntervalSeconds = 60
SelectTrueQuery = "select 1"
)
var deprecatedConfigurationVariables = []string{
"DatabaselessMode__experimental",
"BufferBinlogEvents",
"BinlogFileHistoryDays",
"MaintenanceOwner",
"ReadLongRunningQueries",
"DiscoveryPollSeconds",
"ActiveNodeExpireSeconds",
"AuditPageSize",
"SlaveStartPostWaitMilliseconds",
"MySQLTopologyMaxPoolConnections",
"MaintenancePurgeDays",
"MaintenanceExpireMinutes",
"HttpTimeoutSeconds",
"AgentAutoDiscover",
"PseudoGTIDCoordinatesHistoryHeuristicMinutes",
"PseudoGTIDPreferIndependentMultiMatch",
"MaxOutdatedKeysToShow",
}
// Configuration defines orchestrator configuration input, which the user can provide via a JSON-formatted file.
// Some of the parameters have reasonable default values, and some (like database credentials) are
// strictly expected from the user.
type Configuration struct {
Debug bool // set debug mode (similar to --debug option)
EnableSyslog bool // Should logs be directed (in addition) to syslog daemon?
ListenAddress string // Where orchestrator HTTP should listen for TCP
ListenSocket string // Where orchestrator HTTP should listen for unix socket (default: empty; when given, TCP is disabled)
HTTPAdvertise string // optional, for raft setups, what is the HTTP address this node will advertise to its peers (potentially use where behind NAT or when rerouting ports; example: "http://11.22.33.44:3030")
AgentsServerPort string // port orchestrator agents talk back to
MySQLTopologyUser string
MySQLTopologyPassword string
MySQLTopologyCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. Expecting `user`, `password` under `[client]` section
MySQLTopologySSLPrivateKeyFile string // Private key file used to authenticate with a Topology mysql instance with TLS
MySQLTopologySSLCertFile string // Certificate PEM file used to authenticate with a Topology mysql instance with TLS
MySQLTopologySSLCAFile string // Certificate Authority PEM file used to authenticate with a Topology mysql instance with TLS
MySQLTopologySSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for Topology mysql instances
MySQLTopologyUseMutualTLS bool // Turn on TLS authentication with the Topology MySQL instances
MySQLTopologyUseMixedTLS bool // Mixed TLS and non-TLS authentication with the Topology MySQL instances
TLSCacheTTLFactor uint // Factor of InstancePollSeconds that we set as TLS info cache expiry
BackendDB string // EXPERIMENTAL: type of backend db; either "mysql" or "sqlite3"
SQLite3DataFile string // when BackendDB == "sqlite3", full path to sqlite3 datafile
SkipOrchestratorDatabaseUpdate bool // When true, do not check backend database schema nor attempt to update it. Useful when you may be running multiple versions of orchestrator, and you only wish certain boxes to dictate the db structure (or else any time a different orchestrator version runs it will rebuild database schema)
PanicIfDifferentDatabaseDeploy bool // When true, and this process finds the orchestrator backend DB was provisioned by a different version, panic
RaftEnabled bool // When true, setup orchestrator in a raft consensus layout. When false (default) all Raft* variables are ignored
RaftBind string
RaftAdvertise string
RaftDataDir string
DefaultRaftPort int // if a RaftNodes entry does not specify port, use this one
RaftNodes []string // Raft nodes to make initial connection with
ExpectFailureAnalysisConcensus bool
MySQLOrchestratorHost string
MySQLOrchestratorMaxPoolConnections int // The maximum size of the connection pool to the Orchestrator backend.
MySQLOrchestratorPort uint
MySQLOrchestratorDatabase string
MySQLOrchestratorUser string
MySQLOrchestratorPassword string
MySQLOrchestratorCredentialsConfigFile string // my.cnf style configuration file from where to pick credentials. Expecting `user`, `password` under `[client]` section
MySQLOrchestratorSSLPrivateKeyFile string // Private key file used to authenticate with the Orchestrator mysql instance with TLS
MySQLOrchestratorSSLCertFile string // Certificate PEM file used to authenticate with the Orchestrator mysql instance with TLS
MySQLOrchestratorSSLCAFile string // Certificate Authority PEM file used to authenticate with the Orchestrator mysql instance with TLS
MySQLOrchestratorSSLSkipVerify bool // If true, do not strictly validate mutual TLS certs for the Orchestrator mysql instances
MySQLOrchestratorUseMutualTLS bool // Turn on TLS authentication with the Orchestrator MySQL instance
MySQLOrchestratorReadTimeoutSeconds int // Number of seconds before backend mysql read operation is aborted (driver-side)
MySQLOrchestratorRejectReadOnly bool // Reject read only connections https://github.com/go-sql-driver/mysql#rejectreadonly
MySQLConnectTimeoutSeconds int // Number of seconds before connection is aborted (driver-side)
MySQLDiscoveryReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for discovery queries.
MySQLTopologyReadTimeoutSeconds int // Number of seconds before topology mysql read operation is aborted (driver-side). Used for all but discovery queries.
MySQLConnectionLifetimeSeconds int // Number of seconds the mysql driver will keep database connection alive before recycling it
DefaultInstancePort int // In case port was not specified on command line
SlaveLagQuery string // Synonym to ReplicationLagQuery
ReplicationLagQuery string // custom query to check on replica lag (e.g. heartbeat table). Must return a single row with a single numeric column, which is the lag.
ReplicationCredentialsQuery string // custom query to get replication credentials. Must return a single row, with two text columns: 1st is username, 2nd is password. This is optional, and can be used by orchestrator to configure replication after master takeover or setup of co-masters. You need to ensure the orchestrator user has the privileges to run this query
DiscoverByShowSlaveHosts bool // Attempt SHOW SLAVE HOSTS before PROCESSLIST
UseSuperReadOnly bool // Should orchestrator set super_read_only any time it sets read_only
InstancePollSeconds uint // Number of seconds between instance reads
InstanceWriteBufferSize int // Instance write buffer size (max number of instances to flush in one INSERT ODKU)
BufferInstanceWrites bool // Set to 'true' for write-optimization on backend table (compromise: writes can be stale and overwrite non stale data)
InstanceFlushIntervalMilliseconds int // Max interval between instance write buffer flushes
SkipMaxScaleCheck bool // If you don't ever have MaxScale BinlogServer in your topology (and most people don't), set this to 'true' to save some pointless queries
UnseenInstanceForgetHours uint // Number of hours after which an unseen instance is forgotten
SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled)
DiscoveryMaxConcurrency uint // Number of goroutines doing hosts discovery
DiscoveryQueueCapacity uint // Buffer size of the discovery queue. Should be greater than the number of DB instances being discovered
DiscoveryQueueMaxStatisticsSize int // The maximum number of individual secondly statistics taken of the discovery queue
DiscoveryCollectionRetentionSeconds uint // Number of seconds to retain the discovery collection information
DiscoverySeeds []string // Hard coded array of hostname:port, ensuring orchestrator discovers these hosts upon startup, assuming not already known to orchestrator
InstanceBulkOperationsWaitTimeoutSeconds uint // Time to wait on a single instance when doing bulk (many instances) operation
HostnameResolveMethod string // Method by which to "normalize" hostname ("none"/"default"/"cname")
MySQLHostnameResolveMethod string // Method by which to "normalize" hostname via MySQL server. ("none"/"@@hostname"/"@@report_host"; default "@@hostname")
SkipBinlogServerUnresolveCheck bool // Skip the double-check that an unresolved hostname resolves back to same hostname for binlog servers
ExpiryHostnameResolvesMinutes int // Number of minutes after which to expire hostname-resolves
RejectHostnameResolvePattern string // Regexp pattern for resolved hostname that will not be accepted (not cached, not written to db). This is done to avoid storing wrong resolves due to network glitches.
ReasonableReplicationLagSeconds int // Above this value is considered a problem
ProblemIgnoreHostnameFilters []string // Will minimize problem visualization for hostnames matching given regexp filters
VerifyReplicationFilters bool // Include replication filters check before approving topology refactoring
ReasonableMaintenanceReplicationLagSeconds int // Above this value move-up and move-below are blocked
CandidateInstanceExpireMinutes uint // Minutes after which a suggestion to use an instance as a candidate replica (to be preferably promoted on master failover) is expired.
AuditLogFile string // Name of log file for audit operations. Disabled when empty.
AuditToSyslog bool // If true, audit messages are written to syslog
AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true)
AuditPurgeDays uint // Days after which audit entries are purged from the database
RemoveTextFromHostnameDisplay string // Text to strip off the hostname on cluster/clusters pages
ReadOnly bool
AuthenticationMethod string // Type of authentication to use, if any. "" for none, "basic" for BasicAuth, "multi" for advanced BasicAuth, "proxy" for forwarded credentials via reverse proxy, "token" for token based access
OAuthClientId string
OAuthClientSecret string
OAuthScopes []string
HTTPAuthUser string // Username for HTTP Basic authentication (blank disables authentication)
HTTPAuthPassword string // Password for HTTP Basic authentication
AuthUserHeader string // HTTP header indicating auth user, when AuthenticationMethod is "proxy"
PowerAuthUsers []string // On AuthenticationMethod == "proxy", list of users that can make changes. All others are read-only.
PowerAuthGroups []string // list of unix groups the authenticated user must be a member of to make changes.
AccessTokenUseExpirySeconds uint // Time by which an issued token must be used
AccessTokenExpiryMinutes uint // Time after which HTTP access token expires
ClusterNameToAlias map[string]string // map between regex matching cluster name to a human friendly alias
DetectClusterAliasQuery string // Optional query (executed on topology instance) that returns the alias of a cluster. Query will only be executed on cluster master (though until the topology's master is resolved it may execute on other/all replicas). If provided, must return one row, one column
DetectClusterDomainQuery string // Optional query (executed on topology instance) that returns the VIP/CNAME/Alias/whatever domain name for the master of this cluster. Query will only be executed on cluster master (though until the topology's master is resolved it may execute on other/all replicas). If provided, must return one row, one column
DetectInstanceAliasQuery string // Optional query (executed on topology instance) that returns the alias of an instance. If provided, must return one row, one column
DetectPromotionRuleQuery string // Optional query (executed on topology instance) that returns the promotion rule of an instance. If provided, must return one row, one column.
DataCenterPattern string // Regexp pattern with one group, extracting the datacenter name from the hostname
RegionPattern string // Regexp pattern with one group, extracting the region name from the hostname
PhysicalEnvironmentPattern string // Regexp pattern with one group, extracting physical environment info from hostname (e.g. combination of datacenter & prod/dev env)
DetectDataCenterQuery string // Optional query (executed on topology instance) that returns the data center of an instance. If provided, must return one row, one column. Overrides DataCenterPattern and useful for installments where DC cannot be inferred by hostname
DetectRegionQuery string // Optional query (executed on topology instance) that returns the region of an instance. If provided, must return one row, one column. Overrides RegionPattern and useful for installments where Region cannot be inferred by hostname
DetectPhysicalEnvironmentQuery string // Optional query (executed on topology instance) that returns the physical environment of an instance. If provided, must return one row, one column. Overrides PhysicalEnvironmentPattern and useful for installments where env cannot be inferred by hostname
DetectSemiSyncEnforcedQuery string // Optional query (executed on topology instance) to determine whether semi-sync is fully enforced for master writes (async fallback is not allowed under any circumstance). If provided, must return one row, one column, value 0 or 1.
SupportFuzzyPoolHostnames bool // Should "submit-pool-instances" command be able to pass list of fuzzy instances (fuzzy means non-fqdn, but unique enough to recognize). Defaults 'true', implies more queries on backend db
InstancePoolExpiryMinutes uint // Time after which entries in database_instance_pool are expired (resubmit via `submit-pool-instances`)
PromotionIgnoreHostnameFilters []string // Orchestrator will not promote replicas with hostname matching pattern (via -c recovery; for example, avoid promoting dev-dedicated machines)
ServeAgentsHttp bool // Spawn another HTTP interface dedicated for orchestrator-agent
AgentsUseSSL bool // When "true" orchestrator will listen on agents port with SSL as well as connect to agents via SSL
AgentsUseMutualTLS bool // When "true" Use mutual TLS for the server to agent communication
AgentSSLSkipVerify bool // When using SSL for the Agent, should we ignore SSL certification error
AgentSSLPrivateKeyFile string // Name of Agent SSL private key file, applies only when AgentsUseSSL = true
AgentSSLCertFile string // Name of Agent SSL certification file, applies only when AgentsUseSSL = true
AgentSSLCAFile string // Name of the Agent Certificate Authority file, applies only when AgentsUseSSL = true
AgentSSLValidOUs []string // Valid organizational units when using mutual TLS to communicate with the agents
UseSSL bool // Use SSL on the server web port
UseMutualTLS bool // When "true" Use mutual TLS for the server's web and API connections
SSLSkipVerify bool // When using SSL, should we ignore SSL certification error
SSLPrivateKeyFile string // Name of SSL private key file, applies only when UseSSL = true
SSLCertFile string // Name of SSL certification file, applies only when UseSSL = true
SSLCAFile string // Name of the Certificate Authority file, applies only when UseSSL = true
SSLValidOUs []string // Valid organizational units when using mutual TLS
StatusEndpoint string // Override the status endpoint. Defaults to '/api/status'
StatusOUVerify bool // If true, try to verify OUs when Mutual TLS is on. Defaults to false
AgentPollMinutes uint // Minutes between agent polling
UnseenAgentForgetHours uint // Number of hours after which an unseen agent is forgotten
StaleSeedFailMinutes uint // Number of minutes after which a stale (no progress) seed is considered failed.
SeedAcceptableBytesDiff int64 // Difference in bytes between seed source & target data size that is still considered as successful copy
SeedWaitSecondsBeforeSend int64 // Number of seconds for waiting before start send data command on agent
AutoPseudoGTID bool // Should orchestrator automatically inject Pseudo-GTID entries to the masters
PseudoGTIDPattern string // Pattern to look for in binary logs that makes for a unique entry (pseudo GTID). When empty, Pseudo-GTID based refactoring is disabled.
PseudoGTIDPatternIsFixedSubstring bool // If true, then PseudoGTIDPattern is not treated as regular expression but as fixed substring, and can boost search time
PseudoGTIDMonotonicHint string // substring in Pseudo-GTID entry which indicates Pseudo-GTID entries are expected to be monotonically increasing
DetectPseudoGTIDQuery string // Optional query which is used to authoritatively decide whether pseudo gtid is enabled on instance
BinlogEventsChunkSize int // Chunk size (X) for SHOW BINLOG|RELAYLOG EVENTS LIMIT ?,X statements. Smaller means less locking and more work to be done
SkipBinlogEventsContaining []string // When scanning/comparing binlogs for Pseudo-GTID, skip entries containing given texts. These are NOT regular expressions (would consume too much CPU while scanning binlogs), just substrings to find.
ReduceReplicationAnalysisCount bool // When true, replication analysis will only report instances where possibility of handled problems is possible in the first place (e.g. will not report most leaf nodes, that are mostly uninteresting). When false, provides an entry for every known instance
FailureDetectionPeriodBlockMinutes int // The time for which an instance's failure discovery is kept "active", so as to avoid concurrent "discoveries" of the instance's failure; this precedes any recovery process, if any.
RecoveryPeriodBlockMinutes int // (supported for backwards compatibility but please use newer `RecoveryPeriodBlockSeconds` instead) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping
RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping
RecoveryIgnoreHostnameFilters []string // Recovery analysis will completely ignore hosts matching given patterns
RecoverMasterClusterFilters []string // Only do master recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything)
RecoverIntermediateMasterClusterFilters []string // Only do IM recovery on clusters matching these regexp patterns (of course the ".*" pattern matches everything)
ProcessesShellCommand string // Shell that executes command scripts
OnFailureDetectionProcesses []string // Processes to execute when detecting a failover scenario (before making a decision whether to failover or not). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {autoMasterRecovery}, {autoIntermediateMasterRecovery}
PreGracefulTakeoverProcesses []string // Processes to execute before doing a graceful master takeover (aborting operation should any one of them exit with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PreFailoverProcesses []string // Processes to execute before doing a failover (aborting operation should any one of them exit with non-zero code; order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {countReplicas}, {replicaHosts}, {isDowntimed}
PostFailoverProcesses []string // Processes to execute after doing a failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostUnsuccessfulFailoverProcesses []string // Processes to execute after a not-completely-successful failover (order of execution undefined). May and should use some of these placeholders: {failureType}, {instanceType}, {isMaster}, {isCoMaster}, {failureDescription}, {command}, {failedHost}, {failureCluster}, {failureClusterAlias}, {failureClusterDomain}, {failedPort}, {successorHost}, {successorPort}, {successorAlias}, {countReplicas}, {replicaHosts}, {isDowntimed}, {isSuccessful}, {lostReplicas}, {countLostReplicas}
PostMasterFailoverProcesses []string // Processes to execute after doing a master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses
PostIntermediateMasterFailoverProcesses []string // Processes to execute after doing an intermediate master failover (order of execution undefined). Uses same placeholders as PostFailoverProcesses
PostGracefulTakeoverProcesses []string // Processes to execute after running a graceful master takeover. Uses same placeholders as PostFailoverProcesses
PostTakeMasterProcesses []string // Processes to execute after a successful Take-Master event has taken place
CoMasterRecoveryMustPromoteOtherCoMaster bool // When 'false', anything can get promoted (and candidates are preferred over others). When 'true', orchestrator will promote the other co-master or else fail
DetachLostSlavesAfterMasterFailover bool // synonym to DetachLostReplicasAfterMasterFailover
DetachLostReplicasAfterMasterFailover bool // Should replicas that are to be lost in master recovery (i.e. were more up-to-date than the promoted replica) be forcibly detached
ApplyMySQLPromotionAfterMasterFailover bool // Should orchestrator take upon itself to apply MySQL master promotion: set read_only=0, detach replication, etc.
PreventCrossDataCenterMasterFailover bool // When true (default: false), cross-DC master failover are not allowed, orchestrator will do all it can to only fail over within same DC, or else not fail over at all.
PreventCrossRegionMasterFailover bool // When true (default: false), cross-region master failover are not allowed, orchestrator will do all it can to only fail over within same region, or else not fail over at all.
MasterFailoverLostInstancesDowntimeMinutes uint // Number of minutes to downtime any server that was lost after a master failover (including failed master & lost replicas). 0 to disable
MasterFailoverDetachSlaveMasterHost bool // synonym to MasterFailoverDetachReplicaMasterHost
MasterFailoverDetachReplicaMasterHost bool // Should orchestrator issue a detach-replica-master-host on newly promoted master (this makes sure the new master will not attempt to replicate old master if that comes back to life). Defaults 'false'. Meaningless if ApplyMySQLPromotionAfterMasterFailover is 'true'.
FailMasterPromotionOnLagMinutes uint // when > 0, fail a master promotion if the candidate replica is lagging >= configured number of minutes.
FailMasterPromotionIfSQLThreadNotUpToDate bool // when true, and a master failover takes place, if candidate master has not consumed all relay logs, promotion is aborted with error
DelayMasterPromotionIfSQLThreadNotUpToDate bool // when true, and a master failover takes place, if candidate master has not consumed all relay logs, delay promotion until the sql thread has caught up
PostponeSlaveRecoveryOnLagMinutes uint // Synonym to PostponeReplicaRecoveryOnLagMinutes
PostponeReplicaRecoveryOnLagMinutes uint // On crash recovery, replicas that are lagging more than given minutes are only resurrected late in the recovery process, after master/IM has been elected and processes executed. Value of 0 disables this feature
OSCIgnoreHostnameFilters []string // OSC replicas recommendation will ignore replica hostnames matching given patterns
GraphiteAddr string // Optional; address of graphite port. If supplied, metrics will be written here
GraphitePath string // Prefix for graphite path. May include {hostname} magic placeholder
GraphiteConvertHostnameDotsToUnderscores bool // If true, then hostname's dots are converted to underscores before being used in graphite path
GraphitePollSeconds int // Graphite writes interval. 0 disables.
URLPrefix string // URL prefix to run orchestrator on non-root web path, e.g. /orchestrator to put it behind nginx.
DiscoveryIgnoreReplicaHostnameFilters []string // Regexp filters to apply to prevent auto-discovering new replicas. Usage: unreachable servers due to firewalls, applications which trigger binlog dumps
DiscoveryIgnoreMasterHostnameFilters []string // Regexp filters to apply to prevent auto-discovering a master. Usage: pointing your master temporarily to replicate some data from external host
DiscoveryIgnoreHostnameFilters []string // Regexp filters to apply to prevent discovering instances of any kind
ConsulAddress string // Address where Consul HTTP api is found. Example: 127.0.0.1:8500
ConsulScheme string // Scheme (http or https) for Consul
ConsulAclToken string // ACL token used to write to Consul KV
ConsulCrossDataCenterDistribution bool // should orchestrator automatically auto-deduce all consul DCs and write KVs in all DCs
ZkAddress string // UNSUPPORTED YET. Address where (single or multiple) ZooKeeper servers are found, in `srv1[:port1][,srv2[:port2]...]` format. Default port is 2181. Example: srv-a,srv-b:12181,srv-c
KVClusterMasterPrefix string // Prefix to use for clusters' masters entries in KV stores (internal, consul, ZK), default: "mysql/master"
WebMessage string // If provided, will be shown on all web pages below the title bar
MaxConcurrentReplicaOperations int // Maximum number of concurrent operations on replicas
}
// ToJSONString will marshal this configuration as JSON
func (this *Configuration) ToJSONString() string {
b, _ := json.Marshal(this)
return string(b)
}
// Config is *the* configuration instance, used globally to get configuration data
var Config = newConfiguration()
var readFileNames []string
func newConfiguration() *Configuration {
return &Configuration{
Debug: false,
EnableSyslog: false,
ListenAddress: ":3000",
ListenSocket: "",
HTTPAdvertise: "",
AgentsServerPort: ":3001",
StatusEndpoint: DefaultStatusAPIEndpoint,
StatusOUVerify: false,
BackendDB: "mysql",
SQLite3DataFile: "",
SkipOrchestratorDatabaseUpdate: false,
PanicIfDifferentDatabaseDeploy: false,
RaftBind: "127.0.0.1:10008",
RaftAdvertise: "",
RaftDataDir: "",
DefaultRaftPort: 10008,
RaftNodes: []string{},
ExpectFailureAnalysisConcensus: true,
MySQLOrchestratorMaxPoolConnections: 128, // limit concurrent conns to backend DB
MySQLOrchestratorPort: 3306,
MySQLTopologyUseMutualTLS: false,
MySQLTopologyUseMixedTLS: true,
MySQLOrchestratorUseMutualTLS: false,
MySQLConnectTimeoutSeconds: 2,
MySQLOrchestratorReadTimeoutSeconds: 30,
MySQLOrchestratorRejectReadOnly: false,
MySQLDiscoveryReadTimeoutSeconds: 10,
MySQLTopologyReadTimeoutSeconds: 600,
MySQLConnectionLifetimeSeconds: 0,
DefaultInstancePort: 3306,
TLSCacheTTLFactor: 100,
InstancePollSeconds: 5,
InstanceWriteBufferSize: 100,
BufferInstanceWrites: false,
InstanceFlushIntervalMilliseconds: 100,
SkipMaxScaleCheck: true,
UnseenInstanceForgetHours: 240,
SnapshotTopologiesIntervalHours: 0,
DiscoverByShowSlaveHosts: false,
UseSuperReadOnly: false,
DiscoveryMaxConcurrency: 300,
DiscoveryQueueCapacity: 100000,
DiscoveryQueueMaxStatisticsSize: 120,
DiscoveryCollectionRetentionSeconds: 120,
DiscoverySeeds: []string{},
InstanceBulkOperationsWaitTimeoutSeconds: 10,
HostnameResolveMethod: "default",
MySQLHostnameResolveMethod: "@@hostname",
SkipBinlogServerUnresolveCheck: true,
ExpiryHostnameResolvesMinutes: 60,
RejectHostnameResolvePattern: "",
ReasonableReplicationLagSeconds: 10,
ProblemIgnoreHostnameFilters: []string{},
VerifyReplicationFilters: false,
ReasonableMaintenanceReplicationLagSeconds: 20,
CandidateInstanceExpireMinutes: 60,
AuditLogFile: "",
AuditToSyslog: false,
AuditToBackendDB: false,
AuditPurgeDays: 7,
RemoveTextFromHostnameDisplay: "",
ReadOnly: false,
AuthenticationMethod: "",
HTTPAuthUser: "",
HTTPAuthPassword: "",
AuthUserHeader: "X-Forwarded-User",
PowerAuthUsers: []string{"*"},
PowerAuthGroups: []string{},
AccessTokenUseExpirySeconds: 60,
AccessTokenExpiryMinutes: 1440,
ClusterNameToAlias: make(map[string]string),
DetectClusterAliasQuery: "",
DetectClusterDomainQuery: "",
DetectInstanceAliasQuery: "",
DetectPromotionRuleQuery: "",
DataCenterPattern: "",
PhysicalEnvironmentPattern: "",
DetectDataCenterQuery: "",
DetectPhysicalEnvironmentQuery: "",
DetectSemiSyncEnforcedQuery: "",
SupportFuzzyPoolHostnames: true,
InstancePoolExpiryMinutes: 60,
PromotionIgnoreHostnameFilters: []string{},
ServeAgentsHttp: false,
AgentsUseSSL: false,
AgentsUseMutualTLS: false,
AgentSSLValidOUs: []string{},
AgentSSLSkipVerify: false,
AgentSSLPrivateKeyFile: "",
AgentSSLCertFile: "",
AgentSSLCAFile: "",
UseSSL: false,
UseMutualTLS: false,
SSLValidOUs: []string{},
SSLSkipVerify: false,
SSLPrivateKeyFile: "",
SSLCertFile: "",
SSLCAFile: "",
AgentPollMinutes: 60,
UnseenAgentForgetHours: 6,
StaleSeedFailMinutes: 60,
SeedAcceptableBytesDiff: 8192,
SeedWaitSecondsBeforeSend: 2,
AutoPseudoGTID: false,
PseudoGTIDPattern: "",
PseudoGTIDPatternIsFixedSubstring: false,
PseudoGTIDMonotonicHint: "",
DetectPseudoGTIDQuery: "",
BinlogEventsChunkSize: 10000,
SkipBinlogEventsContaining: []string{},
ReduceReplicationAnalysisCount: true,
FailureDetectionPeriodBlockMinutes: 60,
RecoveryPeriodBlockMinutes: 60,
RecoveryPeriodBlockSeconds: 3600,
RecoveryIgnoreHostnameFilters: []string{},
RecoverMasterClusterFilters: []string{},
RecoverIntermediateMasterClusterFilters: []string{},
ProcessesShellCommand: "bash",
OnFailureDetectionProcesses: []string{},
PreGracefulTakeoverProcesses: []string{},
PreFailoverProcesses: []string{},
PostMasterFailoverProcesses: []string{},
PostIntermediateMasterFailoverProcesses: []string{},
PostFailoverProcesses: []string{},
PostUnsuccessfulFailoverProcesses: []string{},
PostGracefulTakeoverProcesses: []string{},
PostTakeMasterProcesses: []string{},
CoMasterRecoveryMustPromoteOtherCoMaster: true,
DetachLostSlavesAfterMasterFailover: true,
ApplyMySQLPromotionAfterMasterFailover: true,
PreventCrossDataCenterMasterFailover: false,
PreventCrossRegionMasterFailover: false,
MasterFailoverLostInstancesDowntimeMinutes: 0,
MasterFailoverDetachSlaveMasterHost: false,
FailMasterPromotionOnLagMinutes: 0,
FailMasterPromotionIfSQLThreadNotUpToDate: false,
DelayMasterPromotionIfSQLThreadNotUpToDate: false,
PostponeSlaveRecoveryOnLagMinutes: 0,
OSCIgnoreHostnameFilters: []string{},
GraphiteAddr: "",
GraphitePath: "",
GraphiteConvertHostnameDotsToUnderscores: true,
GraphitePollSeconds: 60,
URLPrefix: "",
DiscoveryIgnoreReplicaHostnameFilters: []string{},
ConsulAddress: "",
ConsulScheme: "http",
ConsulAclToken: "",
ConsulCrossDataCenterDistribution: false,
ZkAddress: "",
KVClusterMasterPrefix: "mysql/master",
WebMessage: "",
MaxConcurrentReplicaOperations: 5,
}
}
func (this *Configuration) postReadAdjustments() error {
if this.MySQLOrchestratorCredentialsConfigFile != "" {
mySQLConfig := struct {
Client struct {
User string
Password string
}
}{}
err := gcfg.ReadFileInto(&mySQLConfig, this.MySQLOrchestratorCredentialsConfigFile)
if err != nil {
log.Fatalf("Failed to parse gcfg data from file: %+v", err)
} else {
log.Debugf("Parsed orchestrator credentials from %s", this.MySQLOrchestratorCredentialsConfigFile)
this.MySQLOrchestratorUser = mySQLConfig.Client.User
this.MySQLOrchestratorPassword = mySQLConfig.Client.Password
}
}
{
// We accept password in the form "${SOME_ENV_VARIABLE}" in which case we pull
// the given variable from os env
submatch := envVariableRegexp.FindStringSubmatch(this.MySQLOrchestratorPassword)
if len(submatch) > 1 {
this.MySQLOrchestratorPassword = os.Getenv(submatch[1])
}
}
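// Example: with MySQLOrchestratorPassword set to "${ORC_DB_PASSWORD}" in the
// config file, the effective password becomes os.Getenv("ORC_DB_PASSWORD").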
if this.MySQLTopologyCredentialsConfigFile != "" {
mySQLConfig := struct {
Client struct {
User string
Password string
}
}{}
err := gcfg.ReadFileInto(&mySQLConfig, this.MySQLTopologyCredentialsConfigFile)
if err != nil {
log.Fatalf("Failed to parse gcfg data from file: %+v", err)
} else {
log.Debugf("Parsed topology credentials from %s", this.MySQLTopologyCredentialsConfigFile)
this.MySQLTopologyUser = mySQLConfig.Client.User
this.MySQLTopologyPassword = mySQLConfig.Client.Password
}
}
{
// We accept password in the form "${SOME_ENV_VARIABLE}" in which case we pull
// the given variable from os env
submatch := envVariableRegexp.FindStringSubmatch(this.MySQLTopologyPassword)
if len(submatch) > 1 {
this.MySQLTopologyPassword = os.Getenv(submatch[1])
}
}
if this.RecoveryPeriodBlockSeconds == 0 && this.RecoveryPeriodBlockMinutes > 0 {
// RecoveryPeriodBlockSeconds is a newer addition that overrides RecoveryPeriodBlockMinutes
// The code does not consider RecoveryPeriodBlockMinutes anymore, but RecoveryPeriodBlockMinutes
// still supported in config file for backwards compatibility
this.RecoveryPeriodBlockSeconds = this.RecoveryPeriodBlockMinutes * 60
}
{
if this.ReplicationLagQuery != "" && this.SlaveLagQuery != "" && this.ReplicationLagQuery != this.SlaveLagQuery {
return fmt.Errorf("config's ReplicationLagQuery and SlaveLagQuery are synonyms and cannot both be defined")
}
// ReplicationLagQuery is the replacement param to SlaveLagQuery
if this.ReplicationLagQuery == "" {
this.ReplicationLagQuery = this.SlaveLagQuery
}
// We reset SlaveLagQuery because we want to support multiple config file loading;
// One of the next config files may indicate a new value for ReplicationLagQuery.
// If we do not reset SlaveLagQuery, then the two will have a conflict.
this.SlaveLagQuery = ""
}
{
if this.DetachLostSlavesAfterMasterFailover {
this.DetachLostReplicasAfterMasterFailover = true
}
}
{
if this.MasterFailoverDetachSlaveMasterHost {
this.MasterFailoverDetachReplicaMasterHost = true
}
}
if this.FailMasterPromotionIfSQLThreadNotUpToDate && this.DelayMasterPromotionIfSQLThreadNotUpToDate {
return fmt.Errorf("Cannot have both FailMasterPromotionIfSQLThreadNotUpToDate and DelayMasterPromotionIfSQLThreadNotUpToDate enabled")
}
if this.FailMasterPromotionOnLagMinutes > 0 && this.ReplicationLagQuery == "" {
return fmt.Errorf("nonzero FailMasterPromotionOnLagMinutes requires ReplicationLagQuery to be set")
}
{
if this.PostponeReplicaRecoveryOnLagMinutes != 0 && this.PostponeSlaveRecoveryOnLagMinutes != 0 &&
this.PostponeReplicaRecoveryOnLagMinutes != this.PostponeSlaveRecoveryOnLagMinutes {
return fmt.Errorf("config's PostponeReplicaRecoveryOnLagMinutes and PostponeSlaveRecoveryOnLagMinutes are synonyms and cannot both be defined")
}
if this.PostponeSlaveRecoveryOnLagMinutes != 0 {
this.PostponeReplicaRecoveryOnLagMinutes = this.PostponeSlaveRecoveryOnLagMinutes
}
}
if this.URLPrefix != "" {
// Ensure the prefix starts with "/" and has no trailing one.
this.URLPrefix = strings.TrimLeft(this.URLPrefix, "/")
this.URLPrefix = strings.TrimRight(this.URLPrefix, "/")
this.URLPrefix = "/" + this.URLPrefix
}
if this.IsSQLite() && this.SQLite3DataFile == "" {
return fmt.Errorf("SQLite3DataFile must be set when BackendDB is sqlite3")
}
// Commented-out adjustment kept for reference (currently disabled):
// if this.IsSQLite() { this.HostnameResolveMethod = "none" }
if this.RaftEnabled && this.RaftDataDir == "" {
return fmt.Errorf("RaftDataDir must be defined since raft is enabled (RaftEnabled)")
}
if this.RaftEnabled && this.RaftBind == "" {
return fmt.Errorf("RaftBind must be defined since raft is enabled (RaftEnabled)")
}
if this.RaftAdvertise == "" {
this.RaftAdvertise = this.RaftBind
}
if this.KVClusterMasterPrefix != "/" {
// "/" remains "/"
// "prefix" turns to "prefix/"
// "some/prefix///" turns to "some/prefix/"
this.KVClusterMasterPrefix = strings.TrimRight(this.KVClusterMasterPrefix, "/")
this.KVClusterMasterPrefix = fmt.Sprintf("%s/", this.KVClusterMasterPrefix)
}
if this.AutoPseudoGTID {
this.PseudoGTIDPattern = "drop view if exists `_pseudo_gtid_`"
this.PseudoGTIDPatternIsFixedSubstring = true
this.PseudoGTIDMonotonicHint = "asc:"
this.DetectPseudoGTIDQuery = SelectTrueQuery
}
if this.HTTPAdvertise != "" {
u, err := url.Parse(this.HTTPAdvertise)
if err != nil {
return fmt.Errorf("Failed parsing HTTPAdvertise %s: %s", this.HTTPAdvertise, err.Error())
}
if u.Scheme == "" {
return fmt.Errorf("If specified, HTTPAdvertise must include scheme (http:// or https://)")
}
if u.Hostname() == "" {
return fmt.Errorf("If specified, HTTPAdvertise must include host name")
}
if u.Port() == "" {
return fmt.Errorf("If specified, HTTPAdvertise must include port number")
}
if u.Path != "" {
return fmt.Errorf("If specified, HTTPAdvertise must not specify a path")
}
}
// This check is independent of HTTPAdvertise: disable write buffering when
// the buffer size is not positive.
if this.InstanceWriteBufferSize <= 0 {
this.BufferInstanceWrites = false
}
return nil
}
func (this *Configuration) IsSQLite() bool {
return strings.Contains(this.BackendDB, "sqlite")
}
func (this *Configuration) IsMySQL() bool {
return this.BackendDB == "mysql" || this.BackendDB == ""
}
// read reads configuration from given file, or silently skips if the file does not exist.
// If the file does exist, then it is expected to be in valid JSON format or the function bails out.
func read(fileName string) (*Configuration, error) {
if fileName == "" {
return Config, fmt.Errorf("Empty file name")
}
file, err := os.Open(fileName)
if err != nil {
return Config, err
}
decoder := json.NewDecoder(file)
err = decoder.Decode(Config)
if err == nil {
log.Infof("Read config: %s", fileName)
} else {
log.Fatal("Cannot read config file:", fileName, err)
}
if err := Config.postReadAdjustments(); err != nil {
log.Fatale(err)
}
return Config, err
}
// Read reads configuration from zero, one, some or all of the given files, in order of input.
// A file can override configuration provided in previous file.
func Read(fileNames ...string) *Configuration {
for _, fileName := range fileNames {
read(fileName)
}
readFileNames = fileNames
return Config
}
// ForceRead reads configuration from given file name or bails out if it fails
func ForceRead(fileName string) *Configuration {
_, err := read(fileName)
if err != nil {
log.Fatal("Cannot read config file:", fileName, err)
}
readFileNames = []string{fileName}
return Config
}
// Reload re-reads configuration from last used files
func Reload(extraFileNames ...string) *Configuration {
for _, fileName := range readFileNames {
read(fileName)
}
for _, fileName := range extraFileNames {
read(fileName)
}
return Config
}
// MarkConfigurationLoaded is called once configuration has first been loaded.
// Listeners on ConfigurationLoaded will get a notification
func MarkConfigurationLoaded() {
go func() {
for {
configurationLoaded <- true
}
}()
// wait for it
<-configurationLoaded
}
// WaitForConfigurationToBeLoaded does just that. It will return after
// the configuration file has been read off disk.
func WaitForConfigurationToBeLoaded() {
<-configurationLoaded
}
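
A minimal usage sketch, assuming a JSON config file at an illustrative path; a password value such as "${ORC_DB_PASSWORD}" in that file is resolved from the environment by postReadAdjustments:

package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/orchestrator/config"
)

func main() {
	// Read silently skips files that do not exist; later files override earlier ones.
	cfg := config.Read("/etc/orchestrator.conf.json", "orchestrator.conf.json")
	// Unblock any goroutines waiting in WaitForConfigurationToBeLoaded.
	config.MarkConfigurationLoaded()
	fmt.Println(cfg.ListenAddress)
}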


@ -0,0 +1,217 @@
package config
import (
"testing"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
func init() {
Config.HostnameResolveMethod = "none"
log.SetLevel(log.ERROR)
}
func TestReplicationLagQuery(t *testing.T) {
{
c := newConfiguration()
c.SlaveLagQuery = "select 3"
c.ReplicationLagQuery = "select 4"
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
{
c := newConfiguration()
c.SlaveLagQuery = "select 3"
c.ReplicationLagQuery = "select 3"
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
}
{
c := newConfiguration()
c.SlaveLagQuery = "select 3"
c.ReplicationLagQuery = ""
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.ReplicationLagQuery, "select 3")
}
}
func TestPostponeReplicaRecoveryOnLagMinutes(t *testing.T) {
{
c := newConfiguration()
c.PostponeSlaveRecoveryOnLagMinutes = 3
c.PostponeReplicaRecoveryOnLagMinutes = 5
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
{
c := newConfiguration()
c.PostponeSlaveRecoveryOnLagMinutes = 3
c.PostponeReplicaRecoveryOnLagMinutes = 3
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
}
{
c := newConfiguration()
c.PostponeSlaveRecoveryOnLagMinutes = 3
c.PostponeReplicaRecoveryOnLagMinutes = 0
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.PostponeReplicaRecoveryOnLagMinutes, uint(3))
}
}
func TestMasterFailoverDetachReplicaMasterHost(t *testing.T) {
{
c := newConfiguration()
c.MasterFailoverDetachSlaveMasterHost = false
c.MasterFailoverDetachReplicaMasterHost = false
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectFalse(c.MasterFailoverDetachReplicaMasterHost)
}
{
c := newConfiguration()
c.MasterFailoverDetachSlaveMasterHost = false
c.MasterFailoverDetachReplicaMasterHost = true
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectTrue(c.MasterFailoverDetachReplicaMasterHost)
}
{
c := newConfiguration()
c.MasterFailoverDetachSlaveMasterHost = true
c.MasterFailoverDetachReplicaMasterHost = false
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectTrue(c.MasterFailoverDetachReplicaMasterHost)
}
}
func TestMasterFailoverDetachDetachLostReplicasAfterMasterFailover(t *testing.T) {
{
c := newConfiguration()
c.DetachLostSlavesAfterMasterFailover = false
c.DetachLostReplicasAfterMasterFailover = false
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectFalse(c.DetachLostReplicasAfterMasterFailover)
}
{
c := newConfiguration()
c.DetachLostSlavesAfterMasterFailover = false
c.DetachLostReplicasAfterMasterFailover = true
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectTrue(c.DetachLostReplicasAfterMasterFailover)
}
{
c := newConfiguration()
c.DetachLostSlavesAfterMasterFailover = true
c.DetachLostReplicasAfterMasterFailover = false
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectTrue(c.DetachLostReplicasAfterMasterFailover)
}
}
func TestRecoveryPeriodBlock(t *testing.T) {
{
c := newConfiguration()
c.RecoveryPeriodBlockSeconds = 0
c.RecoveryPeriodBlockMinutes = 0
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.RecoveryPeriodBlockSeconds, 0)
}
{
c := newConfiguration()
c.RecoveryPeriodBlockSeconds = 30
c.RecoveryPeriodBlockMinutes = 1
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.RecoveryPeriodBlockSeconds, 30)
}
{
c := newConfiguration()
c.RecoveryPeriodBlockSeconds = 0
c.RecoveryPeriodBlockMinutes = 2
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.RecoveryPeriodBlockSeconds, 120)
}
{
c := newConfiguration()
c.RecoveryPeriodBlockSeconds = 15
c.RecoveryPeriodBlockMinutes = 0
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.RecoveryPeriodBlockSeconds, 15)
}
}
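// These cases establish the precedence rule: a nonzero
// RecoveryPeriodBlockSeconds wins outright, and only when it is zero is the
// legacy minutes setting converted. Sketched as a hypothetical helper:
func effectiveBlockSeconds(seconds, minutes int) int {
	if seconds > 0 {
		return seconds
	}
	return minutes * 60 // e.g. minutes=2 yields 120, matching the third case
}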
func TestRaft(t *testing.T) {
{
c := newConfiguration()
c.RaftBind = "1.2.3.4:1008"
c.RaftDataDir = "/path/to/somewhere"
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c.RaftAdvertise, c.RaftBind)
}
{
c := newConfiguration()
c.RaftEnabled = true
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
{
c := newConfiguration()
c.RaftEnabled = true
c.RaftDataDir = "/path/to/somewhere"
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
}
{
c := newConfiguration()
c.RaftEnabled = true
c.RaftDataDir = "/path/to/somewhere"
c.RaftBind = ""
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
}
func TestHttpAdvertise(t *testing.T) {
{
c := newConfiguration()
c.HTTPAdvertise = ""
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
}
{
c := newConfiguration()
c.HTTPAdvertise = "http://127.0.0.1:1234"
err := c.postReadAdjustments()
test.S(t).ExpectNil(err)
}
{
c := newConfiguration()
c.HTTPAdvertise = "http://127.0.0.1"
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
{
c := newConfiguration()
c.HTTPAdvertise = "127.0.0.1:1234"
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
{
c := newConfiguration()
c.HTTPAdvertise = "http://127.0.0.1:1234/mypath"
err := c.postReadAdjustments()
test.S(t).ExpectNotNil(err)
}
}
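// TestHttpAdvertise's cases suggest the validation shape: empty is allowed,
// otherwise HTTPAdvertise must carry a scheme, an explicit port, and no path.
// A hedged reconstruction (assumes "net/url" and "fmt" are imported; the real
// checks live in postReadAdjustments()):
func validateHTTPAdvertise(advertise string) error {
	if advertise == "" {
		return nil
	}
	u, err := url.Parse(advertise)
	if err != nil {
		return err // e.g. "127.0.0.1:1234" fails to parse as a URL
	}
	if u.Scheme == "" || u.Port() == "" {
		return fmt.Errorf("HTTPAdvertise %q needs an explicit scheme and port", advertise)
	}
	if u.Path != "" {
		return fmt.Errorf("HTTPAdvertise %q must not include a path", advertise)
	}
	return nil
}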

go/vt/orchestrator/db/db.go Normal file
@@ -0,0 +1,416 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
import (
"database/sql"
"fmt"
"strings"
"sync"
"time"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)
var (
EmptyArgs []interface{}
)
var mysqlURI string
var dbMutex sync.Mutex
type DummySqlResult struct {
}
func (this DummySqlResult) LastInsertId() (int64, error) {
return 0, nil
}
func (this DummySqlResult) RowsAffected() (int64, error) {
return 1, nil
}
func getMySQLURI() string {
dbMutex.Lock()
defer dbMutex.Unlock()
if mysqlURI != "" {
return mysqlURI
}
// Plain assignment (not ":=") so the computed URI is cached in the package-level mysqlURI
mysqlURI = fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?timeout=%ds&readTimeout=%ds&rejectReadOnly=%t&interpolateParams=true",
config.Config.MySQLOrchestratorUser,
config.Config.MySQLOrchestratorPassword,
config.Config.MySQLOrchestratorHost,
config.Config.MySQLOrchestratorPort,
config.Config.MySQLOrchestratorDatabase,
config.Config.MySQLConnectTimeoutSeconds,
config.Config.MySQLOrchestratorReadTimeoutSeconds,
config.Config.MySQLOrchestratorRejectReadOnly,
)
if config.Config.MySQLOrchestratorUseMutualTLS {
mysqlURI, _ = SetupMySQLOrchestratorTLS(mysqlURI)
}
return mysqlURI
}
// OpenDiscovery returns a DB instance to access a topology instance.
// It has a lower read timeout than OpenTopology and is intended to
// be used with low-latency discovery queries.
func OpenDiscovery(host string, port int) (*sql.DB, error) {
return openTopology(host, port, config.Config.MySQLDiscoveryReadTimeoutSeconds)
}
// OpenTopology returns a DB instance to access a topology instance.
func OpenTopology(host string, port int) (*sql.DB, error) {
return openTopology(host, port, config.Config.MySQLTopologyReadTimeoutSeconds)
}
func openTopology(host string, port int, readTimeout int) (db *sql.DB, err error) {
uri := fmt.Sprintf("%s:%s@tcp(%s:%d)/?timeout=%ds&readTimeout=%ds&interpolateParams=true",
config.Config.MySQLTopologyUser,
config.Config.MySQLTopologyPassword,
host, port,
config.Config.MySQLConnectTimeoutSeconds,
readTimeout,
)
if config.Config.MySQLTopologyUseMutualTLS ||
(config.Config.MySQLTopologyUseMixedTLS && requiresTLS(host, port, uri)) {
if uri, err = SetupMySQLTopologyTLS(uri); err != nil {
return nil, err
}
}
if db, _, err = sqlutils.GetDB(uri); err != nil {
return nil, err
}
return nil, err
}
if config.Config.MySQLConnectionLifetimeSeconds > 0 {
db.SetConnMaxLifetime(time.Duration(config.Config.MySQLConnectionLifetimeSeconds) * time.Second)
}
db.SetMaxOpenConns(config.MySQLTopologyMaxPoolConnections)
db.SetMaxIdleConns(config.MySQLTopologyMaxPoolConnections)
return db, err
}
func openOrchestratorMySQLGeneric() (db *sql.DB, fromCache bool, err error) {
uri := fmt.Sprintf("%s:%s@tcp(%s:%d)/?timeout=%ds&readTimeout=%ds&interpolateParams=true",
config.Config.MySQLOrchestratorUser,
config.Config.MySQLOrchestratorPassword,
config.Config.MySQLOrchestratorHost,
config.Config.MySQLOrchestratorPort,
config.Config.MySQLConnectTimeoutSeconds,
config.Config.MySQLOrchestratorReadTimeoutSeconds,
)
if config.Config.MySQLOrchestratorUseMutualTLS {
uri, _ = SetupMySQLOrchestratorTLS(uri)
}
return sqlutils.GetDB(uri)
}
func IsSQLite() bool {
return config.Config.IsSQLite()
}
func isInMemorySQLite() bool {
return config.Config.IsSQLite() && strings.Contains(config.Config.SQLite3DataFile, ":memory:")
}
// OpenOrchestrator returns the DB instance for the orchestrator backend database
func OpenOrchestrator() (db *sql.DB, err error) {
var fromCache bool
if IsSQLite() {
db, fromCache, err = sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile)
if err == nil && !fromCache {
log.Debugf("Connected to orchestrator backend: sqlite on %v", config.Config.SQLite3DataFile)
}
if db != nil {
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
}
} else {
if db, fromCache, err := openOrchestratorMySQLGeneric(); err != nil {
return db, log.Errore(err)
} else if !fromCache {
// first time ever we talk to MySQL
query := fmt.Sprintf("create database if not exists %s", config.Config.MySQLOrchestratorDatabase)
if _, err := db.Exec(query); err != nil {
return db, log.Errore(err)
}
}
db, fromCache, err = sqlutils.GetDB(getMySQLURI())
if err == nil && !fromCache {
// do not show the password but do show what we connect to.
safeMySQLURI := fmt.Sprintf("%s:?@tcp(%s:%d)/%s?timeout=%ds", config.Config.MySQLOrchestratorUser,
config.Config.MySQLOrchestratorHost, config.Config.MySQLOrchestratorPort, config.Config.MySQLOrchestratorDatabase, config.Config.MySQLConnectTimeoutSeconds)
log.Debugf("Connected to orchestrator backend: %v", safeMySQLURI)
if config.Config.MySQLOrchestratorMaxPoolConnections > 0 {
log.Debugf("Orchestrator pool SetMaxOpenConns: %d", config.Config.MySQLOrchestratorMaxPoolConnections)
db.SetMaxOpenConns(config.Config.MySQLOrchestratorMaxPoolConnections)
}
if config.Config.MySQLConnectionLifetimeSeconds > 0 {
db.SetConnMaxLifetime(time.Duration(config.Config.MySQLConnectionLifetimeSeconds) * time.Second)
}
}
}
if err == nil && !fromCache {
if !config.Config.SkipOrchestratorDatabaseUpdate {
initOrchestratorDB(db)
}
// A low value here will trigger reconnects which could
// make the number of backend connections hit the tcp
// limit. That's bad. I could make this setting dynamic
// but then people need to know which value to use. For now
// allow up to 25% of MySQLOrchestratorMaxPoolConnections
// to be idle. That should provide a good number which
// does not keep the maximum number of connections open but
// at the same time does not trigger disconnections and
// reconnections too frequently.
maxIdleConns := int(config.Config.MySQLOrchestratorMaxPoolConnections * 25 / 100)
if maxIdleConns < 10 {
maxIdleConns = 10
}
log.Infof("Connecting to backend %s:%d: maxConnections: %d, maxIdleConns: %d",
config.Config.MySQLOrchestratorHost,
config.Config.MySQLOrchestratorPort,
config.Config.MySQLOrchestratorMaxPoolConnections,
maxIdleConns)
db.SetMaxIdleConns(maxIdleConns)
}
return db, err
}
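// A worked example of the idle-connection heuristic above: allow 25% of the
// pool to sit idle, floored at 10. With MySQLOrchestratorMaxPoolConnections
// set to 128 this yields 32 idle connections; with 20 it yields the floor of
// 10. Illustrative helper only; the real computation is inline above.
func idleConnsFor(maxPoolConnections int) int {
	maxIdle := maxPoolConnections * 25 / 100
	if maxIdle < 10 {
		maxIdle = 10
	}
	return maxIdle
}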
func translateStatement(statement string) (string, error) {
if IsSQLite() {
statement = sqlutils.ToSqlite3Dialect(statement)
}
return statement, nil
}
// versionIsDeployed checks if given version has already been deployed
func versionIsDeployed(db *sql.DB) (result bool, err error) {
query := `
select
count(*) as is_deployed
from
orchestrator_db_deployments
where
deployed_version = ?
`
err = db.QueryRow(query, config.RuntimeCLIFlags.ConfiguredVersion).Scan(&result)
// err means the table 'orchestrator_db_deployments' does not even exist, in which case we proceed
// to deploy.
// If there's another error to this, like DB gone bad, then we're about to find out anyway.
return result, err
}
// registerOrchestratorDeployment updates the orchestrator_db_deployments table upon successful deployment
func registerOrchestratorDeployment(db *sql.DB) error {
query := `
replace into orchestrator_db_deployments (
deployed_version, deployed_timestamp
) values (
?, NOW()
)
`
if _, err := execInternal(db, query, config.RuntimeCLIFlags.ConfiguredVersion); err != nil {
log.Fatalf("Unable to write to orchestrator_db_deployments: %+v", err)
}
log.Debugf("Migrated database schema to version [%+v]", config.RuntimeCLIFlags.ConfiguredVersion)
return nil
}
// deployStatements will issue the given sql queries that are not already known to be deployed.
// This iterates both lists (to-run and already-deployed) and also verifies there are no contradictions.
func deployStatements(db *sql.DB, queries []string) error {
tx, err := db.Begin()
if err != nil {
log.Fatale(err)
}
// Ugly workaround ahead.
// This workaround stems from some "timestamp NOT NULL" column definitions,
// which are invalid under the NO_ZERO_IN_DATE,NO_ZERO_DATE sql_mode (their implicit default is "0").
// Installing orchestrator fails on servers configured that way, in particular on 5.7,
// where this setting is the default.
// For backwards compatibility, we force sql_mode to be more relaxed, create the schemas
// along with the "invalid" definitions, and then go ahead and fix those definitions via the following ALTER statements.
// My bad.
originalSqlMode := ""
if config.Config.IsMySQL() {
err = tx.QueryRow(`select @@session.sql_mode`).Scan(&originalSqlMode)
if _, err := tx.Exec(`set @@session.sql_mode=REPLACE(@@session.sql_mode, 'NO_ZERO_DATE', '')`); err != nil {
log.Fatale(err)
}
if _, err := tx.Exec(`set @@session.sql_mode=REPLACE(@@session.sql_mode, 'NO_ZERO_IN_DATE', '')`); err != nil {
log.Fatale(err)
}
}
for _, query := range queries {
query, err := translateStatement(query)
if err != nil {
return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query)
}
if _, err := tx.Exec(query); err != nil {
if strings.Contains(err.Error(), "syntax error") {
return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query)
}
if !sqlutils.IsAlterTable(query) && !sqlutils.IsCreateIndex(query) && !sqlutils.IsDropIndex(query) {
return log.Fatalf("Cannot initiate orchestrator: %+v; query=%+v", err, query)
}
if !strings.Contains(err.Error(), "duplicate column name") &&
!strings.Contains(err.Error(), "Duplicate column name") &&
!strings.Contains(err.Error(), "check that column/key exists") &&
!strings.Contains(err.Error(), "already exists") &&
!strings.Contains(err.Error(), "Duplicate key name") {
log.Errorf("Error initiating orchestrator: %+v; query=%+v", err, query)
}
}
}
if config.Config.IsMySQL() {
if _, err := tx.Exec(`set session sql_mode=?`, originalSqlMode); err != nil {
log.Fatale(err)
}
}
if err := tx.Commit(); err != nil {
log.Fatale(err)
}
return nil
}
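// For illustration, a client-side equivalent of the two REPLACE statements
// deployStatements issues server-side: strip NO_ZERO_DATE and NO_ZERO_IN_DATE
// from sql_mode so the legacy "timestamp NOT NULL" defaults are accepted.
// Hypothetical helper; the real code rewrites @@session.sql_mode in SQL.
func relaxSQLMode(mode string) string {
	kept := []string{}
	for _, m := range strings.Split(mode, ",") {
		if m != "NO_ZERO_DATE" && m != "NO_ZERO_IN_DATE" {
			kept = append(kept, m)
		}
	}
	return strings.Join(kept, ",") // e.g. "NO_ZERO_DATE,STRICT_TRANS_TABLES" -> "STRICT_TRANS_TABLES"
}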
// initOrchestratorDB attempts to create/upgrade the orchestrator backend database. It is created once in the
// application's lifetime.
func initOrchestratorDB(db *sql.DB) error {
log.Debug("Initializing orchestrator")
versionAlreadyDeployed, err := versionIsDeployed(db)
if versionAlreadyDeployed && config.RuntimeCLIFlags.ConfiguredVersion != "" && err == nil {
// Already deployed with this version
return nil
}
if config.Config.PanicIfDifferentDatabaseDeploy && config.RuntimeCLIFlags.ConfiguredVersion != "" && !versionAlreadyDeployed {
log.Fatalf("PanicIfDifferentDatabaseDeploy is set. Configured version %s is not the version found in the database", config.RuntimeCLIFlags.ConfiguredVersion)
}
log.Debugf("Migrating database schema")
deployStatements(db, generateSQLBase)
deployStatements(db, generateSQLPatches)
registerOrchestratorDeployment(db)
if IsSQLite() {
ExecOrchestrator(`PRAGMA journal_mode = WAL`)
ExecOrchestrator(`PRAGMA synchronous = NORMAL`)
}
return nil
}
// execInternal translates the given statement into the backend's dialect and executes it without preparing
func execInternal(db *sql.DB, query string, args ...interface{}) (sql.Result, error) {
var err error
query, err = translateStatement(query)
if err != nil {
return nil, err
}
res, err := sqlutils.ExecNoPrepare(db, query, args...)
return res, err
}
// ExecOrchestrator will execute given query on the orchestrator backend database.
func ExecOrchestrator(query string, args ...interface{}) (sql.Result, error) {
var err error
query, err = translateStatement(query)
if err != nil {
return nil, err
}
db, err := OpenOrchestrator()
if err != nil {
return nil, err
}
res, err := sqlutils.ExecNoPrepare(db, query, args...)
return res, err
}
// QueryOrchestratorRowsMap runs the given query on the orchestrator backend and invokes on_row for each row
func QueryOrchestratorRowsMap(query string, on_row func(sqlutils.RowMap) error) error {
query, err := translateStatement(query)
if err != nil {
return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query)
}
db, err := OpenOrchestrator()
if err != nil {
return err
}
return sqlutils.QueryRowsMap(db, query, on_row)
}
// QueryOrchestrator runs the given query, with arguments, on the orchestrator backend and invokes on_row for each row
func QueryOrchestrator(query string, argsArray []interface{}, on_row func(sqlutils.RowMap) error) error {
query, err := translateStatement(query)
if err != nil {
return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query)
}
db, err := OpenOrchestrator()
if err != nil {
return err
}
return log.Criticale(sqlutils.QueryRowsMap(db, query, on_row, argsArray...))
}
// QueryOrchestratorRowsMapBuffered is the buffered variant of QueryOrchestratorRowsMap
func QueryOrchestratorRowsMapBuffered(query string, on_row func(sqlutils.RowMap) error) error {
query, err := translateStatement(query)
if err != nil {
return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query)
}
db, err := OpenOrchestrator()
if err != nil {
return err
}
return sqlutils.QueryRowsMapBuffered(db, query, on_row)
}
// QueryOrchestratorBuffered is the buffered variant of QueryOrchestrator
func QueryOrchestratorBuffered(query string, argsArray []interface{}, on_row func(sqlutils.RowMap) error) error {
query, err := translateStatement(query)
if err != nil {
return log.Fatalf("Cannot query orchestrator: %+v; query=%+v", err, query)
}
db, err := OpenOrchestrator()
if err != nil {
return err
}
if argsArray == nil {
argsArray = EmptyArgs
}
return log.Criticale(sqlutils.QueryRowsMapBuffered(db, query, on_row, argsArray...))
}
// ReadTimeNow reads and returns the current timestamp as string. This is an unfortunate workaround
// to support both MySQL and SQLite in all possible timezones. SQLite only speaks UTC, whereas MySQL has
// timezone support. By reading the time as string we get the database's de-facto notion of the time,
// which we can then feed back to it.
func ReadTimeNow() (timeNow string, err error) {
err = QueryOrchestrator(`select now() as time_now`, nil, func(m sqlutils.RowMap) error {
timeNow = m.GetString("time_now")
return nil
})
return timeNow, err
}
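// A hypothetical caller showing the feed-back pattern ReadTimeNow enables:
// stamp a row with the backend's own notion of "now" so that MySQL and SQLite
// deployments behave identically. The audit columns used here exist in the
// base schema, but this function is illustrative and not part of the package.
func auditWithBackendTime(message string) error {
	timeNow, err := ReadTimeNow()
	if err != nil {
		return err
	}
	_, err = ExecOrchestrator(`
		insert into audit (audit_timestamp, audit_type, port, message)
		values (?, 'example', 0, ?)
		`, timeNow, message)
	return err
}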

@@ -0,0 +1,854 @@
/*
Copyright 2017 Shlomi Noach, GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
// generateSQLBase & generateSQLPatches are lists of SQL statements required to build the orchestrator backend
var generateSQLBase = []string{
`
CREATE TABLE IF NOT EXISTS database_instance (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
last_checked timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_seen timestamp NULL DEFAULT NULL,
server_id int(10) unsigned NOT NULL,
version varchar(128) CHARACTER SET ascii NOT NULL,
binlog_format varchar(16) CHARACTER SET ascii NOT NULL,
log_bin tinyint(3) unsigned NOT NULL,
log_slave_updates tinyint(3) unsigned NOT NULL,
binary_log_file varchar(128) CHARACTER SET ascii NOT NULL,
binary_log_pos bigint(20) unsigned NOT NULL,
master_host varchar(128) CHARACTER SET ascii NOT NULL,
master_port smallint(5) unsigned NOT NULL,
slave_sql_running tinyint(3) unsigned NOT NULL,
slave_io_running tinyint(3) unsigned NOT NULL,
master_log_file varchar(128) CHARACTER SET ascii NOT NULL,
read_master_log_pos bigint(20) unsigned NOT NULL,
relay_master_log_file varchar(128) CHARACTER SET ascii NOT NULL,
exec_master_log_pos bigint(20) unsigned NOT NULL,
seconds_behind_master bigint(20) unsigned DEFAULT NULL,
slave_lag_seconds bigint(20) unsigned DEFAULT NULL,
num_slave_hosts int(10) unsigned NOT NULL,
slave_hosts text CHARACTER SET ascii NOT NULL,
cluster_name varchar(128) CHARACTER SET ascii NOT NULL,
PRIMARY KEY (hostname,port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX cluster_name_idx ON database_instance
`,
`
CREATE INDEX cluster_name_idx_database_instance ON database_instance(cluster_name)
`,
`
DROP INDEX last_checked_idx ON database_instance
`,
`
CREATE INDEX last_checked_idx_database_instance ON database_instance(last_checked)
`,
`
DROP INDEX last_seen_idx ON database_instance
`,
`
CREATE INDEX last_seen_idx_database_instance ON database_instance(last_seen)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_maintenance (
database_instance_maintenance_id int(10) unsigned NOT NULL AUTO_INCREMENT,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
maintenance_active tinyint(4) DEFAULT NULL,
begin_timestamp timestamp NULL DEFAULT NULL,
end_timestamp timestamp NULL DEFAULT NULL,
owner varchar(128) CHARACTER SET utf8 NOT NULL,
reason text CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (database_instance_maintenance_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX maintenance_uidx ON database_instance_maintenance
`,
`
CREATE UNIQUE INDEX maintenance_uidx_database_instance_maintenance ON database_instance_maintenance (maintenance_active, hostname, port)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_long_running_queries (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
process_id bigint(20) NOT NULL,
process_started_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
process_user varchar(16) CHARACTER SET utf8 NOT NULL,
process_host varchar(128) CHARACTER SET utf8 NOT NULL,
process_db varchar(128) CHARACTER SET utf8 NOT NULL,
process_command varchar(16) CHARACTER SET utf8 NOT NULL,
process_time_seconds int(11) NOT NULL,
process_state varchar(128) CHARACTER SET utf8 NOT NULL,
process_info varchar(1024) CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (hostname,port,process_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX process_started_at_idx ON database_instance_long_running_queries
`,
`
CREATE INDEX process_started_at_idx_database_instance_long_running_queries ON database_instance_long_running_queries (process_started_at)
`,
`
CREATE TABLE IF NOT EXISTS audit (
audit_id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
audit_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
audit_type varchar(128) CHARACTER SET ascii NOT NULL,
hostname varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '',
port smallint(5) unsigned NOT NULL,
message text CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (audit_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
`,
`
DROP INDEX audit_timestamp_idx ON audit
`,
`
CREATE INDEX audit_timestamp_idx_audit ON audit (audit_timestamp)
`,
`
DROP INDEX host_port_idx ON audit
`,
`
CREATE INDEX host_port_idx_audit ON audit (hostname, port, audit_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS host_agent (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
token varchar(128) NOT NULL,
last_submitted timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_checked timestamp NULL DEFAULT NULL,
last_seen timestamp NULL DEFAULT NULL,
mysql_port smallint(5) unsigned DEFAULT NULL,
count_mysql_snapshots smallint(5) unsigned NOT NULL,
PRIMARY KEY (hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX token_idx ON host_agent
`,
`
CREATE INDEX token_idx_host_agent ON host_agent (token)
`,
`
DROP INDEX last_submitted_idx ON host_agent
`,
`
CREATE INDEX last_submitted_idx_host_agent ON host_agent (last_submitted)
`,
`
DROP INDEX last_checked_idx ON host_agent
`,
`
CREATE INDEX last_checked_idx_host_agent ON host_agent (last_checked)
`,
`
DROP INDEX last_seen_idx ON host_agent
`,
`
CREATE INDEX last_seen_idx_host_agent ON host_agent (last_seen)
`,
`
CREATE TABLE IF NOT EXISTS agent_seed (
agent_seed_id int(10) unsigned NOT NULL AUTO_INCREMENT,
target_hostname varchar(128) NOT NULL,
source_hostname varchar(128) NOT NULL,
start_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
end_timestamp timestamp NOT NULL DEFAULT '1971-01-01 00:00:00',
is_complete tinyint(3) unsigned NOT NULL DEFAULT '0',
is_successful tinyint(3) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (agent_seed_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX target_hostname_idx ON agent_seed
`,
`
CREATE INDEX target_hostname_idx_agent_seed ON agent_seed (target_hostname,is_complete)
`,
`
DROP INDEX source_hostname_idx ON agent_seed
`,
`
CREATE INDEX source_hostname_idx_agent_seed ON agent_seed (source_hostname,is_complete)
`,
`
DROP INDEX start_timestamp_idx ON agent_seed
`,
`
CREATE INDEX start_timestamp_idx_agent_seed ON agent_seed (start_timestamp)
`,
`
DROP INDEX is_complete_idx ON agent_seed
`,
`
CREATE INDEX is_complete_idx_agent_seed ON agent_seed (is_complete,start_timestamp)
`,
`
DROP INDEX is_successful_idx ON agent_seed
`,
`
CREATE INDEX is_successful_idx_agent_seed ON agent_seed (is_successful, start_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS agent_seed_state (
agent_seed_state_id int(10) unsigned NOT NULL AUTO_INCREMENT,
agent_seed_id int(10) unsigned NOT NULL,
state_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
state_action varchar(127) NOT NULL,
error_message varchar(255) NOT NULL,
PRIMARY KEY (agent_seed_state_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX agent_seed_idx ON agent_seed_state
`,
`
CREATE INDEX agent_seed_idx_agent_seed_state ON agent_seed_state (agent_seed_id, state_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS host_attributes (
hostname varchar(128) NOT NULL,
attribute_name varchar(128) NOT NULL,
attribute_value varchar(128) NOT NULL,
submit_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
expire_timestamp timestamp NULL DEFAULT NULL,
PRIMARY KEY (hostname,attribute_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX attribute_name_idx ON host_attributes
`,
`
CREATE INDEX attribute_name_idx_host_attributes ON host_attributes (attribute_name)
`,
`
DROP INDEX attribute_value_idx ON host_attributes
`,
`
CREATE INDEX attribute_value_idx_host_attributes ON host_attributes (attribute_value)
`,
`
DROP INDEX submit_timestamp_idx ON host_attributes
`,
`
CREATE INDEX submit_timestamp_idx_host_attributes ON host_attributes (submit_timestamp)
`,
`
DROP INDEX expire_timestamp_idx ON host_attributes
`,
`
CREATE INDEX expire_timestamp_idx_host_attributes ON host_attributes (expire_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS hostname_resolve (
hostname varchar(128) NOT NULL,
resolved_hostname varchar(128) NOT NULL,
resolved_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX resolved_timestamp_idx ON hostname_resolve
`,
`
CREATE INDEX resolved_timestamp_idx_hostname_resolve ON hostname_resolve (resolved_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS cluster_alias (
cluster_name varchar(128) CHARACTER SET ascii NOT NULL,
alias varchar(128) NOT NULL,
PRIMARY KEY (cluster_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS active_node (
anchor tinyint unsigned NOT NULL,
hostname varchar(128) CHARACTER SET ascii NOT NULL,
token varchar(128) NOT NULL,
last_seen_active timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (anchor)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
INSERT IGNORE INTO active_node (anchor, hostname, token, last_seen_active)
VALUES (1, '', '', NOW())
`,
`
CREATE TABLE IF NOT EXISTS node_health (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
token varchar(128) NOT NULL,
last_seen_active timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname, token)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP VIEW IF EXISTS _whats_wrong
`,
`
DROP VIEW IF EXISTS whats_wrong
`,
`
DROP VIEW IF EXISTS whats_wrong_summary
`,
`
CREATE TABLE IF NOT EXISTS topology_recovery (
recovery_id bigint unsigned not null auto_increment,
hostname varchar(128) NOT NULL,
port smallint unsigned NOT NULL,
in_active_period tinyint unsigned NOT NULL DEFAULT 0,
start_active_period timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
end_active_period_unixtime int unsigned,
end_recovery timestamp NULL DEFAULT NULL,
processing_node_hostname varchar(128) CHARACTER SET ascii NOT NULL,
processcing_node_token varchar(128) NOT NULL,
successor_hostname varchar(128) DEFAULT NULL,
successor_port smallint unsigned DEFAULT NULL,
PRIMARY KEY (recovery_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX in_active_start_period_idx ON topology_recovery
`,
`
CREATE INDEX in_active_start_period_idx_topology_recovery ON topology_recovery (in_active_period, start_active_period)
`,
`
DROP INDEX start_active_period_idx ON topology_recovery
`,
`
CREATE INDEX start_active_period_idx_topology_recovery ON topology_recovery (start_active_period)
`,
`
DROP INDEX hostname_port_active_period_uidx ON topology_recovery
`,
`
CREATE UNIQUE INDEX hostname_port_active_period_uidx_topology_recovery ON topology_recovery (hostname, port, in_active_period, end_active_period_unixtime)
`,
`
CREATE TABLE IF NOT EXISTS hostname_unresolve (
hostname varchar(128) NOT NULL,
unresolved_hostname varchar(128) NOT NULL,
PRIMARY KEY (hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX unresolved_hostname_idx ON hostname_unresolve
`,
`
CREATE INDEX unresolved_hostname_idx_hostname_unresolve ON hostname_unresolve (unresolved_hostname)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_pool (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
pool varchar(128) NOT NULL,
PRIMARY KEY (hostname, port, pool)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX pool_idx ON database_instance_pool
`,
`
CREATE INDEX pool_idx_database_instance_pool ON database_instance_pool (pool)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_topology_history (
snapshot_unix_timestamp INT UNSIGNED NOT NULL,
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
master_host varchar(128) CHARACTER SET ascii NOT NULL,
master_port smallint(5) unsigned NOT NULL,
cluster_name tinytext CHARACTER SET ascii NOT NULL,
PRIMARY KEY (snapshot_unix_timestamp, hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX cluster_name_idx ON database_instance_topology_history
`,
`
CREATE INDEX cluster_name_idx_database_instance_topology_history ON database_instance_topology_history (snapshot_unix_timestamp, cluster_name(128))
`,
`
CREATE TABLE IF NOT EXISTS candidate_database_instance (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
last_suggested TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX last_suggested_idx ON candidate_database_instance
`,
`
CREATE INDEX last_suggested_idx_candidate_database_instance ON candidate_database_instance (last_suggested)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_downtime (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
downtime_active tinyint(4) DEFAULT NULL,
begin_timestamp timestamp DEFAULT CURRENT_TIMESTAMP,
end_timestamp timestamp NULL DEFAULT NULL,
owner varchar(128) CHARACTER SET utf8 NOT NULL,
reason text CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS topology_failure_detection (
detection_id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
hostname varchar(128) NOT NULL,
port smallint unsigned NOT NULL,
in_active_period tinyint unsigned NOT NULL DEFAULT '0',
start_active_period timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
end_active_period_unixtime int unsigned NOT NULL,
processing_node_hostname varchar(128) NOT NULL,
processcing_node_token varchar(128) NOT NULL,
analysis varchar(128) NOT NULL,
cluster_name varchar(128) NOT NULL,
cluster_alias varchar(128) NOT NULL,
count_affected_slaves int unsigned NOT NULL,
slave_hosts text NOT NULL,
PRIMARY KEY (detection_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX hostname_port_active_period_uidx ON topology_failure_detection
`,
`
DROP INDEX in_active_start_period_idx ON topology_failure_detection
`,
`
CREATE INDEX in_active_start_period_idx_topology_failure_detection ON topology_failure_detection (in_active_period, start_active_period)
`,
`
CREATE TABLE IF NOT EXISTS hostname_resolve_history (
resolved_hostname varchar(128) NOT NULL,
hostname varchar(128) NOT NULL,
resolved_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (resolved_hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX hostname ON hostname_resolve_history
`,
`
CREATE INDEX hostname_idx_hostname_resolve_history ON hostname_resolve_history (hostname)
`,
`
DROP INDEX resolved_timestamp_idx ON hostname_resolve_history
`,
`
CREATE INDEX resolved_timestamp_idx_hostname_resolve_history ON hostname_resolve_history (resolved_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS hostname_unresolve_history (
unresolved_hostname varchar(128) NOT NULL,
hostname varchar(128) NOT NULL,
last_registered TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (unresolved_hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX hostname ON hostname_unresolve_history
`,
`
CREATE INDEX hostname_idx_hostname_unresolve_history ON hostname_unresolve_history (hostname)
`,
`
DROP INDEX last_registered_idx ON hostname_unresolve_history
`,
`
CREATE INDEX last_registered_idx_hostname_unresolve_history ON hostname_unresolve_history (last_registered)
`,
`
CREATE TABLE IF NOT EXISTS cluster_domain_name (
cluster_name varchar(128) CHARACTER SET ascii NOT NULL,
domain_name varchar(128) NOT NULL,
PRIMARY KEY (cluster_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX domain_name_idx ON cluster_domain_name
`,
`
CREATE INDEX domain_name_idx_cluster_domain_name ON cluster_domain_name (domain_name(32))
`,
`
CREATE TABLE IF NOT EXISTS master_position_equivalence (
equivalence_id bigint unsigned not null auto_increment,
master1_hostname varchar(128) CHARACTER SET ascii NOT NULL,
master1_port smallint(5) unsigned NOT NULL,
master1_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL,
master1_binary_log_pos bigint(20) unsigned NOT NULL,
master2_hostname varchar(128) CHARACTER SET ascii NOT NULL,
master2_port smallint(5) unsigned NOT NULL,
master2_binary_log_file varchar(128) CHARACTER SET ascii NOT NULL,
master2_binary_log_pos bigint(20) unsigned NOT NULL,
last_suggested TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (equivalence_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX equivalence_uidx ON master_position_equivalence
`,
`
CREATE UNIQUE INDEX equivalence_uidx_master_position_equivalence ON master_position_equivalence (master1_hostname, master1_port, master1_binary_log_file, master1_binary_log_pos, master2_hostname, master2_port)
`,
`
DROP INDEX master2_idx ON master_position_equivalence
`,
`
CREATE INDEX master2_idx_master_position_equivalence ON master_position_equivalence (master2_hostname, master2_port, master2_binary_log_file, master2_binary_log_pos)
`,
`
DROP INDEX last_suggested_idx ON master_position_equivalence
`,
`
CREATE INDEX last_suggested_idx_master_position_equivalence ON master_position_equivalence (last_suggested)
`,
`
CREATE TABLE IF NOT EXISTS async_request (
request_id bigint unsigned NOT NULL AUTO_INCREMENT,
command varchar(128) charset ascii not null,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
destination_hostname varchar(128) NOT NULL,
destination_port smallint(5) unsigned NOT NULL,
pattern text CHARACTER SET utf8 NOT NULL,
gtid_hint varchar(32) charset ascii not null,
begin_timestamp timestamp NULL DEFAULT NULL,
end_timestamp timestamp NULL DEFAULT NULL,
story text CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (request_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX begin_timestamp_idx ON async_request
`,
`
CREATE INDEX begin_timestamp_idx_async_request ON async_request (begin_timestamp)
`,
`
DROP INDEX end_timestamp_idx ON async_request
`,
`
CREATE INDEX end_timestamp_idx_async_request ON async_request (end_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS blocked_topology_recovery (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
cluster_name varchar(128) NOT NULL,
analysis varchar(128) NOT NULL,
last_blocked_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
blocking_recovery_id bigint unsigned,
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX cluster_blocked_idx ON blocked_topology_recovery
`,
`
CREATE INDEX cluster_blocked_idx_blocked_topology_recovery ON blocked_topology_recovery (cluster_name, last_blocked_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_last_analysis (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
analysis_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
analysis varchar(128) NOT NULL,
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX analysis_timestamp_idx ON database_instance_last_analysis
`,
`
CREATE INDEX analysis_timestamp_idx_database_instance_last_analysis ON database_instance_last_analysis (analysis_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_analysis_changelog (
changelog_id bigint unsigned not null auto_increment,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
analysis_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
analysis varchar(128) NOT NULL,
PRIMARY KEY (changelog_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX analysis_timestamp_idx ON database_instance_analysis_changelog
`,
`
CREATE INDEX analysis_timestamp_idx_database_instance_analysis_changelog ON database_instance_analysis_changelog (analysis_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS node_health_history (
history_id bigint unsigned not null auto_increment,
hostname varchar(128) CHARACTER SET ascii NOT NULL,
token varchar(128) NOT NULL,
first_seen_active timestamp NOT NULL,
extra_info varchar(128) CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (history_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX first_seen_active_idx ON node_health_history
`,
`
CREATE INDEX first_seen_active_idx_node_health_history ON node_health_history (first_seen_active)
`,
`
DROP INDEX hostname_token_idx ON node_health_history
`,
`
CREATE UNIQUE INDEX hostname_token_idx_node_health_history ON node_health_history (hostname, token)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_coordinates_history (
history_id bigint unsigned not null auto_increment,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
recorded_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
binary_log_file varchar(128) NOT NULL,
binary_log_pos bigint(20) unsigned NOT NULL,
relay_log_file varchar(128) NOT NULL,
relay_log_pos bigint(20) unsigned NOT NULL,
PRIMARY KEY (history_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX hostname_port_recorded_timestmp_idx ON database_instance_coordinates_history
`,
`
CREATE INDEX hostname_port_recorded_idx_database_instance_coordinates_history ON database_instance_coordinates_history (hostname, port, recorded_timestamp)
`,
`
DROP INDEX recorded_timestmp_idx ON database_instance_coordinates_history
`,
`
CREATE INDEX recorded_timestmp_idx_database_instance_coordinates_history ON database_instance_coordinates_history (recorded_timestamp)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_binlog_files_history (
history_id bigint unsigned not null auto_increment,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
binary_log_file varchar(128) NOT NULL,
binary_log_pos bigint(20) unsigned NOT NULL,
first_seen timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_seen timestamp NOT NULL DEFAULT '1971-01-01 00:00:00',
PRIMARY KEY (history_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX hostname_port_file_idx ON database_instance_binlog_files_history
`,
`
CREATE UNIQUE INDEX hostname_port_file_idx_database_instance_binlog_files_history ON database_instance_binlog_files_history (hostname, port, binary_log_file)
`,
`
DROP INDEX last_seen_idx ON database_instance_binlog_files_history
`,
`
CREATE INDEX last_seen_idx_database_instance_binlog_files_history ON database_instance_binlog_files_history (last_seen)
`,
`
CREATE TABLE IF NOT EXISTS access_token (
access_token_id bigint unsigned not null auto_increment,
public_token varchar(128) NOT NULL,
secret_token varchar(128) NOT NULL,
generated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
generated_by varchar(128) CHARACTER SET utf8 NOT NULL,
is_acquired tinyint unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (access_token_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX public_token_idx ON access_token
`,
`
CREATE UNIQUE INDEX public_token_uidx_access_token ON access_token (public_token)
`,
`
DROP INDEX generated_at_idx ON access_token
`,
`
CREATE INDEX generated_at_idx_access_token ON access_token (generated_at)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_recent_relaylog_history (
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
current_relay_log_file varchar(128) NOT NULL,
current_relay_log_pos bigint(20) unsigned NOT NULL,
current_seen timestamp NOT NULL DEFAULT '1971-01-01 00:00:00',
prev_relay_log_file varchar(128) NOT NULL,
prev_relay_log_pos bigint(20) unsigned NOT NULL,
prev_seen timestamp NOT NULL DEFAULT '1971-01-01 00:00:00',
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
DROP INDEX current_seen_idx ON database_instance_recent_relaylog_history
`,
`
CREATE INDEX current_seen_idx_database_instance_recent_relaylog_history ON database_instance_recent_relaylog_history (current_seen)
`,
`
CREATE TABLE IF NOT EXISTS orchestrator_metadata (
anchor tinyint unsigned NOT NULL,
last_deployed_version varchar(128) CHARACTER SET ascii NOT NULL,
last_deployed_timestamp timestamp NOT NULL,
PRIMARY KEY (anchor)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS orchestrator_db_deployments (
deployed_version varchar(128) CHARACTER SET ascii NOT NULL,
deployed_timestamp timestamp NOT NULL,
PRIMARY KEY (deployed_version)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS global_recovery_disable (
disable_recovery tinyint unsigned NOT NULL COMMENT 'Insert 1 to disable recovery globally',
PRIMARY KEY (disable_recovery)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS cluster_alias_override (
cluster_name varchar(128) CHARACTER SET ascii NOT NULL,
alias varchar(128) NOT NULL,
PRIMARY KEY (cluster_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS topology_recovery_steps (
recovery_step_id bigint unsigned not null auto_increment,
recovery_uid varchar(128) CHARACTER SET ascii NOT NULL,
audit_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
message text CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (recovery_step_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS raft_store (
store_id bigint unsigned not null auto_increment,
store_key varbinary(512) not null,
store_value blob not null,
PRIMARY KEY (store_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE INDEX store_key_idx_raft_store ON raft_store (store_key)
`,
`
CREATE TABLE IF NOT EXISTS raft_log (
log_index bigint unsigned not null auto_increment,
term bigint not null,
log_type int not null,
data blob not null,
PRIMARY KEY (log_index)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS raft_snapshot (
snapshot_id bigint unsigned not null auto_increment,
snapshot_name varchar(128) CHARACTER SET utf8 NOT NULL,
snapshot_meta varchar(4096) CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (snapshot_id)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE UNIQUE INDEX snapshot_name_uidx_raft_snapshot ON raft_snapshot (snapshot_name)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_peer_analysis (
peer varchar(128) NOT NULL,
hostname varchar(128) NOT NULL,
port smallint(5) unsigned NOT NULL,
analysis_timestamp timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
analysis varchar(128) NOT NULL,
PRIMARY KEY (peer, hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS database_instance_tls (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
required tinyint unsigned NOT NULL DEFAULT 0,
PRIMARY KEY (hostname,port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS kv_store (
store_key varchar(255) CHARACTER SET ascii NOT NULL,
store_value text CHARACTER SET utf8 not null,
last_updated timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (store_key)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS cluster_injected_pseudo_gtid (
cluster_name varchar(128) NOT NULL,
time_injected timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (cluster_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS hostname_ips (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
ipv4 varchar(128) CHARACTER SET ascii NOT NULL,
ipv6 varchar(128) CHARACTER SET ascii NOT NULL,
last_updated timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE TABLE IF NOT EXISTS database_instance_tags (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
tag_name varchar(128) CHARACTER SET utf8 NOT NULL,
tag_value varchar(128) CHARACTER SET utf8 NOT NULL,
last_updated timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname, port, tag_name)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE INDEX tag_name_idx_database_instance_tags ON database_instance_tags (tag_name)
`,
`
CREATE TABLE IF NOT EXISTS database_instance_stale_binlog_coordinates (
hostname varchar(128) CHARACTER SET ascii NOT NULL,
port smallint(5) unsigned NOT NULL,
binary_log_file varchar(128) NOT NULL,
binary_log_pos bigint(20) unsigned NOT NULL,
first_seen timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (hostname, port)
) ENGINE=InnoDB DEFAULT CHARSET=ascii
`,
`
CREATE INDEX first_seen_idx_database_instance_stale_binlog_coordinates ON database_instance_stale_binlog_coordinates (first_seen)
`,
}
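// Why the DROP INDEX / CREATE INDEX pairs above are safe to re-run on every
// startup: deployStatements (in db.go) treats the errors they raise against an
// already-migrated schema as benign. A sketch of that tolerance, assuming
// "strings" is imported; the real checks are inline in deployStatements:
func isBenignSchemaError(err error) bool {
	msg := err.Error()
	return strings.Contains(msg, "check that column/key exists") || // index already dropped
		strings.Contains(msg, "already exists") || // index already created
		strings.Contains(msg, "Duplicate key name") ||
		strings.Contains(msg, "duplicate column name") ||
		strings.Contains(msg, "Duplicate column name")
}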

@@ -0,0 +1,620 @@
/*
Copyright 2017 Shlomi Noach, GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
// generateSQLPatches contains DDLs for patching schema to the latest version.
// Add new statements at the end of the list so they form a changelog.
var generateSQLPatches = []string{
`
ALTER TABLE
database_instance
ADD COLUMN read_only TINYINT UNSIGNED NOT NULL AFTER version
`,
`
ALTER TABLE
database_instance
ADD COLUMN last_sql_error TEXT NOT NULL AFTER exec_master_log_pos
`,
`
ALTER TABLE
database_instance
ADD COLUMN last_io_error TEXT NOT NULL AFTER last_sql_error
`,
`
ALTER TABLE
database_instance
ADD COLUMN oracle_gtid TINYINT UNSIGNED NOT NULL AFTER slave_io_running
`,
`
ALTER TABLE
database_instance
ADD COLUMN mariadb_gtid TINYINT UNSIGNED NOT NULL AFTER oracle_gtid
`,
`
ALTER TABLE
database_instance
ADD COLUMN relay_log_file varchar(128) CHARACTER SET ascii NOT NULL AFTER exec_master_log_pos
`,
`
ALTER TABLE
database_instance
ADD COLUMN relay_log_pos bigint unsigned NOT NULL AFTER relay_log_file
`,
`
DROP INDEX master_host_port_idx ON database_instance
`,
`
ALTER TABLE
database_instance
ADD INDEX master_host_port_idx_database_instance (master_host, master_port)
`,
`
ALTER TABLE
database_instance
ADD COLUMN pseudo_gtid TINYINT UNSIGNED NOT NULL AFTER mariadb_gtid
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_depth TINYINT UNSIGNED NOT NULL AFTER cluster_name
`,
`
ALTER TABLE
database_instance
ADD COLUMN has_replication_filters TINYINT UNSIGNED NOT NULL AFTER slave_io_running
`,
`
ALTER TABLE
database_instance
ADD COLUMN data_center varchar(32) CHARACTER SET ascii NOT NULL AFTER cluster_name
`,
`
ALTER TABLE
database_instance
ADD COLUMN physical_environment varchar(32) CHARACTER SET ascii NOT NULL AFTER data_center
`,
`
ALTER TABLE
database_instance_maintenance
ADD KEY active_timestamp_idx (maintenance_active, begin_timestamp)
`,
`
ALTER TABLE
database_instance
ADD COLUMN uptime INT UNSIGNED NOT NULL AFTER last_seen
`,
`
ALTER TABLE
cluster_alias
ADD UNIQUE KEY alias_uidx (alias)
`,
`
ALTER TABLE
database_instance
ADD COLUMN is_co_master TINYINT UNSIGNED NOT NULL AFTER replication_depth
`,
`
ALTER TABLE
database_instance_maintenance
ADD KEY active_end_timestamp_idx (maintenance_active, end_timestamp)
`,
`
ALTER TABLE
database_instance
ADD COLUMN sql_delay INT UNSIGNED NOT NULL AFTER slave_lag_seconds
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN analysis varchar(128) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN cluster_name varchar(128) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN cluster_alias varchar(128) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN count_affected_slaves int unsigned NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN slave_hosts text CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE hostname_unresolve
ADD COLUMN last_registered TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE hostname_unresolve
ADD KEY last_registered_idx (last_registered)
`,
`
ALTER TABLE topology_recovery
ADD KEY cluster_name_in_active_idx (cluster_name, in_active_period)
`,
`
ALTER TABLE topology_recovery
ADD KEY end_recovery_idx (end_recovery)
`,
`
ALTER TABLE
database_instance
ADD COLUMN binlog_server TINYINT UNSIGNED NOT NULL AFTER version
`,
`
ALTER TABLE cluster_domain_name
ADD COLUMN last_registered TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE cluster_domain_name
ADD KEY last_registered_idx (last_registered)
`,
`
ALTER TABLE
database_instance
ADD COLUMN supports_oracle_gtid TINYINT UNSIGNED NOT NULL AFTER oracle_gtid
`,
`
ALTER TABLE
database_instance
ADD COLUMN executed_gtid_set text CHARACTER SET ascii NOT NULL AFTER oracle_gtid
`,
`
ALTER TABLE
database_instance
ADD COLUMN server_uuid varchar(64) CHARACTER SET ascii NOT NULL AFTER server_id
`,
`
ALTER TABLE
database_instance
ADD COLUMN suggested_cluster_alias varchar(128) CHARACTER SET ascii NOT NULL AFTER cluster_name
`,
`
ALTER TABLE cluster_alias
ADD COLUMN last_registered TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE cluster_alias
ADD KEY last_registered_idx (last_registered)
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN is_successful TINYINT UNSIGNED NOT NULL DEFAULT 0 AFTER processcing_node_token
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN acknowledged TINYINT UNSIGNED NOT NULL DEFAULT 0
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN acknowledged_by varchar(128) CHARACTER SET utf8 NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN acknowledge_comment text CHARACTER SET utf8 NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN participating_instances text CHARACTER SET ascii NOT NULL after slave_hosts
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN lost_slaves text CHARACTER SET ascii NOT NULL after participating_instances
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN all_errors text CHARACTER SET ascii NOT NULL after lost_slaves
`,
`
ALTER TABLE audit
ADD COLUMN cluster_name varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER port
`,
`
ALTER TABLE candidate_database_instance
ADD COLUMN priority TINYINT SIGNED NOT NULL DEFAULT 1 comment 'positive promote, negative unpromotes'
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN acknowledged_at TIMESTAMP NULL after acknowledged
`,
`
ALTER TABLE
topology_recovery
ADD KEY acknowledged_idx (acknowledged, acknowledged_at)
`,
`
ALTER TABLE
blocked_topology_recovery
ADD KEY last_blocked_idx (last_blocked_timestamp)
`,
`
ALTER TABLE candidate_database_instance
ADD COLUMN promotion_rule enum('must', 'prefer', 'neutral', 'prefer_not', 'must_not') NOT NULL DEFAULT 'neutral'
`,
`
ALTER TABLE node_health /* sqlite3-skip */
DROP PRIMARY KEY,
ADD PRIMARY KEY (hostname, token)
`,
`
ALTER TABLE node_health
ADD COLUMN extra_info varchar(128) CHARACTER SET utf8 NOT NULL
`,
`
ALTER TABLE agent_seed /* sqlite3-skip */
MODIFY end_timestamp timestamp NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE active_node /* sqlite3-skip */
MODIFY last_seen_active timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE node_health /* sqlite3-skip */
MODIFY last_seen_active timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE candidate_database_instance /* sqlite3-skip */
MODIFY last_suggested timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE master_position_equivalence /* sqlite3-skip */
MODIFY last_suggested timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE
database_instance
ADD COLUMN last_attempted_check TIMESTAMP NOT NULL DEFAULT '1971-01-01 00:00:00' AFTER last_checked
`,
`
ALTER TABLE
database_instance /* sqlite3-skip */
MODIFY last_attempted_check TIMESTAMP NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE
database_instance_analysis_changelog
ADD KEY instance_timestamp_idx (hostname, port, analysis_timestamp)
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN last_detection_id bigint unsigned NOT NULL
`,
`
ALTER TABLE
topology_recovery
ADD KEY last_detection_idx (last_detection_id)
`,
`
ALTER TABLE node_health_history
ADD COLUMN command varchar(128) CHARACTER SET utf8 NOT NULL
`,
`
ALTER TABLE node_health
ADD COLUMN command varchar(128) CHARACTER SET utf8 NOT NULL
`,
`
ALTER TABLE database_instance_topology_history
ADD COLUMN version varchar(128) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN gtid_purged text CHARACTER SET ascii NOT NULL AFTER executed_gtid_set
`,
`
ALTER TABLE
database_instance_coordinates_history
ADD COLUMN last_seen timestamp NOT NULL DEFAULT '1971-01-01 00:00:00' AFTER recorded_timestamp
`,
`
ALTER TABLE
access_token
ADD COLUMN is_reentrant TINYINT UNSIGNED NOT NULL default 0
`,
`
ALTER TABLE
access_token
ADD COLUMN acquired_at timestamp NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE
database_instance_pool
ADD COLUMN registered_at timestamp NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_credentials_available TINYINT UNSIGNED NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN has_replication_credentials TINYINT UNSIGNED NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN allow_tls TINYINT UNSIGNED NOT NULL AFTER sql_delay
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_enforced TINYINT UNSIGNED NOT NULL AFTER physical_environment
`,
`
ALTER TABLE
database_instance
ADD COLUMN instance_alias varchar(128) CHARACTER SET ascii NOT NULL AFTER physical_environment
`,
`
ALTER TABLE
topology_recovery
ADD COLUMN successor_alias varchar(128) DEFAULT NULL
`,
`
ALTER TABLE
database_instance /* sqlite3-skip */
MODIFY cluster_name varchar(128) NOT NULL
`,
`
ALTER TABLE
node_health
ADD INDEX last_seen_active_idx (last_seen_active)
`,
`
ALTER TABLE
database_instance_maintenance
ADD COLUMN processing_node_hostname varchar(128) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
database_instance_maintenance
ADD COLUMN processing_node_token varchar(128) NOT NULL
`,
`
ALTER TABLE
database_instance_maintenance
ADD COLUMN explicitly_bounded TINYINT UNSIGNED NOT NULL
`,
`
ALTER TABLE node_health_history
ADD COLUMN app_version varchar(64) CHARACTER SET ascii NOT NULL DEFAULT ""
`,
`
ALTER TABLE node_health
ADD COLUMN app_version varchar(64) CHARACTER SET ascii NOT NULL DEFAULT ""
`,
`
ALTER TABLE node_health_history /* sqlite3-skip */
MODIFY app_version varchar(64) CHARACTER SET ascii NOT NULL DEFAULT ""
`,
`
ALTER TABLE node_health /* sqlite3-skip */
MODIFY app_version varchar(64) CHARACTER SET ascii NOT NULL DEFAULT ""
`,
`
ALTER TABLE
database_instance
ADD COLUMN version_comment varchar(128) NOT NULL DEFAULT ''
`,
`
ALTER TABLE active_node
ADD COLUMN first_seen_active timestamp NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE node_health
ADD COLUMN first_seen_active timestamp NOT NULL DEFAULT '1971-01-01 00:00:00'
`,
`
ALTER TABLE database_instance
ADD COLUMN major_version varchar(16) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN binlog_row_image varchar(16) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE topology_recovery
ADD COLUMN uid varchar(128) CHARACTER SET ascii NOT NULL
`,
`
CREATE INDEX uid_idx_topology_recovery ON topology_recovery(uid)
`,
`
CREATE INDEX recovery_uid_idx_topology_recovery_steps ON topology_recovery_steps(recovery_uid)
`,
`
ALTER TABLE
database_instance
ADD COLUMN last_discovery_latency bigint not null
`,
`
CREATE INDEX end_timestamp_idx_database_instance_downtime ON database_instance_downtime(end_timestamp)
`,
`
CREATE INDEX suggested_cluster_alias_idx_database_instance ON database_instance(suggested_cluster_alias)
`,
`
ALTER TABLE
topology_failure_detection
ADD COLUMN is_actionable tinyint not null default 0
`,
`
DROP INDEX hostname_port_active_period_uidx_topology_failure_detection ON topology_failure_detection
`,
`
CREATE UNIQUE INDEX host_port_active_recoverable_uidx_topology_failure_detection ON topology_failure_detection (hostname, port, in_active_period, end_active_period_unixtime, is_actionable)
`,
`
ALTER TABLE raft_snapshot
ADD COLUMN created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
`,
`
ALTER TABLE node_health
ADD COLUMN db_backend varchar(255) CHARACTER SET ascii NOT NULL DEFAULT ""
`,
`
ALTER TABLE node_health
ADD COLUMN incrementing_indicator bigint not null default 0
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_master_enabled TINYINT UNSIGNED NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_replica_enabled TINYINT UNSIGNED NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN gtid_mode varchar(32) CHARACTER SET ascii NOT NULL
`,
`
ALTER TABLE
database_instance
ADD COLUMN last_check_partial_success tinyint unsigned NOT NULL after last_attempted_check
`,
`
ALTER TABLE
database_instance
ADD COLUMN master_uuid varchar(64) CHARACTER SET ascii NOT NULL AFTER oracle_gtid
`,
`
ALTER TABLE
database_instance
ADD COLUMN gtid_errant text CHARACTER SET ascii NOT NULL AFTER gtid_purged
`,
`
ALTER TABLE
database_instance
ADD COLUMN ancestry_uuid text CHARACTER SET ascii NOT NULL AFTER master_uuid
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_sql_thread_state tinyint signed not null default 0 AFTER slave_io_running
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_io_thread_state tinyint signed not null default 0 AFTER replication_sql_thread_state
`,
`
ALTER TABLE
database_instance_tags /* sqlite3-skip */
DROP PRIMARY KEY,
ADD PRIMARY KEY (hostname, port, tag_name)
`,
`
ALTER TABLE
database_instance
ADD COLUMN region varchar(32) CHARACTER SET ascii NOT NULL AFTER data_center
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_master_timeout INT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_master_enabled
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_master_wait_for_slave_count INT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_master_timeout
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_master_status TINYINT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_master_wait_for_slave_count
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_replica_status TINYINT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_master_status
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_master_clients INT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_master_status
`,
`
ALTER TABLE
database_instance
ADD COLUMN semi_sync_available TINYINT UNSIGNED NOT NULL DEFAULT 0 AFTER semi_sync_enforced
`,
`
ALTER TABLE /* sqlite3-skip */
database_instance
MODIFY semi_sync_master_timeout BIGINT UNSIGNED NOT NULL DEFAULT 0
`,
// Fields related to Replication Group the instance belongs to
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_name VARCHAR(64) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER gtid_mode
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_is_single_primary_mode TINYINT UNSIGNED NOT NULL DEFAULT 1 AFTER replication_group_name
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_member_state VARCHAR(16) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER replication_group_is_single_primary_mode
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_member_role VARCHAR(16) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER replication_group_member_state
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_members text CHARACTER SET ascii NOT NULL AFTER replication_group_member_role
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_primary_host varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '' AFTER replication_group_members
`,
`
ALTER TABLE
database_instance
ADD COLUMN replication_group_primary_port smallint(5) unsigned NOT NULL DEFAULT 0 AFTER replication_group_primary_host
`,
}
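Statements tagged `/* sqlite3-skip */` are MySQL-only: the sqlite3 dialect converter added later in this commit (sqlutils/sqlite_dialect.go) rewrites any statement carrying that marker to an empty string. A minimal sketch of that behavior:

```
package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)

func main() {
	stmt := `ALTER TABLE node_health /* sqlite3-skip */
		MODIFY app_version varchar(64) CHARACTER SET ascii NOT NULL DEFAULT ""`
	// IsAlterTable matches, then the sqlite3-skip conversion blanks the whole statement.
	fmt.Printf("%q\n", sqlutils.ToSqlite3Dialect(stmt)) // prints ""
}
```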


@ -0,0 +1,146 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
import (
"crypto/tls"
"fmt"
"strings"
"time"
"github.com/go-sql-driver/mysql"
"github.com/patrickmn/go-cache"
"github.com/rcrowley/go-metrics"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/ssl"
)
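// Error 3159 is MySQL's ER_SECURE_TRANSPORT_REQUIRED; error 1045 is the generic access-denied error.
// requiresTLS below treats either response to a plaintext ping as a signal that the instance mandates TLS.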
const Error3159 = "Error 3159:"
const Error1045 = "Access denied for user"
// topologyTLSConfigured tracks whether a TLS config has already been registered for topology connections
var topologyTLSConfigured bool = false
// orchestratorTLSConfigured tracks whether a TLS config has already been registered for the Orchestrator backend
var orchestratorTLSConfigured bool = false
var requireTLSCache *cache.Cache = cache.New(time.Duration(config.Config.TLSCacheTTLFactor*config.Config.InstancePollSeconds)*time.Second, time.Second)
var readInstanceTLSCounter = metrics.NewCounter()
var writeInstanceTLSCounter = metrics.NewCounter()
var readInstanceTLSCacheCounter = metrics.NewCounter()
var writeInstanceTLSCacheCounter = metrics.NewCounter()
func init() {
metrics.Register("instance_tls.read", readInstanceTLSCounter)
metrics.Register("instance_tls.write", writeInstanceTLSCounter)
metrics.Register("instance_tls.read_cache", readInstanceTLSCacheCounter)
metrics.Register("instance_tls.write_cache", writeInstanceTLSCacheCounter)
}
func requiresTLS(host string, port int, mysql_uri string) bool {
cacheKey := fmt.Sprintf("%s:%d", host, port)
if value, found := requireTLSCache.Get(cacheKey); found {
readInstanceTLSCacheCounter.Inc(1)
return value.(bool)
}
required := false
db, _, _ := sqlutils.GetDB(mysql_uri)
if err := db.Ping(); err != nil && (strings.Contains(err.Error(), Error3159) || strings.Contains(err.Error(), Error1045)) {
required = true
}
query := `
insert into
database_instance_tls (
hostname, port, required
) values (
?, ?, ?
)
on duplicate key update
required=values(required)
`
if _, err := ExecOrchestrator(query, host, port, required); err != nil {
log.Errore(err)
}
writeInstanceTLSCounter.Inc(1)
requireTLSCache.Set(cacheKey, required, cache.DefaultExpiration)
writeInstanceTLSCacheCounter.Inc(1)
return required
}
// Create a TLS configuration from the config supplied CA, Certificate, and Private key.
// Register the TLS config with the mysql drivers as the "topology" config
// Modify the supplied URI to call the TLS config
func SetupMySQLTopologyTLS(uri string) (string, error) {
if !topologyTLSConfigured {
tlsConfig, err := ssl.NewTLSConfig(config.Config.MySQLTopologySSLCAFile, !config.Config.MySQLTopologySSLSkipVerify)
if err != nil {
return "", log.Errorf("Can't create TLS configuration for Topology connection %s: %s", uri, err)
}
// Drop to TLS 1.0 for talking to MySQL; set this only after the error check, since tlsConfig is nil on error
tlsConfig.MinVersion = tls.VersionTLS10
tlsConfig.InsecureSkipVerify = config.Config.MySQLTopologySSLSkipVerify
if (config.Config.MySQLTopologyUseMutualTLS && !config.Config.MySQLTopologySSLSkipVerify) &&
config.Config.MySQLTopologySSLCertFile != "" &&
config.Config.MySQLTopologySSLPrivateKeyFile != "" {
if err = ssl.AppendKeyPair(tlsConfig, config.Config.MySQLTopologySSLCertFile, config.Config.MySQLTopologySSLPrivateKeyFile); err != nil {
return "", log.Errorf("Can't setup TLS key pairs for %s: %s", uri, err)
}
}
if err = mysql.RegisterTLSConfig("topology", tlsConfig); err != nil {
return "", log.Errorf("Can't register mysql TLS config for topology: %s", err)
}
topologyTLSConfigured = true
}
return fmt.Sprintf("%s&tls=topology", uri), nil
}
// Create a TLS configuration from the config supplied CA, Certificate, and Private key.
// Register the TLS config with the mysql drivers as the "orchestrator" config
// Modify the supplied URI to call the TLS config
func SetupMySQLOrchestratorTLS(uri string) (string, error) {
if !orchestratorTLSConfigured {
tlsConfig, err := ssl.NewTLSConfig(config.Config.MySQLOrchestratorSSLCAFile, !config.Config.MySQLOrchestratorSSLSkipVerify)
if err != nil {
return "", log.Fatalf("Can't create TLS configuration for Orchestrator connection %s: %s", uri, err)
}
// Drop to TLS 1.0 for talking to MySQL; set this only after the error check, since tlsConfig is nil on error
tlsConfig.MinVersion = tls.VersionTLS10
tlsConfig.InsecureSkipVerify = config.Config.MySQLOrchestratorSSLSkipVerify
if (!config.Config.MySQLOrchestratorSSLSkipVerify) &&
config.Config.MySQLOrchestratorSSLCertFile != "" &&
config.Config.MySQLOrchestratorSSLPrivateKeyFile != "" {
if err = ssl.AppendKeyPair(tlsConfig, config.Config.MySQLOrchestratorSSLCertFile, config.Config.MySQLOrchestratorSSLPrivateKeyFile); err != nil {
return "", log.Fatalf("Can't setup TLS key pairs for %s: %s", uri, err)
}
}
if err = mysql.RegisterTLSConfig("orchestrator", tlsConfig); err != nil {
return "", log.Fatalf("Can't register mysql TLS config for orchestrator: %s", err)
}
orchestratorTLSConfigured = true
}
return fmt.Sprintf("%s&tls=orchestrator", uri), nil
}
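SetupMySQLTopologyTLS only rewrites the DSN; go-sql-driver picks up the registered "topology" config when a connection is opened. A hedged sketch of the call pattern (the function and DSN are hypothetical, and it assumes "database/sql" alongside the imports above):

```
func topologyDB() (*sql.DB, error) {
	// the DSN must already carry a '?' parameter list so that "&tls=topology" appends cleanly
	uri := "user:pass@tcp(db1.example.com:3306)/?timeout=1s"
	if config.Config.MySQLTopologyUseMutualTLS {
		var err error
		if uri, err = SetupMySQLTopologyTLS(uri); err != nil {
			return nil, err
		}
	}
	db, _, err := sqlutils.GetDB(uri)
	return db, err
}
```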


@ -0,0 +1,200 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
import (
"time"
"github.com/montanaflynn/stats"
"vitess.io/vitess/go/vt/orchestrator/collection"
)
// AggregatedDiscoveryMetrics contains aggregated metrics for instance discovery.
// Called from api/discovery-metrics-aggregated/:seconds
type AggregatedDiscoveryMetrics struct {
FirstSeen time.Time // timestamp of the first data seen
LastSeen time.Time // timestamp of the last data seen
CountDistinctInstanceKeys int // number of distinct Instances seen (note: not necessarily succeeded + failed, as a key may appear in both)
CountDistinctOkInstanceKeys int // number of distinct Instances which succeeded
CountDistinctFailedInstanceKeys int // number of distinct Instances which failed
FailedDiscoveries uint64 // number of failed discoveries
SuccessfulDiscoveries uint64 // number of successful discoveries
MeanTotalSeconds float64
MeanBackendSeconds float64
MeanInstanceSeconds float64
FailedMeanTotalSeconds float64
FailedMeanBackendSeconds float64
FailedMeanInstanceSeconds float64
MaxTotalSeconds float64
MaxBackendSeconds float64
MaxInstanceSeconds float64
FailedMaxTotalSeconds float64
FailedMaxBackendSeconds float64
FailedMaxInstanceSeconds float64
MedianTotalSeconds float64
MedianBackendSeconds float64
MedianInstanceSeconds float64
FailedMedianTotalSeconds float64
FailedMedianBackendSeconds float64
FailedMedianInstanceSeconds float64
P95TotalSeconds float64
P95BackendSeconds float64
P95InstanceSeconds float64
FailedP95TotalSeconds float64
FailedP95BackendSeconds float64
FailedP95InstanceSeconds float64
}
// aggregate returns the aggregate values of the given metrics (assumed to be Metric)
func aggregate(results []collection.Metric) AggregatedDiscoveryMetrics {
if len(results) == 0 {
return AggregatedDiscoveryMetrics{}
}
var (
first time.Time
last time.Time
)
type counterKey string
type hostKey string
type timerKey string
const (
FailedDiscoveries counterKey = "FailedDiscoveries"
Discoveries = "Discoveries"
InstanceKeys hostKey = "InstanceKeys"
OkInstanceKeys = "OkInstanceKeys"
FailedInstanceKeys = "FailedInstanceKeys"
TotalSeconds timerKey = "TotalSeconds"
BackendSeconds = "BackendSeconds"
InstanceSeconds = "InstanceSeconds"
FailedTotalSeconds = "FailedTotalSeconds"
FailedBackendSeconds = "FailedBackendSeconds"
FailedInstanceSeconds = "FailedInstanceSeconds"
)
counters := make(map[counterKey]uint64) // map of string based counters
names := make(map[hostKey](map[string]int)) // map of string based names (using a map)
timings := make(map[timerKey](stats.Float64Data)) // map of string based float64 values
// initialise counters
for _, v := range []counterKey{FailedDiscoveries, Discoveries} {
counters[v] = 0
}
// initialise names
for _, v := range []hostKey{InstanceKeys, FailedInstanceKeys, OkInstanceKeys} {
names[v] = make(map[string]int)
}
// initialise timers
for _, v := range []timerKey{TotalSeconds, BackendSeconds, InstanceSeconds, FailedTotalSeconds, FailedBackendSeconds, FailedInstanceSeconds} {
timings[v] = nil
}
// iterate over results storing required values
for _, v2 := range results {
v := v2.(*Metric) // convert to the right type
// first and last
if first.IsZero() || first.After(v.Timestamp) {
first = v.Timestamp
}
if last.Before(v.Timestamp) {
last = v.Timestamp
}
// different names
x := names[InstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[InstanceKeys] = x
if v.Err == nil {
// ok names
x := names[OkInstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[OkInstanceKeys] = x
} else {
// failed names
x := names[FailedInstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[FailedInstanceKeys] = x
}
// discoveries
counters[Discoveries]++
if v.Err != nil {
counters[FailedDiscoveries]++
}
// All timings
timings[TotalSeconds] = append(timings[TotalSeconds], v.TotalLatency.Seconds())
timings[BackendSeconds] = append(timings[BackendSeconds], v.BackendLatency.Seconds())
timings[InstanceSeconds] = append(timings[InstanceSeconds], v.InstanceLatency.Seconds())
// Failed timings
if v.Err != nil {
timings[FailedTotalSeconds] = append(timings[FailedTotalSeconds], v.TotalLatency.Seconds())
timings[FailedBackendSeconds] = append(timings[FailedBackendSeconds], v.BackendLatency.Seconds())
timings[FailedInstanceSeconds] = append(timings[FailedInstanceSeconds], v.InstanceLatency.Seconds())
}
}
return AggregatedDiscoveryMetrics{
FirstSeen: first,
LastSeen: last,
CountDistinctInstanceKeys: len(names[InstanceKeys]),
CountDistinctOkInstanceKeys: len(names[OkInstanceKeys]),
CountDistinctFailedInstanceKeys: len(names[FailedInstanceKeys]),
FailedDiscoveries: counters[FailedDiscoveries],
SuccessfulDiscoveries: counters[Discoveries],
MeanTotalSeconds: mean(timings[TotalSeconds]),
MeanBackendSeconds: mean(timings[BackendSeconds]),
MeanInstanceSeconds: mean(timings[InstanceSeconds]),
FailedMeanTotalSeconds: mean(timings[FailedTotalSeconds]),
FailedMeanBackendSeconds: mean(timings[FailedBackendSeconds]),
FailedMeanInstanceSeconds: mean(timings[FailedInstanceSeconds]),
MaxTotalSeconds: max(timings[TotalSeconds]),
MaxBackendSeconds: max(timings[BackendSeconds]),
MaxInstanceSeconds: max(timings[InstanceSeconds]),
FailedMaxTotalSeconds: max(timings[FailedTotalSeconds]),
FailedMaxBackendSeconds: max(timings[FailedBackendSeconds]),
FailedMaxInstanceSeconds: max(timings[FailedInstanceSeconds]),
MedianTotalSeconds: median(timings[TotalSeconds]),
MedianBackendSeconds: median(timings[BackendSeconds]),
MedianInstanceSeconds: median(timings[InstanceSeconds]),
FailedMedianTotalSeconds: median(timings[FailedTotalSeconds]),
FailedMedianBackendSeconds: median(timings[FailedBackendSeconds]),
FailedMedianInstanceSeconds: median(timings[FailedInstanceSeconds]),
P95TotalSeconds: percentile(timings[TotalSeconds], 95),
P95BackendSeconds: percentile(timings[BackendSeconds], 95),
P95InstanceSeconds: percentile(timings[InstanceSeconds], 95),
FailedP95TotalSeconds: percentile(timings[FailedTotalSeconds], 95),
FailedP95BackendSeconds: percentile(timings[FailedBackendSeconds], 95),
FailedP95InstanceSeconds: percentile(timings[FailedInstanceSeconds], 95),
}
}
// AggregatedSince returns a large number of aggregated metrics
// based on the raw metrics collected since the given time.
func AggregatedSince(c *collection.Collection, t time.Time) (AggregatedDiscoveryMetrics, error) {
results, err := c.Since(t)
if err != nil {
return AggregatedDiscoveryMetrics{}, err
}
return aggregate(results), nil
}
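A sketch of the intended call pattern, assuming a discovery metrics collection that is fed elsewhere (the `discoveryMetrics` name is hypothetical):

```
m, err := AggregatedSince(discoveryMetrics, time.Now().Add(-60*time.Second))
if err == nil {
	fmt.Printf("distinct ok=%d failed=%d p95 total=%.3fs\n",
		m.CountDistinctOkInstanceKeys, m.CountDistinctFailedInstanceKeys, m.P95TotalSeconds)
}
```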


@ -0,0 +1,66 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
import (
"github.com/montanaflynn/stats"
)
// internal routine to return the average value or 0
func mean(values stats.Float64Data) float64 {
s, err := stats.Mean(values)
if err != nil {
return 0
}
return s
}
// internal routine to return the requested percentile value or 0
func percentile(values stats.Float64Data, percent float64) float64 {
s, err := stats.Percentile(values, percent)
if err != nil {
return 0
}
return s
}
// internal routine to return the maximum value or 0
func max(values stats.Float64Data) float64 {
s, err := stats.Max(values)
if err != nil {
return 0
}
return s
}
// internal routine to return the minimum value or 9e9
func min(values stats.Float64Data) float64 {
s, err := stats.Min(values)
if err != nil {
return 9e9 // a large number (should use something better than this but it's ok for now)
}
return s
}
// internal routine to return the median or 0
func median(values stats.Float64Data) float64 {
s, err := stats.Median(values)
if err != nil {
return 0
}
return s
}
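These helpers deliberately swallow the errors the stats package returns on empty input, trading accuracy for convenience at the call sites. A quick illustration:

```
values := stats.Float64Data{0.12, 0.34, 0.56}
_ = mean(values)             // ≈0.34
_ = percentile(values, 95)   // 0.56
_ = max(stats.Float64Data{}) // 0: the underlying error is swallowed
_ = min(stats.Float64Data{}) // 9e9 sentinel rather than an error
```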


@ -0,0 +1,40 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
// Collect discovery metrics and manage their storage and retrieval for monitoring purposes.
import (
"time"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
// Metric holds a set of information of instance discovery metrics
type Metric struct {
Timestamp time.Time // time the collection was taken
InstanceKey inst.InstanceKey // instance being monitored
BackendLatency time.Duration // time taken talking to the backend
InstanceLatency time.Duration // time taken talking to the instance
TotalLatency time.Duration // total time taken doing the discovery
Err error // error (if applicable) doing the discovery process
}
// When did the metric happen
func (m Metric) When() time.Time {
return m.Timestamp
}
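When() is what satisfies the collection.Metric interface, which is how these values end up in a Collection. A hedged sketch, assuming CreateOrReturnCollection and Append from the collection package (which is not part of this commit):

```
c := collection.CreateOrReturnCollection("DISCOVERY_METRICS") // hypothetical collection name
_ = c.Append(&Metric{
	Timestamp:   time.Now(),
	InstanceKey: inst.InstanceKey{Hostname: "db1.example.com", Port: 3306}, // hypothetical instance
})
```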


@ -0,0 +1,74 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
// Collect discovery metrics and manage their storage and retrieval for monitoring purposes.
import (
"errors"
"fmt"
"time"
"vitess.io/vitess/go/vt/orchestrator/collection"
)
// formattedFloat renders latencies with 3 decimal places via fmt's Stringer interface (note that encoding/json does not consult Stringer, so raw JSON marshaling keeps the full float)
type formattedFloat float64
func (m formattedFloat) String() string {
return fmt.Sprintf("%.3f", m)
}
// MetricJSON holds a structure which represents some discovery latency information
type MetricJSON struct {
Timestamp time.Time
Hostname string
Port int
BackendLatencySeconds formattedFloat
InstanceLatencySeconds formattedFloat
TotalLatencySeconds formattedFloat
Err error
}
// JSONSince returns an API response of discovery metric collection information
// in a printable JSON format.
func JSONSince(c *collection.Collection, t time.Time) ([](MetricJSON), error) {
if c == nil {
return nil, errors.New("MetricCollection.JSONSince: c == nil")
}
raw, err := c.Since(t)
if err != nil {
return nil, err
}
// build up JSON response for each Metric we received
var s []MetricJSON
for i := range raw {
m := raw[i].(*Metric) // convert back to a real Metric rather than collection.Metric interface
mj := MetricJSON{
Timestamp: m.Timestamp,
Hostname: m.InstanceKey.Hostname,
Port: m.InstanceKey.Port,
BackendLatencySeconds: formattedFloat(m.BackendLatency.Seconds()),
InstanceLatencySeconds: formattedFloat(m.InstanceLatency.Seconds()),
TotalLatencySeconds: formattedFloat(m.TotalLatency.Seconds()),
Err: m.Err,
}
s = append(s, mj)
}
return s, nil
}
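A sketch of serving this from an API handler, assuming "encoding/json" and the same hypothetical `discoveryMetrics` collection as above:

```
entries, err := JSONSince(discoveryMetrics, time.Now().Add(-60*time.Second))
if err == nil {
	payload, _ := json.Marshal(entries)
	fmt.Println(string(payload))
}
```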


@ -0,0 +1,187 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
package discovery manages a queue of discovery requests: an ordered
queue with no duplicates.
push() operation never blocks while pop() blocks on an empty queue.
*/
package discovery
import (
"sync"
"time"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
// QueueMetric contains the queue's active and queued sizes
type QueueMetric struct {
Active int
Queued int
}
// Queue contains information for managing discovery requests
type Queue struct {
sync.Mutex
name string
done chan struct{}
queue chan inst.InstanceKey
queuedKeys map[inst.InstanceKey]time.Time
consumedKeys map[inst.InstanceKey]time.Time
metrics []QueueMetric
}
// DiscoveryQueue contains the discovery queue which can then be accessed via an API call for monitoring.
// Currently this is accessed by ContinuousDiscovery() and also from HTTP API calls;
// access may need better protection.
var discoveryQueue map[string](*Queue)
var dcLock sync.Mutex
func init() {
discoveryQueue = make(map[string](*Queue))
}
// StopMonitoring stops monitoring all the queues
func StopMonitoring() {
for _, q := range discoveryQueue {
q.stopMonitoring()
}
}
// CreateOrReturnQueue allows for creation of a new discovery queue or
// returning a pointer to an existing one given the name.
func CreateOrReturnQueue(name string) *Queue {
dcLock.Lock()
defer dcLock.Unlock()
if q, found := discoveryQueue[name]; found {
return q
}
q := &Queue{
name: name,
done: make(chan struct{}), // must be initialized, or stopMonitoring() would block forever on a nil channel
queuedKeys: make(map[inst.InstanceKey]time.Time),
consumedKeys: make(map[inst.InstanceKey]time.Time),
queue: make(chan inst.InstanceKey, config.Config.DiscoveryQueueCapacity),
}
go q.startMonitoring()
discoveryQueue[name] = q
return q
}
// startMonitoring monitors queue sizes until told to stop
func (q *Queue) startMonitoring() {
log.Debugf("Queue.startMonitoring(%s)", q.name)
ticker := time.NewTicker(time.Second) // hard-coded at every second
for {
select {
case <-ticker.C: // do the periodic expiry
q.collectStatistics()
case <-q.done:
return
}
}
}
// Stop monitoring the queue
func (q *Queue) stopMonitoring() {
q.done <- struct{}{}
}
// do a check of the entries in the queue, both those active and queued
func (q *Queue) collectStatistics() {
q.Lock()
defer q.Unlock()
q.metrics = append(q.metrics, QueueMetric{Queued: len(q.queuedKeys), Active: len(q.consumedKeys)})
// remove old entries if we get too big
if len(q.metrics) > config.Config.DiscoveryQueueMaxStatisticsSize {
q.metrics = q.metrics[len(q.metrics)-config.Config.DiscoveryQueueMaxStatisticsSize:]
}
}
// QueueLen returns the length of the queue (channel size + queued size)
func (q *Queue) QueueLen() int {
q.Lock()
defer q.Unlock()
return len(q.queue) + len(q.queuedKeys)
}
// Push enqueues a key if it is not on a queue and is not being
// processed; silently returns otherwise.
func (q *Queue) Push(key inst.InstanceKey) {
q.Lock()
defer q.Unlock()
// is it enqueued already?
if _, found := q.queuedKeys[key]; found {
return
}
// is it being processed now?
if _, found := q.consumedKeys[key]; found {
return
}
q.queuedKeys[key] = time.Now()
q.queue <- key
}
// Consume fetches a key to process; blocks if queue is empty.
// Release must be called once after Consume.
func (q *Queue) Consume() inst.InstanceKey {
q.Lock()
queue := q.queue
q.Unlock()
key := <-queue
q.Lock()
defer q.Unlock()
// alarm if have been waiting for too long
timeOnQueue := time.Since(q.queuedKeys[key])
if timeOnQueue > time.Duration(config.Config.InstancePollSeconds)*time.Second {
log.Warningf("key %v spent %.4fs waiting on a discoveryQueue", key, timeOnQueue.Seconds())
}
q.consumedKeys[key] = q.queuedKeys[key]
delete(q.queuedKeys, key)
return key
}
// Release removes a key from a list of being processed keys
// which allows that key to be pushed into the queue again.
func (q *Queue) Release(key inst.InstanceKey) {
q.Lock()
defer q.Unlock()
delete(q.consumedKeys, key)
}
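The intended lifecycle is push, consume, work, release; Release is what re-arms deduplication for a key. A minimal worker sketch (handleDiscoveryRequest is hypothetical):

```
q := CreateOrReturnQueue("DEFAULT")
go func() {
	for {
		key := q.Consume()          // blocks until a key is available
		handleDiscoveryRequest(key) // the actual discovery work
		q.Release(key)              // allow the key to be queued again
	}
}()
q.Push(inst.InstanceKey{Hostname: "db1.example.com", Port: 3306}) // deduplicated while queued or active
```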


@ -0,0 +1,95 @@
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
import (
"github.com/montanaflynn/stats"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
// AggregatedQueueMetrics contains aggregate statistics over part of the queue metrics history
type AggregatedQueueMetrics struct {
ActiveMinEntries float64
ActiveMeanEntries float64
ActiveMedianEntries float64
ActiveP95Entries float64
ActiveMaxEntries float64
QueuedMinEntries float64
QueuedMeanEntries float64
QueuedMedianEntries float64
QueuedP95Entries float64
QueuedMaxEntries float64
}
// we pull out values in ints so convert to float64 for metric calculations
func intSliceToFloat64Slice(someInts []int) stats.Float64Data {
var slice stats.Float64Data
for _, v := range someInts {
slice = append(slice, float64(v))
}
return slice
}
// DiscoveryQueueMetrics returns some raw queue metrics based on the
// period (last N entries) requested.
func (q *Queue) DiscoveryQueueMetrics(period int) []QueueMetric {
q.Lock()
defer q.Unlock()
// adjust period in case we ask for something that's too long
if period > len(q.metrics) {
log.Debugf("DiscoveryQueueMetrics: wanted: %d, adjusting period to %d", period, len(q.metrics))
period = len(q.metrics)
}
a := q.metrics[len(q.metrics)-period:]
log.Debugf("DiscoveryQueueMetrics: returning values: %+v", a)
return a
}
// AggregatedDiscoveryQueueMetrics returns some aggregate statistics
// based on the period (last N entries) requested. We store up to
// config.Config.DiscoveryQueueMaxStatisticsSize values and collect once
// a second so we expect period to be a smaller value.
func (q *Queue) AggregatedDiscoveryQueueMetrics(period int) *AggregatedQueueMetrics {
wanted := q.DiscoveryQueueMetrics(period)
var activeEntries, queuedEntries []int
// fill vars
for i := range wanted {
activeEntries = append(activeEntries, wanted[i].Active)
queuedEntries = append(queuedEntries, wanted[i].Queued)
}
a := &AggregatedQueueMetrics{
ActiveMinEntries: min(intSliceToFloat64Slice(activeEntries)),
ActiveMeanEntries: mean(intSliceToFloat64Slice(activeEntries)),
ActiveMedianEntries: median(intSliceToFloat64Slice(activeEntries)),
ActiveP95Entries: percentile(intSliceToFloat64Slice(activeEntries), 95),
ActiveMaxEntries: max(intSliceToFloat64Slice(activeEntries)),
QueuedMinEntries: min(intSliceToFloat64Slice(queuedEntries)),
QueuedMeanEntries: mean(intSliceToFloat64Slice(queuedEntries)),
QueuedMedianEntries: median(intSliceToFloat64Slice(queuedEntries)),
QueuedP95Entries: percentile(intSliceToFloat64Slice(queuedEntries), 95),
QueuedMaxEntries: max(intSliceToFloat64Slice(queuedEntries)),
}
log.Debugf("AggregatedDiscoveryQueueMetrics: returning values: %+v", a)
return a
}
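Since collectStatistics runs once a second, the period is effectively "the last N seconds". For example:

```
q := CreateOrReturnQueue("DEFAULT")
qm := q.AggregatedDiscoveryQueueMetrics(60) // aggregate the last ~60 one-second samples
fmt.Printf("queued p95=%.1f max=%.1f\n", qm.QueuedP95Entries, qm.QueuedMaxEntries)
```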

9
go/vt/orchestrator/external/golib/README.md vendored Normal file

@ -0,0 +1,9 @@
Common Go libraries
To import & use:
```
go get "github.com/openark/golib/math"
go get "github.com/openark/golib/sqlutils"
go get "github.com/openark/golib/tests"
...
```

268
go/vt/orchestrator/external/golib/log/log.go vendored Normal file

@ -0,0 +1,268 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package log
import (
"errors"
"fmt"
"log/syslog"
"os"
"runtime/debug"
"time"
)
// LogLevel indicates the severity of a log entry
type LogLevel int
func (this LogLevel) String() string {
switch this {
case FATAL:
return "FATAL"
case CRITICAL:
return "CRITICAL"
case ERROR:
return "ERROR"
case WARNING:
return "WARNING"
case NOTICE:
return "NOTICE"
case INFO:
return "INFO"
case DEBUG:
return "DEBUG"
}
return "unknown"
}
func LogLevelFromString(logLevelName string) (LogLevel, error) {
switch logLevelName {
case "FATAL":
return FATAL, nil
case "CRITICAL":
return CRITICAL, nil
case "ERROR":
return ERROR, nil
case "WARNING":
return WARNING, nil
case "NOTICE":
return NOTICE, nil
case "INFO":
return INFO, nil
case "DEBUG":
return DEBUG, nil
}
return 0, fmt.Errorf("Unknown LogLevel name: %+v", logLevelName)
}
const (
FATAL LogLevel = iota
CRITICAL
ERROR
WARNING
NOTICE
INFO
DEBUG
)
const TimeFormat = "2006-01-02 15:04:05"
// globalLogLevel indicates the global level filter for all logs (only entries with a level equal to
// or higher than this value will be logged)
var globalLogLevel LogLevel = DEBUG
var printStackTrace bool = false
// syslogWriter is optional, and defaults to nil (disabled)
var syslogLevel LogLevel = ERROR
var syslogWriter *syslog.Writer
// SetPrintStackTrace enables/disables dumping the stack upon error logging
func SetPrintStackTrace(shouldPrintStackTrace bool) {
printStackTrace = shouldPrintStackTrace
}
// SetLevel sets the global log level. Only entries with a level equal to or higher than
// this value will be logged
func SetLevel(logLevel LogLevel) {
globalLogLevel = logLevel
}
// GetLevel returns current global log level
func GetLevel() LogLevel {
return globalLogLevel
}
// EnableSyslogWriter enables, if possible, writes to syslog. These will execute _in addition_ to normal logging
func EnableSyslogWriter(tag string) (err error) {
syslogWriter, err = syslog.New(syslog.LOG_ERR, tag)
if err != nil {
syslogWriter = nil
}
return err
}
// SetSyslogLevel sets the minimal syslog level. Only entries with a level equal to or higher than
// this value will be logged. However, this is also capped by the global log level. That is,
// messages with a level lower than the global log level will be discarded in any case.
func SetSyslogLevel(logLevel LogLevel) {
syslogLevel = logLevel
}
// logFormattedEntry nicely formats and emits a log entry
func logFormattedEntry(logLevel LogLevel, message string, args ...interface{}) string {
if logLevel > globalLogLevel {
return ""
}
// if TZ env variable is set, update the timestamp timezone
localizedTime := time.Now()
tzLocation := os.Getenv("TZ")
if tzLocation != "" {
location, err := time.LoadLocation(tzLocation)
if err == nil { // if invalid tz location was provided, just leave it as the default
localizedTime = time.Now().In(location)
}
}
msgArgs := fmt.Sprintf(message, args...)
entryString := fmt.Sprintf("%s %s %s", localizedTime.Format(TimeFormat), logLevel, msgArgs)
fmt.Fprintln(os.Stderr, entryString)
if syslogWriter != nil {
go func() error {
if logLevel > syslogLevel {
return nil
}
switch logLevel {
case FATAL:
return syslogWriter.Emerg(msgArgs)
case CRITICAL:
return syslogWriter.Crit(msgArgs)
case ERROR:
return syslogWriter.Err(msgArgs)
case WARNING:
return syslogWriter.Warning(msgArgs)
case NOTICE:
return syslogWriter.Notice(msgArgs)
case INFO:
return syslogWriter.Info(msgArgs)
case DEBUG:
return syslogWriter.Debug(msgArgs)
}
return nil
}()
}
return entryString
}
// logEntry emits a formatted log entry
func logEntry(logLevel LogLevel, message string, args ...interface{}) string {
entryString := message
for _, s := range args {
entryString += fmt.Sprintf(" %s", s)
}
return logFormattedEntry(logLevel, entryString)
}
// logErrorEntry emits a log entry based on given error object
func logErrorEntry(logLevel LogLevel, err error) error {
if err == nil {
// No error
return nil
}
entryString := fmt.Sprintf("%+v", err)
logEntry(logLevel, entryString)
if printStackTrace {
debug.PrintStack()
}
return err
}
func Debug(message string, args ...interface{}) string {
return logEntry(DEBUG, message, args...)
}
func Debugf(message string, args ...interface{}) string {
return logFormattedEntry(DEBUG, message, args...)
}
func Info(message string, args ...interface{}) string {
return logEntry(INFO, message, args...)
}
func Infof(message string, args ...interface{}) string {
return logFormattedEntry(INFO, message, args...)
}
func Notice(message string, args ...interface{}) string {
return logEntry(NOTICE, message, args...)
}
func Noticef(message string, args ...interface{}) string {
return logFormattedEntry(NOTICE, message, args...)
}
func Warning(message string, args ...interface{}) error {
return errors.New(logEntry(WARNING, message, args...))
}
func Warningf(message string, args ...interface{}) error {
return errors.New(logFormattedEntry(WARNING, message, args...))
}
func Error(message string, args ...interface{}) error {
return errors.New(logEntry(ERROR, message, args...))
}
func Errorf(message string, args ...interface{}) error {
return errors.New(logFormattedEntry(ERROR, message, args...))
}
func Errore(err error) error {
return logErrorEntry(ERROR, err)
}
func Critical(message string, args ...interface{}) error {
return errors.New(logEntry(CRITICAL, message, args...))
}
func Criticalf(message string, args ...interface{}) error {
return errors.New(logFormattedEntry(CRITICAL, message, args...))
}
func Criticale(err error) error {
return logErrorEntry(CRITICAL, err)
}
// Fatal emits a FATAL level entry and exits the program
func Fatal(message string, args ...interface{}) error {
logEntry(FATAL, message, args...)
os.Exit(1)
return errors.New(logEntry(CRITICAL, message, args...))
}
// Fatalf emits a FATAL level entry and exits the program
func Fatalf(message string, args ...interface{}) error {
logFormattedEntry(FATAL, message, args...)
os.Exit(1)
return errors.New(logFormattedEntry(CRITICAL, message, args...))
}
// Fatale emits a FATAL level entry and exits the program
func Fatale(err error) error {
logErrorEntry(FATAL, err)
os.Exit(1)
return err
}
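The package doubles as an error constructor: the level-named helpers both emit a log line and return an error, so callers can log and propagate in one expression. A sketch (doSomething is hypothetical):

```
log.SetLevel(log.INFO) // DEBUG entries are filtered out from here on
if err := doSomething(); err != nil {
	return log.Errorf("doSomething failed on %s: %+v", "db1.example.com", err)
}
```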

119
go/vt/orchestrator/external/golib/math/math.go vendored Normal file

@ -0,0 +1,119 @@
/*
Copyright 2014 Shlomi Noach.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package math
func MinInt(i1, i2 int) int {
if i1 < i2 {
return i1
}
return i2
}
func MaxInt(i1, i2 int) int {
if i1 > i2 {
return i1
}
return i2
}
func MinInt64(i1, i2 int64) int64 {
if i1 < i2 {
return i1
}
return i2
}
func MaxInt64(i1, i2 int64) int64 {
if i1 > i2 {
return i1
}
return i2
}
func MinUInt(i1, i2 uint) uint {
if i1 < i2 {
return i1
}
return i2
}
func MaxUInt(i1, i2 uint) uint {
if i1 > i2 {
return i1
}
return i2
}
func MinUInt64(i1, i2 uint64) uint64 {
if i1 < i2 {
return i1
}
return i2
}
func MaxUInt64(i1, i2 uint64) uint64 {
if i1 > i2 {
return i1
}
return i2
}
func MinString(i1, i2 string) string {
if i1 < i2 {
return i1
}
return i2
}
func MaxString(i1, i2 string) string {
if i1 > i2 {
return i1
}
return i2
}
// TernaryString acts like a "? :" C-style ternary operator for strings
func TernaryString(condition bool, resTrue string, resFalse string) string {
if condition {
return resTrue
}
return resFalse
}
// TernaryInt acts like a "? :" C-style ternary operator for ints
func TernaryInt(condition bool, resTrue int, resFalse int) int {
if condition {
return resTrue
}
return resFalse
}
// AbsInt is an ABS function for int type
func AbsInt(i int) int {
if i >= 0 {
return i
}
return -i
}
// AbsInt64 is an ABS function for int64 type
func AbsInt64(i int64) int64 {
if i >= 0 {
return i
}
return -i
}

49
go/vt/orchestrator/external/golib/sqlutils/dialect.go vendored Normal file

@ -0,0 +1,49 @@
/*
Copyright 2017 GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sqlutils
import (
"regexp"
"strings"
)
type regexpMap struct {
r *regexp.Regexp
replacement string
}
func (this *regexpMap) process(text string) (result string) {
return this.r.ReplaceAllString(text, this.replacement)
}
func rmap(regexpExpression string, replacement string) regexpMap {
return regexpMap{
r: regexp.MustCompile(regexpSpaces(regexpExpression)),
replacement: replacement,
}
}
func regexpSpaces(statement string) string {
return strings.Replace(statement, " ", `[\s]+`, -1)
}
func applyConversions(statement string, conversions []regexpMap) string {
for _, rmap := range conversions {
statement = rmap.process(statement)
}
return statement
}
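regexpSpaces makes every literal space in a pattern match any run of whitespace, so the conversions tolerate arbitrary formatting in the incoming SQL. A small illustration using the helpers above:

```
conversions := []regexpMap{rmap(`(?i)insert ignore`, `insert or ignore`)}
fmt.Println(applyConversions("INSERT   IGNORE into t values (1)", conversions))
// prints: insert or ignore into t values (1)
```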

130
go/vt/orchestrator/external/golib/sqlutils/sqlite_dialect.go vendored Normal file

@ -0,0 +1,130 @@
/*
Copyright 2017 GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// What's this about?
// This is a brute-force regular-expression based conversion from MySQL syntax to sqlite3 syntax.
// It is NOT meant to be a general purpose solution and is only expected & confirmed to run on
// queries issued by orchestrator. There are known limitations to this design.
// It's not even pretty.
// In fact...
// Well, it gets the job done at this time. Call it debt.
package sqlutils
import (
"regexp"
)
var sqlite3CreateTableConversions = []regexpMap{
rmap(`(?i) (character set|charset) [\S]+`, ``),
rmap(`(?i)int unsigned`, `int`),
rmap(`(?i)int[\s]*[(][\s]*([0-9]+)[\s]*[)] unsigned`, `int`),
rmap(`(?i)engine[\s]*=[\s]*(innodb|myisam|ndb|memory|tokudb)`, ``),
rmap(`(?i)DEFAULT CHARSET[\s]*=[\s]*[\S]+`, ``),
rmap(`(?i)[\S]*int( not null|) auto_increment`, `integer`),
rmap(`(?i)comment '[^']*'`, ``),
rmap(`(?i)after [\S]+`, ``),
rmap(`(?i)alter table ([\S]+) add (index|key) ([\S]+) (.+)`, `create index ${3}_${1} on $1 $4`),
rmap(`(?i)alter table ([\S]+) add unique (index|key) ([\S]+) (.+)`, `create unique index ${3}_${1} on $1 $4`),
rmap(`(?i)([\S]+) enum[\s]*([(].*?[)])`, `$1 text check($1 in $2)`),
rmap(`(?i)([\s\S]+[/][*] sqlite3-skip [*][/][\s\S]+)`, ``),
rmap(`(?i)timestamp default current_timestamp`, `timestamp default ('')`),
rmap(`(?i)timestamp not null default current_timestamp`, `timestamp not null default ('')`),
rmap(`(?i)add column (.*int) not null[\s]*$`, `add column $1 not null default 0`),
rmap(`(?i)add column (.* text) not null[\s]*$`, `add column $1 not null default ''`),
rmap(`(?i)add column (.* varchar.*) not null[\s]*$`, `add column $1 not null default ''`),
}
var sqlite3InsertConversions = []regexpMap{
rmap(`(?i)insert ignore ([\s\S]+) on duplicate key update [\s\S]+`, `insert or ignore $1`),
rmap(`(?i)insert ignore`, `insert or ignore`),
rmap(`(?i)now[(][)]`, `datetime('now')`),
rmap(`(?i)insert into ([\s\S]+) on duplicate key update [\s\S]+`, `replace into $1`),
}
var sqlite3GeneralConversions = []regexpMap{
rmap(`(?i)now[(][)][\s]*[-][\s]*interval [?] ([\w]+)`, `datetime('now', printf('-%d $1', ?))`),
rmap(`(?i)now[(][)][\s]*[+][\s]*interval [?] ([\w]+)`, `datetime('now', printf('+%d $1', ?))`),
rmap(`(?i)now[(][)][\s]*[-][\s]*interval ([0-9.]+) ([\w]+)`, `datetime('now', '-${1} $2')`),
rmap(`(?i)now[(][)][\s]*[+][\s]*interval ([0-9.]+) ([\w]+)`, `datetime('now', '+${1} $2')`),
rmap(`(?i)[=<>\s]([\S]+[.][\S]+)[\s]*[-][\s]*interval [?] ([\w]+)`, ` datetime($1, printf('-%d $2', ?))`),
rmap(`(?i)[=<>\s]([\S]+[.][\S]+)[\s]*[+][\s]*interval [?] ([\w]+)`, ` datetime($1, printf('+%d $2', ?))`),
rmap(`(?i)unix_timestamp[(][)]`, `strftime('%s', 'now')`),
rmap(`(?i)unix_timestamp[(]([^)]+)[)]`, `strftime('%s', $1)`),
rmap(`(?i)now[(][)]`, `datetime('now')`),
rmap(`(?i)cast[(][\s]*([\S]+) as signed[\s]*[)]`, `cast($1 as integer)`),
rmap(`(?i)\bconcat[(][\s]*([^,)]+)[\s]*,[\s]*([^,)]+)[\s]*[)]`, `($1 || $2)`),
rmap(`(?i)\bconcat[(][\s]*([^,)]+)[\s]*,[\s]*([^,)]+)[\s]*,[\s]*([^,)]+)[\s]*[)]`, `($1 || $2 || $3)`),
rmap(`(?i) rlike `, ` like `),
rmap(`(?i)create index([\s\S]+)[(][\s]*[0-9]+[\s]*[)]([\s\S]+)`, `create index ${1}${2}`),
rmap(`(?i)drop index ([\S]+) on ([\S]+)`, `drop index if exists $1`),
}
var (
sqlite3IdentifyCreateTableStatement = regexp.MustCompile(regexpSpaces(`(?i)^[\s]*create table`))
sqlite3IdentifyCreateIndexStatement = regexp.MustCompile(regexpSpaces(`(?i)^[\s]*create( unique|) index`))
sqlite3IdentifyDropIndexStatement = regexp.MustCompile(regexpSpaces(`(?i)^[\s]*drop index`))
sqlite3IdentifyAlterTableStatement = regexp.MustCompile(regexpSpaces(`(?i)^[\s]*alter table`))
sqlite3IdentifyInsertStatement = regexp.MustCompile(regexpSpaces(`(?i)^[\s]*(insert|replace)`))
)
func IsInsert(statement string) bool {
return sqlite3IdentifyInsertStatement.MatchString(statement)
}
func IsCreateTable(statement string) bool {
return sqlite3IdentifyCreateTableStatement.MatchString(statement)
}
func IsCreateIndex(statement string) bool {
return sqlite3IdentifyCreateIndexStatement.MatchString(statement)
}
func IsDropIndex(statement string) bool {
return sqlite3IdentifyDropIndexStatement.MatchString(statement)
}
func IsAlterTable(statement string) bool {
return sqlite3IdentifyAlterTableStatement.MatchString(statement)
}
func ToSqlite3CreateTable(statement string) string {
return applyConversions(statement, sqlite3CreateTableConversions)
}
func ToSqlite3Insert(statement string) string {
return applyConversions(statement, sqlite3InsertConversions)
}
func ToSqlite3Dialect(statement string) (translated string) {
if IsCreateTable(statement) {
return ToSqlite3CreateTable(statement)
}
if IsAlterTable(statement) {
return ToSqlite3CreateTable(statement)
}
statement = applyConversions(statement, sqlite3GeneralConversions)
if IsInsert(statement) {
return ToSqlite3Insert(statement)
}
return statement
}


@ -0,0 +1,242 @@
/*
Copyright 2017 GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sqlutils
import (
"regexp"
"strings"
"testing"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
var spacesRegexp = regexp.MustCompile(`[\s]+`)
func init() {
}
func stripSpaces(statement string) string {
statement = strings.TrimSpace(statement)
statement = spacesRegexp.ReplaceAllString(statement, " ")
return statement
}
func TestIsCreateTable(t *testing.T) {
test.S(t).ExpectTrue(IsCreateTable("create table t(id int)"))
test.S(t).ExpectTrue(IsCreateTable(" create table t(id int)"))
test.S(t).ExpectTrue(IsCreateTable("CREATE TABLE t(id int)"))
test.S(t).ExpectTrue(IsCreateTable(`
create table t(id int)
`))
test.S(t).ExpectFalse(IsCreateTable("where create table t(id int)"))
test.S(t).ExpectFalse(IsCreateTable("insert"))
}
func TestToSqlite3CreateTable(t *testing.T) {
{
statement := "create table t(id int)"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, statement)
}
{
statement := "create table t(id int, v varchar(123) CHARACTER SET ascii NOT NULL default '')"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, "create table t(id int, v varchar(123) NOT NULL default '')")
}
{
statement := "create table t(id int, v varchar ( 123 ) CHARACTER SET ascii NOT NULL default '')"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, "create table t(id int, v varchar ( 123 ) NOT NULL default '')")
}
{
statement := "create table t(i smallint unsigned)"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, "create table t(i smallint)")
}
{
statement := "create table t(i smallint(5) unsigned)"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, "create table t(i smallint)")
}
{
statement := "create table t(i smallint ( 5 ) unsigned)"
result := ToSqlite3CreateTable(statement)
test.S(t).ExpectEquals(result, "create table t(i smallint)")
}
}
func TestToSqlite3AlterTable(t *testing.T) {
{
statement := `
ALTER TABLE
database_instance
ADD COLUMN sql_delay INT UNSIGNED NOT NULL AFTER slave_lag_seconds
`
result := stripSpaces(ToSqlite3Dialect(statement))
test.S(t).ExpectEquals(result, stripSpaces(`
ALTER TABLE
database_instance
add column sql_delay int not null default 0
`))
}
{
statement := `
ALTER TABLE
database_instance
ADD INDEX master_host_port_idx (master_host, master_port)
`
result := stripSpaces(ToSqlite3Dialect(statement))
test.S(t).ExpectEquals(result, stripSpaces(`
create index
master_host_port_idx_database_instance
on database_instance (master_host, master_port)
`))
}
{
statement := `
ALTER TABLE
topology_recovery
ADD KEY last_detection_idx (last_detection_id)
`
result := stripSpaces(ToSqlite3Dialect(statement))
test.S(t).ExpectEquals(result, stripSpaces(`
create index
last_detection_idx_topology_recovery
on topology_recovery (last_detection_id)
`))
}
}
func TestCreateIndex(t *testing.T) {
{
statement := `
create index
master_host_port_idx_database_instance
on database_instance (master_host(128), master_port)
`
result := stripSpaces(ToSqlite3Dialect(statement))
test.S(t).ExpectEquals(result, stripSpaces(`
create index
master_host_port_idx_database_instance
on database_instance (master_host, master_port)
`))
}
}
func TestIsInsert(t *testing.T) {
test.S(t).ExpectTrue(IsInsert("insert into t"))
test.S(t).ExpectTrue(IsInsert("insert ignore into t"))
test.S(t).ExpectTrue(IsInsert(`
insert ignore into t
`))
test.S(t).ExpectFalse(IsInsert("where create table t(id int)"))
test.S(t).ExpectFalse(IsInsert("create table t(id int)"))
test.S(t).ExpectTrue(IsInsert(`
insert into
cluster_domain_name (cluster_name, domain_name, last_registered)
values
(?, ?, datetime('now'))
on duplicate key update
domain_name=values(domain_name),
last_registered=values(last_registered)
`))
}
func TestToSqlite3Insert(t *testing.T) {
{
statement := `
insert into
cluster_domain_name (cluster_name, domain_name, last_registered)
values
(?, ?, datetime('now'))
on duplicate key update
domain_name=values(domain_name),
last_registered=values(last_registered)
`
result := stripSpaces(ToSqlite3Dialect(statement))
test.S(t).ExpectEquals(result, stripSpaces(`
replace into
cluster_domain_name (cluster_name, domain_name, last_registered)
values
(?, ?, datetime('now'))
`))
}
}
func TestToSqlite3GeneralConversions(t *testing.T) {
{
statement := "select now()"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select datetime('now')")
}
{
statement := "select now() - interval ? second"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select datetime('now', printf('-%d second', ?))")
}
{
statement := "select now() + interval ? minute"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select datetime('now', printf('+%d minute', ?))")
}
{
statement := "select now() + interval 5 minute"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select datetime('now', '+5 minute')")
}
{
statement := "select some_table.some_column + interval ? minute"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select datetime(some_table.some_column, printf('+%d minute', ?))")
}
{
statement := "AND master_instance.last_attempted_check <= master_instance.last_seen + interval ? minute"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "AND master_instance.last_attempted_check <= datetime(master_instance.last_seen, printf('+%d minute', ?))")
}
{
statement := "select concat(master_instance.port, '') as port"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select (master_instance.port || '') as port")
}
{
statement := "select concat( 'abc' , 'def') as s"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select ('abc' || 'def') as s")
}
{
statement := "select concat( 'abc' , 'def', last.col) as s"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select ('abc' || 'def' || last.col) as s")
}
{
statement := "select concat(myself.only) as s"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select concat(myself.only) as s")
}
{
statement := "select concat(1, '2', 3, '4') as s"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select concat(1, '2', 3, '4') as s")
}
{
statement := "select group_concat( 'abc' , 'def') as s"
result := ToSqlite3Dialect(statement)
test.S(t).ExpectEquals(result, "select group_concat( 'abc' , 'def') as s")
}
}

429
go/vt/orchestrator/external/golib/sqlutils/sqlutils.go vendored Normal file

@ -0,0 +1,429 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sqlutils
import (
"database/sql"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"sync"
"time"
_ "github.com/go-sql-driver/mysql"
_ "github.com/mattn/go-sqlite3"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
const DateTimeFormat = "2006-01-02 15:04:05.999999"
// RowMap represents one row in a result set. Its objective is to allow
// for easy, typed getters by column name.
type RowMap map[string]CellData
// CellData is the result of a single (atomic) column in a single row
type CellData sql.NullString
func (this *CellData) MarshalJSON() ([]byte, error) {
if this.Valid {
return json.Marshal(this.String)
} else {
return json.Marshal(nil)
}
}
// UnmarshalJSON reads this object from JSON
func (this *CellData) UnmarshalJSON(b []byte) error {
var s string
if err := json.Unmarshal(b, &s); err != nil {
return err
}
(*this).String = s
(*this).Valid = true
return nil
}
func (this *CellData) NullString() *sql.NullString {
return (*sql.NullString)(this)
}
// RowData is the result of a single row, in positioned array format
type RowData []CellData
// MarshalJSON will marshal this row as JSON
func (this *RowData) MarshalJSON() ([]byte, error) {
cells := make([](*CellData), len(*this), len(*this))
for i, val := range *this {
d := CellData(val)
cells[i] = &d
}
return json.Marshal(cells)
}
func (this *RowData) Args() []interface{} {
result := make([]interface{}, len(*this))
for i := range *this {
result[i] = (*(*this)[i].NullString())
}
return result
}
// ResultData is an ordered row set of RowData
type ResultData []RowData
type NamedResultData struct {
Columns []string
Data ResultData
}
var EmptyResultData = ResultData{}
func (this *RowMap) GetString(key string) string {
return (*this)[key].String
}
// GetStringD returns a string from the map, or a default value if the key does not exist
func (this *RowMap) GetStringD(key string, def string) string {
if cell, ok := (*this)[key]; ok {
return cell.String
}
return def
}
func (this *RowMap) GetInt64(key string) int64 {
res, _ := strconv.ParseInt(this.GetString(key), 10, 0)
return res
}
func (this *RowMap) GetNullInt64(key string) sql.NullInt64 {
i, err := strconv.ParseInt(this.GetString(key), 10, 0)
if err == nil {
return sql.NullInt64{Int64: i, Valid: true}
} else {
return sql.NullInt64{Valid: false}
}
}
func (this *RowMap) GetInt(key string) int {
res, _ := strconv.Atoi(this.GetString(key))
return res
}
func (this *RowMap) GetIntD(key string, def int) int {
res, err := strconv.Atoi(this.GetString(key))
if err != nil {
return def
}
return res
}
func (this *RowMap) GetUint(key string) uint {
res, _ := strconv.ParseUint(this.GetString(key), 10, 0)
return uint(res)
}
func (this *RowMap) GetUintD(key string, def uint) uint {
res, err := strconv.Atoi(this.GetString(key))
if err != nil {
return def
}
return uint(res)
}
func (this *RowMap) GetUint64(key string) uint64 {
res, _ := strconv.ParseUint(this.GetString(key), 10, 0)
return res
}
func (this *RowMap) GetUint64D(key string, def uint64) uint64 {
res, err := strconv.ParseUint(this.GetString(key), 10, 0)
if err != nil {
return def
}
return uint64(res)
}
func (this *RowMap) GetBool(key string) bool {
return this.GetInt(key) != 0
}
func (this *RowMap) GetTime(key string) time.Time {
if t, err := time.Parse(DateTimeFormat, this.GetString(key)); err == nil {
return t
}
return time.Time{}
}
// knownDBs is a DB cache by uri
var knownDBs map[string]*sql.DB = make(map[string]*sql.DB)
var knownDBsMutex = &sync.Mutex{}
// GetGenericDB returns a DB instance for the given driver and data source name.
// The bool result indicates whether the DB was returned from cache.
func GetGenericDB(driverName, dataSourceName string) (*sql.DB, bool, error) {
knownDBsMutex.Lock()
defer func() {
knownDBsMutex.Unlock()
}()
var exists bool
if _, exists = knownDBs[dataSourceName]; !exists {
if db, err := sql.Open(driverName, dataSourceName); err == nil {
knownDBs[dataSourceName] = db
} else {
return db, exists, err
}
}
return knownDBs[dataSourceName], exists, nil
}
// GetDB returns a MySQL DB instance based on uri.
// The bool result indicates whether the DB was returned from cache.
func GetDB(mysql_uri string) (*sql.DB, bool, error) {
return GetGenericDB("mysql", mysql_uri)
}
// GetSQLiteDB returns a SQLite DB instance based on DB file name.
// The bool result indicates whether the DB was returned from cache.
func GetSQLiteDB(dbFile string) (*sql.DB, bool, error) {
return GetGenericDB("sqlite3", dbFile)
}
// RowToArray is a convenience function, typically not called directly, which maps a
// single read database row into an array of NullString
func RowToArray(rows *sql.Rows, columns []string) []CellData {
buff := make([]interface{}, len(columns))
data := make([]CellData, len(columns))
for i := range buff {
buff[i] = data[i].NullString()
}
rows.Scan(buff...)
return data
}
// ScanRowsToArrays is a convenience function, typically not called directly, which maps rows
// already read from the database into arrays of NullString
func ScanRowsToArrays(rows *sql.Rows, on_row func([]CellData) error) error {
columns, _ := rows.Columns()
for rows.Next() {
arr := RowToArray(rows, columns)
err := on_row(arr)
if err != nil {
return err
}
}
return nil
}
func rowToMap(row []CellData, columns []string) map[string]CellData {
m := make(map[string]CellData)
for k, data_col := range row {
m[columns[k]] = data_col
}
return m
}
// ScanRowsToMaps is a convenience function, typically not called directly, which maps rows
// already read from the database into RowMap entries.
func ScanRowsToMaps(rows *sql.Rows, on_row func(RowMap) error) error {
columns, _ := rows.Columns()
err := ScanRowsToArrays(rows, func(arr []CellData) error {
m := rowToMap(arr, columns)
err := on_row(m)
if err != nil {
return err
}
return nil
})
return err
}
// QueryRowsMap is a convenience function allowing querying a result set while providing a callback
// function activated per read row.
func QueryRowsMap(db *sql.DB, query string, on_row func(RowMap) error, args ...interface{}) (err error) {
defer func() {
if derr := recover(); derr != nil {
err = fmt.Errorf("QueryRowsMap unexpected error: %+v", derr)
}
}()
var rows *sql.Rows
rows, err = db.Query(query, args...)
if rows != nil {
defer rows.Close()
}
if err != nil && err != sql.ErrNoRows {
return log.Errore(err)
}
err = ScanRowsToMaps(rows, on_row)
return
}
// queryResultData returns a raw array of rows for a given query, optionally reading and returning column names
func queryResultData(db *sql.DB, query string, retrieveColumns bool, args ...interface{}) (resultData ResultData, columns []string, err error) {
defer func() {
if derr := recover(); derr != nil {
err = errors.New(fmt.Sprintf("QueryRowsMap unexpected error: %+v", derr))
}
}()
var rows *sql.Rows
rows, err = db.Query(query, args...)
defer rows.Close()
if err != nil && err != sql.ErrNoRows {
return EmptyResultData, columns, log.Errore(err)
}
if retrieveColumns {
// Don't pay if you don't want to
columns, _ = rows.Columns()
}
resultData = ResultData{}
err = ScanRowsToArrays(rows, func(rowData []CellData) error {
resultData = append(resultData, rowData)
return nil
})
return resultData, columns, err
}
// QueryResultData returns a raw array of rows
func QueryResultData(db *sql.DB, query string, args ...interface{}) (ResultData, error) {
resultData, _, err := queryResultData(db, query, false, args...)
return resultData, err
}
// QueryNamedResultData returns a raw array of rows, with column names
func QueryNamedResultData(db *sql.DB, query string, args ...interface{}) (NamedResultData, error) {
resultData, columns, err := queryResultData(db, query, true, args...)
return NamedResultData{Columns: columns, Data: resultData}, err
}
// QueryRowsMapBuffered reads data from the database into a buffer, and only then applies the given function per row.
// This allows the application to take its time with processing the data, albeit consuming as much memory as required by
// the result set.
func QueryRowsMapBuffered(db *sql.DB, query string, on_row func(RowMap) error, args ...interface{}) error {
resultData, columns, err := queryResultData(db, query, true, args...)
if err != nil {
// Already logged
return err
}
for _, row := range resultData {
err = on_row(rowToMap(row, columns))
if err != nil {
return err
}
}
return nil
}
// ExecNoPrepare executes given query using given args on given DB, without using prepared statements.
func ExecNoPrepare(db *sql.DB, query string, args ...interface{}) (res sql.Result, err error) {
defer func() {
if derr := recover(); derr != nil {
err = errors.New(fmt.Sprintf("ExecNoPrepare unexpected error: %+v", derr))
}
}()
res, err = db.Exec(query, args...)
if err != nil {
log.Errore(err)
}
return res, err
}
// execInternal executes given query using given args on given DB. It safely prepares, executes and closes
// the statement.
func execInternal(silent bool, db *sql.DB, query string, args ...interface{}) (res sql.Result, err error) {
defer func() {
if derr := recover(); derr != nil {
err = errors.New(fmt.Sprintf("execInternal unexpected error: %+v", derr))
}
}()
var stmt *sql.Stmt
stmt, err = db.Prepare(query)
if err != nil {
return nil, err
}
defer stmt.Close()
res, err = stmt.Exec(args...)
if err != nil && !silent {
log.Errore(err)
}
return res, err
}
// Exec executes given query using given args on given DB. It safely prepares, executes and closes
// the statement.
func Exec(db *sql.DB, query string, args ...interface{}) (sql.Result, error) {
return execInternal(false, db, query, args...)
}
// ExecSilently acts like Exec but does not report any error
func ExecSilently(db *sql.DB, query string, args ...interface{}) (sql.Result, error) {
return execInternal(true, db, query, args...)
}
func InClauseStringValues(terms []string) string {
quoted := []string{}
for _, s := range terms {
quoted = append(quoted, fmt.Sprintf("'%s'", strings.Replace(s, "'", "''", -1)))
}
return strings.Join(quoted, ", ")
}
// Args converts variable length arguments into an arguments array
func Args(args ...interface{}) []interface{} {
return args
}
func NilIfZero(i int64) interface{} {
if i == 0 {
return nil
}
return i
}
func ScanTable(db *sql.DB, tableName string) (NamedResultData, error) {
query := fmt.Sprintf("select * from %s", tableName)
return QueryNamedResultData(db, query)
}
func WriteTable(db *sql.DB, tableName string, data NamedResultData) (err error) {
if len(data.Data) == 0 {
return nil
}
if len(data.Columns) == 0 {
return nil
}
placeholders := make([]string, len(data.Columns))
for i := range placeholders {
placeholders[i] = "?"
}
query := fmt.Sprintf(
`replace into %s (%s) values (%s)`,
tableName,
strings.Join(data.Columns, ","),
strings.Join(placeholders, ","),
)
for _, rowData := range data.Data {
if _, execErr := db.Exec(query, rowData.Args()...); execErr != nil {
err = execErr
}
}
return err
}
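// exampleCopyTable is an editorial illustration, not part of the original
// file: ScanTable reads all rows along with their column names, and
// WriteTable replays them into another database via `replace into`.
func exampleCopyTable(src, dst *sql.DB, tableName string) error {
	data, err := ScanTable(src, tableName)
	if err != nil {
		return err
	}
	return WriteTable(dst, tableName, data)
}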

76
go/vt/orchestrator/external/golib/tests/spec.go vendored Normal file

@ -0,0 +1,76 @@
package tests
import (
"testing"
)
// Spec is an access point to test Expectations
type Spec struct {
t *testing.T
}
// S generates a spec. You will want to use it once per test file, once per test, or once per check
func S(t *testing.T) *Spec {
return &Spec{t: t}
}
// ExpectNil expects given value to be nil, or errors
func (spec *Spec) ExpectNil(actual interface{}) {
if actual == nil {
return
}
spec.t.Errorf("Expected %+v to be nil", actual)
}
// ExpectNotNil expects given value to be not nil, or errors
func (spec *Spec) ExpectNotNil(actual interface{}) {
if actual != nil {
return
}
spec.t.Errorf("Expected %+v to be not nil", actual)
}
// ExpectEquals expects given values to be equal (comparison via `==`), or errors
func (spec *Spec) ExpectEquals(actual, value interface{}) {
if actual == value {
return
}
spec.t.Errorf("Expected:\n[[[%+v]]]\n- got:\n[[[%+v]]]", value, actual)
}
// ExpectNotEquals expects given values to be nonequal (comparison via `==`), or errors
func (spec *Spec) ExpectNotEquals(actual, value interface{}) {
if !(actual == value) {
return
}
spec.t.Errorf("Expected not %+v", value)
}
// ExpectEqualsAny expects given actual to equal (comparison via `==`) at least one of given values, or errors
func (spec *Spec) ExpectEqualsAny(actual interface{}, values ...interface{}) {
for _, value := range values {
if actual == value {
return
}
}
spec.t.Errorf("Expected %+v to equal any of given values", actual)
}
// ExpectNotEqualsAny expects given actual to be nonequal (comparison via `==`) to any of given values, or errors
func (spec *Spec) ExpectNotEqualsAny(actual interface{}, values ...interface{}) {
for _, value := range values {
if actual == value {
spec.t.Errorf("Expected not %+v", value)
}
}
}
// ExpectFalse expects given value to be false, or errors
func (spec *Spec) ExpectFalse(actual interface{}) {
spec.ExpectEquals(actual, false)
}
// ExpectTrue expects given value to be true, or errors
func (spec *Spec) ExpectTrue(actual interface{}) {
spec.ExpectEquals(actual, true)
}
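// exampleSpecUsage is an editorial illustration, not part of the original
// file: a test obtains a Spec via S(t) and chains expectation checks.
func exampleSpecUsage(t *testing.T) {
	s := S(t)
	s.ExpectEquals(1+1, 2)        // passes: values compare equal via ==
	s.ExpectNotEqualsAny(3, 1, 2) // passes: 3 matches none of the given values
	s.ExpectTrue(len("abc") == 3)
}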

103
go/vt/orchestrator/external/golib/util/text.go vendored Normal file

@ -0,0 +1,103 @@
/*
Copyright 2015 Shlomi Noach.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
const (
TabulateLeft = 0
TabulateRight = 1
)
// SimpleTimeToSeconds parses input in the format 7s, 55m, 3h, 31d, 4w (seconds, minutes, hours, days, weeks)
// The time.ParseDuration() function should have done this, but it does not support "d" and "w" extensions.
func SimpleTimeToSeconds(simpleTime string) (int, error) {
if matched, _ := regexp.MatchString("^[0-9]+s$", simpleTime); matched {
i, _ := strconv.Atoi(simpleTime[0 : len(simpleTime)-1])
return i, nil
}
if matched, _ := regexp.MatchString("^[0-9]+m$", simpleTime); matched {
i, _ := strconv.Atoi(simpleTime[0 : len(simpleTime)-1])
return i * 60, nil
}
if matched, _ := regexp.MatchString("^[0-9]+h$", simpleTime); matched {
i, _ := strconv.Atoi(simpleTime[0 : len(simpleTime)-1])
return i * 60 * 60, nil
}
if matched, _ := regexp.MatchString("^[0-9]+d$", simpleTime); matched {
i, _ := strconv.Atoi(simpleTime[0 : len(simpleTime)-1])
return i * 60 * 60 * 24, nil
}
if matched, _ := regexp.MatchString("^[0-9]+w$", simpleTime); matched {
i, _ := strconv.Atoi(simpleTime[0 : len(simpleTime)-1])
return i * 60 * 60 * 24 * 7, nil
}
return 0, fmt.Errorf("Cannot parse simple time: %s", simpleTime)
}
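// exampleSimpleTime is an editorial illustration, not part of the original
// file: "90s" -> 90, "55m" -> 3300, "3h" -> 10800, "31d" -> 2678400,
// "4w" -> 2419200; any other form returns an error.
func exampleSimpleTime() (int, error) {
	return SimpleTimeToSeconds("31d") // 31 * 24 * 60 * 60 = 2678400
}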
func Tabulate(lines []string, separator string, outputSeparator string, directionFlags ...int) (result []string) {
tokens := make([][]string, 0)
widths := make([][]int, 0)
countColumns := 0
for _, line := range lines {
lineTokens := strings.Split(line, separator)
lineWidths := make([]int, len(lineTokens))
for i := range lineTokens {
lineWidths[i] = len(lineTokens[i])
}
tokens = append(tokens, lineTokens)
widths = append(widths, lineWidths)
if len(lineTokens) > countColumns {
countColumns = len(lineTokens)
}
}
columnWidths := make([]int, countColumns)
for _, lineTokens := range tokens {
for col, token := range lineTokens {
if len(token) > columnWidths[col] {
columnWidths[col] = len(token)
}
}
}
for _, lineTokens := range tokens {
resultRow := ""
for col := 0; col < countColumns; col++ {
token := ""
if col < len(lineTokens) {
token = lineTokens[col]
}
format := fmt.Sprintf("%%-%ds", columnWidths[col]) // format left
if col < len(directionFlags) && directionFlags[col] == TabulateRight {
format = fmt.Sprintf("%%%ds", columnWidths[col])
}
formattedToken := fmt.Sprintf(format, token)
if col == 0 {
resultRow = formattedToken
} else {
resultRow = fmt.Sprintf("%s%s%s", resultRow, outputSeparator, formattedToken)
}
}
result = append(result, resultRow)
}
return result
}

88
go/vt/orchestrator/external/golib/util/text_test.go vendored Normal file

@ -0,0 +1,88 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"reflect"
"strings"
"testing"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
func TestTabulate(t *testing.T) {
{
text := strings.TrimSpace(`
a,b,c
d,e,f
g,h,i
`)
tabulated := Tabulate(strings.Split(text, "\n"), ",", ",")
expected := strings.Split(text, "\n")
test.S(t).ExpectTrue(reflect.DeepEqual(tabulated, expected))
}
{
text := strings.TrimSpace(`
a,b,c
d,e,f
g,h,i
`)
tabulated := Tabulate(strings.Split(text, "\n"), ",", "|")
expected := []string{
"a|b|c",
"d|e|f",
"g|h|i",
}
test.S(t).ExpectTrue(reflect.DeepEqual(tabulated, expected))
}
{
text := strings.TrimSpace(`
a,20,c
d,e,100
0000,h,i
`)
tabulated := Tabulate(strings.Split(text, "\n"), ",", "|")
expected := []string{
"a |20|c ",
"d |e |100",
"0000|h |i ",
}
test.S(t).ExpectTrue(reflect.DeepEqual(tabulated, expected))
}
{
text := strings.TrimSpace(`
a,20,c
d,1,100
0000,3,i
`)
tabulated := Tabulate(strings.Split(text, "\n"), ",", "|", TabulateLeft, TabulateRight, TabulateRight)
expected := []string{
"a |20| c",
"d | 1|100",
"0000| 3| i",
}
test.S(t).ExpectTrue(reflect.DeepEqual(tabulated, expected))
}
}

23
go/vt/orchestrator/external/raft/.gitignore vendored Normal file

@ -0,0 +1,23 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test

16
go/vt/orchestrator/external/raft/.travis.yml vendored Normal file

@ -0,0 +1,16 @@
language: go
go:
- 1.4
- 1.5
- 1.6
- tip
install: make deps
script:
- make integ
notifications:
flowdock:
secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=

354
go/vt/orchestrator/external/raft/LICENSE vendored Normal file

@ -0,0 +1,354 @@
Mozilla Public License, version 2.0
1. Definitions
1.1. “Contributor”
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. “Contributor Version”
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributor's Contribution.
1.3. “Contribution”
means Covered Software of a particular Contributor.
1.4. “Covered Software”
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. “Incompatible With Secondary Licenses”
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of version
1.1 or earlier of the License, but not also under the terms of a
Secondary License.
1.6. “Executable Form”
means any form of the work other than Source Code Form.
1.7. “Larger Work”
means a work that combines Covered Software with other material, in a separate
file or files, that is not Covered Software.
1.8. “License”
means this document.
1.9. “Licensable”
means having the right to grant, to the maximum extent possible, whether at the
time of the initial grant or subsequently, any and all of the rights conveyed by
this License.
1.10. “Modifications”
means any of the following:
a. any file in Source Code Form that results from an addition to, deletion
from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. “Patent Claims” of a Contributor
means any patent claim(s), including without limitation, method, process,
and apparatus claims, in any patent Licensable by such Contributor that
would be infringed, but for the grant of the License, by the making,
using, selling, offering for sale, having made, import, or transfer of
either its Contributions or its Contributor Version.
1.12. “Secondary License”
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. “Source Code Form”
means the form of the work preferred for making modifications.
1.14. “You” (or “Your”)
means an individual or a legal entity exercising rights under this
License. For legal entities, “You” includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, “control” means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or as
part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its Contributions
or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution become
effective for each Contribution on the date the Contributor first distributes
such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under this
License. No additional rights or licenses will be implied from the distribution
or licensing of Covered Software under this License. Notwithstanding Section
2.1(b) above, no patent license is granted by a Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of its
Contributions.
This License does not grant any rights in the trademarks, service marks, or
logos of any Contributor (except as may be necessary to comply with the
notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this License
(see Section 10.2) or under the terms of a Secondary License (if permitted
under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its Contributions
are its original creation(s) or it has sufficient rights to grant the
rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under applicable
copyright doctrines of fair use, fair dealing, or other equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under the
terms of this License. You must inform recipients that the Source Code Form
of the Covered Software is governed by the terms of this License, and how
they can obtain a copy of this License. You may not attempt to alter or
restrict the recipients' rights in the Source Code Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this License,
or sublicense it under different terms, provided that the license for
the Executable Form does not attempt to limit or alter the recipients'
rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for the
Covered Software. If the Larger Work is a combination of Covered Software
with a work governed by one or more Secondary Licenses, and the Covered
Software is not Incompatible With Secondary Licenses, this License permits
You to additionally distribute such Covered Software under the terms of
such Secondary License(s), so that the recipient of the Larger Work may, at
their option, further distribute the Covered Software under the terms of
either this License or such Secondary License(s).
3.4. Notices
You may not remove or alter the substance of any license notices (including
copyright notices, patent notices, disclaimers of warranty, or limitations
of liability) contained within the Source Code Form of the Covered
Software, except that You may alter any license notices to the extent
required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on behalf
of any Contributor. You must make it absolutely clear that any such
warranty, support, indemnity, or liability obligation is offered by You
alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute, judicial
order, or regulation then You must: (a) comply with the terms of this License
to the maximum extent possible; and (b) describe the limitations and the code
they affect. Such description must be placed in a text file included with all
distributions of the Covered Software under this License. Except to the
extent prohibited by statute or regulation, such description must be
sufficiently detailed for a recipient of ordinary skill to be able to
understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
if such Contributor fails to notify You of the non-compliance by some
reasonable means prior to 60 days after You have come back into compliance.
Moreover, Your grants from a particular Contributor are reinstated on an
ongoing basis if such Contributor notifies You of the non-compliance by
some reasonable means, this is the first time You have received notice of
non-compliance with this License from such Contributor, and You become
compliant prior to 30 days after Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions, counter-claims,
and cross-claims) alleging that a Contributor Version directly or
indirectly infringes any patent, then the rights granted to You by any and
all Contributors for the Covered Software under Section 2.1 of this License
shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an “as is” basis, without
warranty of any kind, either expressed, implied, or statutory, including,
without limitation, warranties that the Covered Software is free of defects,
merchantable, fit for a particular purpose or non-infringing. The entire
risk as to the quality and performance of the Covered Software is with You.
Should any Covered Software prove defective in any respect, You (not any
Contributor) assume the cost of any necessary servicing, repair, or
correction. This disclaimer of warranty constitutes an essential part of this
License. No use of any Covered Software is authorized under this License
except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from such
party's negligence to the extent applicable law prohibits such limitation.
Some jurisdictions do not allow the exclusion or limitation of incidental or
consequential damages, so this exclusion and limitation may not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts of
a jurisdiction where the defendant maintains its principal place of business
and such litigation shall be governed by laws of that jurisdiction, without
reference to its conflict-of-law provisions. Nothing in this Section shall
prevent a party's ability to bring cross-claims or counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject matter
hereof. If any provision of this License is held to be unenforceable, such
provision shall be reformed only to the extent necessary to make it
enforceable. Any law or regulation which provides that the language of a
contract shall be construed against the drafter shall not be used to construe
this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version of
the License under which You originally received the Covered Software, or
under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a modified
version of this License if you rename the license and remove any
references to the name of the license steward (except to note that such
modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file, then
You may include the notice in a location (such as a LICENSE file in a relevant
directory) where a recipient would be likely to look for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - “Incompatible With Secondary Licenses” Notice
This Source Code Form is “Incompatible
With Secondary Licenses”, as defined by
the Mozilla Public License, v. 2.0.

17
go/vt/orchestrator/external/raft/Makefile vendored Normal file

@ -0,0 +1,17 @@
DEPS = $(shell go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)
test:
go test -timeout=30s ./...
integ: test
INTEG_TESTS=yes go test -timeout=3s -run=Integ ./...
deps:
go get -d -v ./...
echo $(DEPS) | xargs -n1 go get -d
cov:
INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
open /tmp/coverage.html
.PHONY: test cov integ deps

89
go/vt/orchestrator/external/raft/README.md vendored Normal file

@ -0,0 +1,89 @@
raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
====
raft is a [Go](http://www.golang.org) library that manages a replicated
log and can be used with an FSM to manage replicated state machines. It
is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).
The use cases for such a library are far-reaching as replicated state
machines are a key component of many distributed systems. They enable
building Consistent, Partition Tolerant (CP) systems, with limited
fault tolerance as well.
## Building
If you wish to build raft you'll need Go version 1.2+ installed.
Please check your installation with:
```
go version
```
## Documentation
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).
To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available, called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.
## Protocol
raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
A high level overview of the Raft protocol is described below, but for details please read the full
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
followed by the raft source. Any questions about the raft protocol should be sent to the
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).
### Protocol Description
Raft nodes are always in one of three states: follower, candidate or leader. All
nodes initially start out as a follower. In this state, nodes can accept log entries
from a leader and cast votes. If no entries are received for some time, nodes
self-promote to the candidate state. In the candidate state nodes request votes from
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
The leader must accept new log entries and replicate to all the other followers.
In addition, if stale reads are not acceptable, all queries must also be performed on
the leader.
Once a cluster has a leader, it is able to accept new log entries. A client can
request that a leader append a new log entry, which is an opaque binary blob to
Raft. The leader then writes the entry to durable storage and attempts to replicate
to a quorum of followers. Once the log entry is considered *committed*, it can be
*applied* to a finite state machine. The finite state machine is application specific,
and is implemented using an interface.
An obvious question relates to the unbounded nature of a replicated log. Raft provides
a mechanism by which the current state is snapshotted, and the log is compacted. Because
of the FSM abstraction, restoring the state of the FSM must result in the same state
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
and then remove all the logs that were used to reach that state. This is performed automatically
without user intervention, and prevents unbounded disk usage as well as minimizing
time spent replaying logs.
Lastly, there is the issue of updating the peer set when new servers are joining
or existing servers are leaving. As long as a quorum of nodes is available, this
is not an issue as Raft provides mechanisms to dynamically update the peer set.
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
For example, suppose there are only 2 peers, A and B. The quorum size is also
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
it is now impossible to reach quorum. This means the cluster is unable to add
or remove a node, or commit any additional log entries. This results in *unavailability*.
At this point, manual intervention would be required to remove either A or B,
and to restart the remaining node in bootstrap mode.
A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
of 5 can tolerate 2 node failures. The recommended configuration is to either
run 3 or 5 raft servers. This maximizes availability without
greatly sacrificing performance.
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.
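## Example (editorial sketch)

The snippet below is an illustrative addition, not part of the upstream README.
It only wires together two pieces vendored here: the default configuration and
the file-based snapshot store; a real node would additionally need log/stable
stores and a transport.

```go
package main

import (
	"log"
	"os"

	"vitess.io/vitess/go/vt/orchestrator/external/raft"
)

func main() {
	config := raft.DefaultConfig()
	if err := raft.ValidateConfig(config); err != nil {
		log.Fatal(err)
	}
	// Keep the three most recent snapshots under ./raft-data/snapshots.
	snaps, err := raft.NewFileSnapshotStore("raft-data", 3, os.Stderr)
	if err != nil {
		log.Fatal(err)
	}
	_ = snaps
}
```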

172
go/vt/orchestrator/external/raft/bench/bench.go vendored Normal file

@ -0,0 +1,172 @@
package raftbench
// raftbench provides common benchmarking functions which can be used by
// anything which implements the raft.LogStore and raft.StableStore interfaces.
// All functions accept these interfaces and perform benchmarking. This
// makes comparing backend performance easier by sharing the tests.
import (
"testing"
"vitess.io/vitess/go/vt/orchestrator/external/raft"
)
func FirstIndex(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run FirstIndex a number of times
for n := 0; n < b.N; n++ {
store.FirstIndex()
}
}
func LastIndex(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run LastIndex a number of times
for n := 0; n < b.N; n++ {
store.LastIndex()
}
}
func GetLog(b *testing.B, store raft.LogStore) {
// Create some fake data
var logs []*raft.Log
for i := 1; i < 10; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Run GetLog a number of times
for n := 0; n < b.N; n++ {
if err := store.GetLog(5, new(raft.Log)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func StoreLog(b *testing.B, store raft.LogStore) {
// Run StoreLog a number of times
for n := 0; n < b.N; n++ {
log := &raft.Log{Index: uint64(n), Data: []byte("data")}
if err := store.StoreLog(log); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func StoreLogs(b *testing.B, store raft.LogStore) {
// Run StoreLogs a number of times. We want to set multiple logs each
// run, so we create 3 logs with incrementing indexes for each iteration.
for n := 0; n < b.N; n++ {
b.StopTimer()
offset := 3 * (n + 1)
logs := []*raft.Log{
{Index: uint64(offset - 2), Data: []byte("data")},
{Index: uint64(offset - 1), Data: []byte("data")},
{Index: uint64(offset), Data: []byte("data")},
}
b.StartTimer()
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func DeleteRange(b *testing.B, store raft.LogStore) {
// Create some fake data. In this case, we create 3 new log entries for each
// test case, and separate them by index in multiples of 10. This allows
// some room so that we can test deleting ranges with "extra" logs to
// ensure we stop going to the database once our max index is hit.
var logs []*raft.Log
for n := 0; n < b.N; n++ {
offset := 10 * n
for i := offset; i < offset+3; i++ {
logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
}
}
if err := store.StoreLogs(logs); err != nil {
b.Fatalf("err: %s", err)
}
b.ResetTimer()
// Delete a range of the data
for n := 0; n < b.N; n++ {
offset := 10 * n
if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func Set(b *testing.B, store raft.StableStore) {
// Run Set a number of times
for n := 0; n < b.N; n++ {
if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func Get(b *testing.B, store raft.StableStore) {
// Create some fake data
for i := 1; i < 10; i++ {
if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil {
b.Fatalf("err: %s", err)
}
}
b.ResetTimer()
// Run Get a number of times
for n := 0; n < b.N; n++ {
if _, err := store.Get([]byte{0x05}); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func SetUint64(b *testing.B, store raft.StableStore) {
// Run SetUint64 a number of times
for n := 0; n < b.N; n++ {
if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {
b.Fatalf("err: %s", err)
}
}
}
func GetUint64(b *testing.B, store raft.StableStore) {
// Create some fake data
for i := 0; i < 10; i++ {
if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {
b.Fatalf("err: %s", err)
}
}
b.ResetTimer()
// Run GetUint64 a number of times
for n := 0; n < b.N; n++ {
if _, err := store.Get([]byte{0x05}); err != nil {
b.Fatalf("err: %s", err)
}
}
}
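// benchmarkAllIndexes is an editorial illustration, not part of the original
// file: a backend's own benchmark simply delegates to these helpers, passing
// its raft.LogStore implementation as `store` (hypothetical here).
func benchmarkAllIndexes(b *testing.B, store raft.LogStore) {
	FirstIndex(b, store)
	LastIndex(b, store)
}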

84
go/vt/orchestrator/external/raft/commands.go vendored Normal file

@ -0,0 +1,84 @@
package raft
// AppendEntriesRequest is the command used to append entries to the
// replicated log.
type AppendEntriesRequest struct {
// Provide the current term and leader
Term uint64
Leader []byte
// Provide the previous entries for integrity checking
PrevLogEntry uint64
PrevLogTerm uint64
// New entries to commit
Entries []*Log
// Commit index on the leader
LeaderCommitIndex uint64
}
// AppendEntriesResponse is the response returned from an
// AppendEntriesRequest.
type AppendEntriesResponse struct {
// Newer term if leader is out of date
Term uint64
// Last Log is a hint to help accelerate rebuilding slow nodes
LastLog uint64
// We may not succeed if we have a conflicting entry
Success bool
// There are scenarios where this request didn't succeed
// but there's no need to wait/back-off the next attempt.
NoRetryBackoff bool
}
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
// for a vote in an election.
type RequestVoteRequest struct {
// Provide the term and our id
Term uint64
Candidate []byte
// Used to ensure safety
LastLogIndex uint64
LastLogTerm uint64
}
// RequestVoteResponse is the response returned from a RequestVoteRequest.
type RequestVoteResponse struct {
// Newer term if leader is out of date
Term uint64
// Return the peers, so that a node can shutdown on removal
Peers []byte
// Is the vote granted
Granted bool
}
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
// log (and state machine) from a snapshot on another peer.
type InstallSnapshotRequest struct {
Term uint64
Leader []byte
// These are the last index/term included in the snapshot
LastLogIndex uint64
LastLogTerm uint64
// Peer Set in the snapshot
Peers []byte
// Size of the snapshot
Size int64
}
// InstallSnapshotResponse is the response returned from an
// InstallSnapshotRequest.
type InstallSnapshotResponse struct {
Term uint64
Success bool
}

136
go/vt/orchestrator/external/raft/config.go vendored Normal file

@ -0,0 +1,136 @@
package raft
import (
"fmt"
"io"
"log"
"time"
)
// Config provides any necessary configuration to
// the Raft server
type Config struct {
// HeartbeatTimeout specifies the time in follower state without
// a leader before we attempt an election.
HeartbeatTimeout time.Duration
// ElectionTimeout specifies the time in candidate state without
// a leader before we attempt an election.
ElectionTimeout time.Duration
// CommitTimeout controls the time without an Apply() operation
// before we heartbeat to ensure a timely commit. Due to random
// staggering, may be delayed as much as 2x this value.
CommitTimeout time.Duration
// MaxAppendEntries controls the maximum number of append entries
// to send at once. We want to strike a balance between efficiency
// and avoiding waste if the follower is going to reject because of
// an inconsistent log.
MaxAppendEntries int
// If we are a member of a cluster, and RemovePeer is invoked for the
// local node, then we forget all peers and transition into the follower state.
// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,
// we can become a leader of a cluster containing only this node.
ShutdownOnRemove bool
// DisableBootstrapAfterElect is used to turn off EnableSingleNode
// after the node is elected. This is used to prevent self-election
// if the node is removed from the Raft cluster via RemovePeer. Setting
// it to false will keep the bootstrap mode, allowing the node to self-elect
// and potentially bootstrap a separate cluster.
DisableBootstrapAfterElect bool
// TrailingLogs controls how many logs we leave after a snapshot. This is
// used so that we can quickly replay logs on a follower instead of being
// forced to send an entire snapshot.
TrailingLogs uint64
// SnapshotInterval controls how often we check if we should perform a snapshot.
// We randomly stagger between this value and 2x this value to avoid the entire
// cluster from performing a snapshot at once.
SnapshotInterval time.Duration
// SnapshotThreshold controls how many outstanding logs there must be before
// we perform a snapshot. This is to prevent excessive snapshots when we can
// just replay a small set of logs.
SnapshotThreshold uint64
// EnableSingleNode allows for a single node mode of operation. This
// is false by default, which prevents a lone node from electing itself
// leader.
EnableSingleNode bool
// LeaderLeaseTimeout is used to control how long the "lease" lasts
// for being the leader without being able to contact a quorum
// of nodes. If we reach this interval without contact, we will
// step down as leader.
LeaderLeaseTimeout time.Duration
// StartAsLeader forces Raft to start in the leader state. This should
// never be used except for testing purposes, as it can cause a split-brain.
StartAsLeader bool
// NotifyCh is used to provide a channel that will be notified of leadership
// changes. Raft will block writing to this channel, so it should either be
// buffered or aggressively consumed.
NotifyCh chan<- bool
// LogOutput is used as a sink for logs, unless Logger is specified.
// Defaults to os.Stderr.
LogOutput io.Writer
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
// is used.
Logger *log.Logger
}
// DefaultConfig returns a Config with usable defaults.
func DefaultConfig() *Config {
return &Config{
HeartbeatTimeout: 1000 * time.Millisecond,
ElectionTimeout: 1000 * time.Millisecond,
CommitTimeout: 50 * time.Millisecond,
MaxAppendEntries: 64,
ShutdownOnRemove: true,
DisableBootstrapAfterElect: true,
TrailingLogs: 10240,
SnapshotInterval: 120 * time.Second,
SnapshotThreshold: 8192,
EnableSingleNode: false,
LeaderLeaseTimeout: 500 * time.Millisecond,
}
}
// ValidateConfig is used to validate a sane configuration
func ValidateConfig(config *Config) error {
if config.HeartbeatTimeout < 5*time.Millisecond {
return fmt.Errorf("Heartbeat timeout is too low")
}
if config.ElectionTimeout < 5*time.Millisecond {
return fmt.Errorf("Election timeout is too low")
}
if config.CommitTimeout < time.Millisecond {
return fmt.Errorf("Commit timeout is too low")
}
if config.MaxAppendEntries <= 0 {
return fmt.Errorf("MaxAppendEntries must be positive")
}
if config.MaxAppendEntries > 1024 {
return fmt.Errorf("MaxAppendEntries is too large")
}
if config.SnapshotInterval < 5*time.Millisecond {
return fmt.Errorf("Snapshot interval is too low")
}
if config.LeaderLeaseTimeout < 5*time.Millisecond {
return fmt.Errorf("Leader lease timeout is too low")
}
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
}
if config.ElectionTimeout < config.HeartbeatTimeout {
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
}
return nil
}
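// exampleConfig is an editorial illustration, not part of the original file:
// start from DefaultConfig, adjust a few knobs, then validate before use.
func exampleConfig() (*Config, error) {
	config := DefaultConfig()
	config.SnapshotThreshold = 4096 // snapshot after fewer outstanding logs
	config.TrailingLogs = 2048      // keep fewer logs after each snapshot
	if err := ValidateConfig(config); err != nil {
		return nil, err
	}
	return config, nil
}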

48
go/vt/orchestrator/external/raft/discard_snapshot.go vendored Normal file

@ -0,0 +1,48 @@
package raft
import (
"fmt"
"io"
)
// DiscardSnapshotStore is used to successfully snapshot while
// always discarding the snapshot. This is useful for when the
// log should be truncated but no snapshot should be retained.
// It should never be used in production, and is only
// suitable for testing.
type DiscardSnapshotStore struct{}
type DiscardSnapshotSink struct{}
// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
return &DiscardSnapshotStore{}
}
func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
return &DiscardSnapshotSink{}, nil
}
func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
return nil, nil
}
func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
return nil, nil, fmt.Errorf("open is not supported")
}
func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
return len(b), nil
}
func (d *DiscardSnapshotSink) Close() error {
return nil
}
func (d *DiscardSnapshotSink) ID() string {
return "discard"
}
func (d *DiscardSnapshotSink) Cancel() error {
return nil
}
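// exampleDiscard is an editorial illustration, not part of the original file:
// the full sink lifecycle works, but every byte written is dropped.
func exampleDiscard() error {
	sink, err := NewDiscardSnapshotStore().Create(10, 3, []byte("peers"))
	if err != nil {
		return err
	}
	if _, err := sink.Write([]byte("state that will be discarded")); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}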

17
go/vt/orchestrator/external/raft/discard_snapshot_test.go vendored Normal file

@ -0,0 +1,17 @@
package raft
import "testing"
func TestDiscardSnapshotStoreImpl(t *testing.T) {
var impl interface{} = &DiscardSnapshotStore{}
if _, ok := impl.(SnapshotStore); !ok {
t.Fatalf("DiscardSnapshotStore not a SnapshotStore")
}
}
func TestDiscardSnapshotSinkImpl(t *testing.T) {
var impl interface{} = &DiscardSnapshotSink{}
if _, ok := impl.(SnapshotSink); !ok {
t.Fatalf("DiscardSnapshotSink not a SnapshotSink")
}
}

479
go/vt/orchestrator/external/raft/file_snapshot.go vendored Normal file

@ -0,0 +1,479 @@
package raft
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"hash"
"hash/crc64"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
const (
testPath = "permTest"
snapPath = "snapshots"
metaFilePath = "meta.json"
stateFilePath = "state.bin"
tmpSuffix = ".tmp"
)
// FileSnapshotStore implements the SnapshotStore interface and allows
// snapshots to be made on the local disk.
type FileSnapshotStore struct {
path string
retain int
logger *log.Logger
}
type snapMetaSlice []*fileSnapshotMeta
// FileSnapshotSink implements SnapshotSink with a file.
type FileSnapshotSink struct {
store *FileSnapshotStore
logger *log.Logger
dir string
meta fileSnapshotMeta
stateFile *os.File
stateHash hash.Hash64
buffered *bufio.Writer
closed bool
}
// fileSnapshotMeta is stored on disk. We also put a CRC
// on disk so that we can verify the snapshot.
type fileSnapshotMeta struct {
SnapshotMeta
CRC []byte
}
// bufferedFile is returned when we open a snapshot. This way
// reads are buffered and the file still gets closed.
type bufferedFile struct {
bh *bufio.Reader
fh *os.File
}
func (b *bufferedFile) Read(p []byte) (n int, err error) {
return b.bh.Read(p)
}
func (b *bufferedFile) Close() error {
return b.fh.Close()
}
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
if retain < 1 {
return nil, fmt.Errorf("must retain at least one snapshot")
}
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
// Ensure our path exists
path := filepath.Join(base, snapPath)
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
}
// Setup the store
store := &FileSnapshotStore{
path: path,
retain: retain,
logger: logger,
}
// Do a permissions test
if err := store.testPermissions(); err != nil {
return nil, fmt.Errorf("permissions test failed: %v", err)
}
return store, nil
}
// NewFileSnapshotStore creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
if logOutput == nil {
logOutput = os.Stderr
}
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
}
// testPermissions tries to touch a file in our path to see if it works.
func (f *FileSnapshotStore) testPermissions() error {
path := filepath.Join(f.path, testPath)
fh, err := os.Create(path)
if err != nil {
return err
}
if err = fh.Close(); err != nil {
return err
}
if err = os.Remove(path); err != nil {
return err
}
return nil
}
// snapshotName generates a name for the snapshot.
func snapshotName(term, index uint64) string {
now := time.Now()
msec := now.UnixNano() / int64(time.Millisecond)
return fmt.Sprintf("%d-%d-%d", term, index, msec)
}
// Create is used to start a new snapshot
func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
// Create a new path
name := snapshotName(term, index)
path := filepath.Join(f.path, name+tmpSuffix)
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
// Make the directory
if err := os.MkdirAll(path, 0755); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
return nil, err
}
// Create the sink
sink := &FileSnapshotSink{
store: f,
logger: f.logger,
dir: path,
meta: fileSnapshotMeta{
SnapshotMeta: SnapshotMeta{
ID: name,
Index: index,
Term: term,
Peers: peers,
},
CRC: nil,
},
}
// Write out the meta data
if err := sink.writeMeta(); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return nil, err
}
// Open the state file
statePath := filepath.Join(path, stateFilePath)
fh, err := os.Create(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
return nil, err
}
sink.stateFile = fh
// Create a CRC64 hash
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
// Wrap both the hash and file in a MultiWriter with buffering
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
sink.buffered = bufio.NewWriter(multi)
// Done
return sink, nil
}
// List returns available snapshots in the store.
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return nil, err
}
var snapMeta []*SnapshotMeta
for _, meta := range snapshots {
snapMeta = append(snapMeta, &meta.SnapshotMeta)
if len(snapMeta) == f.retain {
break
}
}
return snapMeta, nil
}
// getSnapshots returns all the known snapshots.
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := ioutil.ReadDir(f.path)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
return nil, err
}
// Populate the metadata
var snapMeta []*fileSnapshotMeta
for _, snap := range snapshots {
// Ignore any files
if !snap.IsDir() {
continue
}
// Ignore any temporary snapshots
dirName := snap.Name()
if strings.HasSuffix(dirName, tmpSuffix) {
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
continue
}
// Try to read the meta data
meta, err := f.readMeta(dirName)
if err != nil {
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
continue
}
// Append, but only return up to the retain count
snapMeta = append(snapMeta, meta)
}
// Sort the snapshots, reversed so we get new -> old
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
return snapMeta, nil
}
// readMeta is used to read the meta data for a given named backup
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
// Open the meta file
metaPath := filepath.Join(f.path, name, metaFilePath)
fh, err := os.Open(metaPath)
if err != nil {
return nil, err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewReader(fh)
// Read in the JSON
meta := &fileSnapshotMeta{}
dec := json.NewDecoder(buffered)
if err := dec.Decode(meta); err != nil {
return nil, err
}
return meta, nil
}
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
// Get the metadata
meta, err := f.readMeta(id)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
return nil, nil, err
}
// Open the state file
statePath := filepath.Join(f.path, id, stateFilePath)
fh, err := os.Open(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
return nil, nil, err
}
// Create a CRC64 hash
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
// Compute the hash
_, err = io.Copy(stateHash, fh)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
fh.Close()
return nil, nil, err
}
// Verify the hash
computed := stateHash.Sum(nil)
if !bytes.Equal(meta.CRC, computed) {
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
meta.CRC, computed)
fh.Close()
return nil, nil, fmt.Errorf("CRC mismatch")
}
// Seek to the start
if _, err := fh.Seek(0, 0); err != nil {
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
fh.Close()
return nil, nil, err
}
// Return a buffered file
buffered := &bufferedFile{
bh: bufio.NewReader(fh),
fh: fh,
}
return &meta.SnapshotMeta, buffered, nil
}
// ReapSnapshots reaps any snapshots beyond the retain count.
func (f *FileSnapshotStore) ReapSnapshots() error {
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return err
}
for i := f.retain; i < len(snapshots); i++ {
path := filepath.Join(f.path, snapshots[i].ID)
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
if err := os.RemoveAll(path); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
return err
}
}
return nil
}
// ID returns the ID of the snapshot, can be used with Open()
// after the snapshot is finalized.
func (s *FileSnapshotSink) ID() string {
return s.meta.ID
}
// Write is used to append to the state file. We write to the
// buffered IO object to reduce the amount of context switches.
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
return s.buffered.Write(b)
}
// Close is used to indicate a successful end.
func (s *FileSnapshotSink) Close() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Write out the meta data
if err := s.writeMeta(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return err
}
// Move the directory into place
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
if err := os.Rename(s.dir, newPath); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
return err
}
// Reap any old snapshots
if err := s.store.ReapSnapshots(); err != nil {
return err
}
return nil
}
// Cancel is used to indicate an unsuccessful end.
func (s *FileSnapshotSink) Cancel() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Attempt to remove all artifacts
return os.RemoveAll(s.dir)
}
// finalize is used to close all of our resources.
func (s *FileSnapshotSink) finalize() error {
// Flush any remaining data
if err := s.buffered.Flush(); err != nil {
return err
}
// Get the file size
stat, statErr := s.stateFile.Stat()
// Close the file
if err := s.stateFile.Close(); err != nil {
return err
}
// Set the file size, check after we close
if statErr != nil {
return statErr
}
s.meta.Size = stat.Size()
// Set the CRC
s.meta.CRC = s.stateHash.Sum(nil)
return nil
}
// writeMeta is used to write out the metadata we have.
func (s *FileSnapshotSink) writeMeta() error {
// Open the meta file
metaPath := filepath.Join(s.dir, metaFilePath)
fh, err := os.Create(metaPath)
if err != nil {
return err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewWriter(fh)
defer buffered.Flush()
// Write out as JSON
enc := json.NewEncoder(buffered)
if err := enc.Encode(&s.meta); err != nil {
return err
}
return nil
}
// Implement the sort interface for []*fileSnapshotMeta.
func (s snapMetaSlice) Len() int {
return len(s)
}
func (s snapMetaSlice) Less(i, j int) bool {
if s[i].Term != s[j].Term {
return s[i].Term < s[j].Term
}
if s[i].Index != s[j].Index {
return s[i].Index < s[j].Index
}
return s[i].ID < s[j].ID
}
func (s snapMetaSlice) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
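// exampleFileSnapshot is an editorial illustration, not part of the original
// file: Create opens a .tmp directory, Write streams state through the
// buffered CRC64 writer, and Close finalizes the metadata, renames the
// directory into place, and reaps snapshots beyond the retain count.
func exampleFileSnapshot(dir string) error {
	store, err := NewFileSnapshotStore(dir, 3, os.Stderr)
	if err != nil {
		return err
	}
	sink, err := store.Create(10, 3, []byte("peers"))
	if err != nil {
		return err
	}
	if _, err := sink.Write([]byte("fsm state")); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}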

343
go/vt/orchestrator/external/raft/file_snapshot_test.go vendored Normal file

@ -0,0 +1,343 @@
package raft
import (
"bytes"
"io"
"io/ioutil"
"os"
"runtime"
"testing"
)
func FileSnapTest(t *testing.T) (string, *FileSnapshotStore) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
return dir, snap
}
func TestFileSnapshotStoreImpl(t *testing.T) {
var impl interface{} = &FileSnapshotStore{}
if _, ok := impl.(SnapshotStore); !ok {
t.Fatalf("FileSnapshotStore not a SnapshotStore")
}
}
func TestFileSnapshotSinkImpl(t *testing.T) {
var impl interface{} = &FileSnapshotSink{}
if _, ok := impl.(SnapshotSink); !ok {
t.Fatalf("FileSnapshotSink not a SnapshotSink")
}
}
func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) {
parent, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(parent)
dir, err := ioutil.TempDir(parent, "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
os.RemoveAll(parent)
peers := []byte("all my lovely friends")
_, err = snap.Create(10, 3, peers)
if err != nil {
t.Fatalf("should not fail when using non existing parent")
}
}
func TestFileSS_CreateSnapshot(t *testing.T) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(dir)
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
// Check no snapshots
snaps, err := snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 0 {
t.Fatalf("did not expect any snapshots: %v", snaps)
}
// Create a new sink
peers := []byte("all my lovely friends")
sink, err := snap.Create(10, 3, peers)
if err != nil {
t.Fatalf("err: %v", err)
}
// The sink is not done, should not be in a list!
snaps, err = snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 0 {
t.Fatalf("did not expect any snapshots: %v", snaps)
}
// Write to the sink
_, err = sink.Write([]byte("first\n"))
if err != nil {
t.Fatalf("err: %v", err)
}
_, err = sink.Write([]byte("second\n"))
if err != nil {
t.Fatalf("err: %v", err)
}
// Done!
err = sink.Close()
if err != nil {
t.Fatalf("err: %v", err)
}
// Should have a snapshot!
snaps, err = snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 1 {
t.Fatalf("expect a snapshots: %v", snaps)
}
// Check the latest
latest := snaps[0]
if latest.Index != 10 {
t.Fatalf("bad snapshot: %v", *latest)
}
if latest.Term != 3 {
t.Fatalf("bad snapshot: %v", *latest)
}
if !bytes.Equal(latest.Peers, peers) {
t.Fatalf("bad snapshot: %v", *latest)
}
if latest.Size != 13 {
t.Fatalf("bad snapshot: %v", *latest)
}
// Read the snapshot
_, r, err := snap.Open(latest.ID)
if err != nil {
t.Fatalf("err: %v", err)
}
// Read out everything
var buf bytes.Buffer
if _, err := io.Copy(&buf, r); err != nil {
t.Fatalf("err: %v", err)
}
if err := r.Close(); err != nil {
t.Fatalf("err: %v", err)
}
// Ensure a match
if !bytes.Equal(buf.Bytes(), []byte("first\nsecond\n")) {
t.Fatalf("content mismatch")
}
}
func TestFileSS_CancelSnapshot(t *testing.T) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(dir)
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
// Create a new sink
peers := []byte("all my lovely friends")
sink, err := snap.Create(10, 3, peers)
if err != nil {
t.Fatalf("err: %v", err)
}
// Cancel the snapshot! Should delete
err = sink.Cancel()
if err != nil {
t.Fatalf("err: %v", err)
}
// The sink is canceled, should not be in a list!
snaps, err := snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 0 {
t.Fatalf("did not expect any snapshots: %v", snaps)
}
}
func TestFileSS_Retention(t *testing.T) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(dir)
snap, err := NewFileSnapshotStoreWithLogger(dir, 2, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
// Create a new sink
peers := []byte("all my lovely friends")
// Create a few snapshots
for i := 10; i < 15; i++ {
sink, err := snap.Create(uint64(i), 3, peers)
if err != nil {
t.Fatalf("err: %v", err)
}
err = sink.Close()
if err != nil {
t.Fatalf("err: %v", err)
}
}
// Should only have 2 listed!
snaps, err := snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 2 {
t.Fatalf("expect 2 snapshots: %v", snaps)
}
// Check they are the latest
if snaps[0].Index != 14 {
t.Fatalf("bad snap: %#v", *snaps[0])
}
if snaps[1].Index != 13 {
t.Fatalf("bad snap: %#v", *snaps[1])
}
}
func TestFileSS_BadPerm(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("skipping file permission test on windows")
}
// Create a temp dir
dir1, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %s", err)
}
defer os.RemoveAll(dir1)
// Create a sub dir and remove all permissions
dir2, err := ioutil.TempDir(dir1, "badperm")
if err != nil {
t.Fatalf("err: %s", err)
}
if err := os.Chmod(dir2, 000); err != nil {
t.Fatalf("err: %s", err)
}
defer os.Chmod(dir2, 777) // Set perms back for delete
// Should fail
if _, err := NewFileSnapshotStore(dir2, 3, nil); err == nil {
t.Fatalf("should fail to use dir with bad perms")
}
}
func TestFileSS_MissingParentDir(t *testing.T) {
parent, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(parent)
dir, err := ioutil.TempDir(parent, "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
os.RemoveAll(parent)
_, err = NewFileSnapshotStore(dir, 3, nil)
if err != nil {
t.Fatalf("should not fail when using non existing parent")
}
}
func TestFileSS_Ordering(t *testing.T) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(dir)
snap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
// Create a new sink
peers := []byte("all my lovely friends")
sink, err := snap.Create(130350, 5, peers)
if err != nil {
t.Fatalf("err: %v", err)
}
err = sink.Close()
if err != nil {
t.Fatalf("err: %v", err)
}
sink, err = snap.Create(204917, 36, peers)
if err != nil {
t.Fatalf("err: %v", err)
}
err = sink.Close()
if err != nil {
t.Fatalf("err: %v", err)
}
// Should only have 2 listed!
snaps, err := snap.List()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(snaps) != 2 {
t.Fatalf("expect 2 snapshots: %v", snaps)
}
// Check they are ordered
if snaps[0].Term != 36 {
t.Fatalf("bad snap: %#v", *snaps[0])
}
if snaps[1].Term != 5 {
t.Fatalf("bad snap: %#v", *snaps[1])
}
}

40
go/vt/orchestrator/external/raft/fsm.go vendored Normal file

@@ -0,0 +1,40 @@
package raft
import (
"io"
)
// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
// Apply log is invoked once a log entry is committed.
// It returns a value which will be made available in the
// ApplyFuture returned by Raft.Apply method if that
// method was called on the same Raft node as the FSM.
Apply(*Log) interface{}
// Snapshot is used to support log compaction. This call should
// return an FSMSnapshot which can be used to save a point-in-time
// snapshot of the FSM. Apply and Snapshot are not called in multiple
// threads, but Apply will be called concurrently with Persist. This means
// the FSM should be implemented in a fashion that allows for concurrent
// updates while a snapshot is happening.
Snapshot() (FSMSnapshot, error)
// Restore is used to restore an FSM from a snapshot. It is not called
// concurrently with any other command. The FSM must discard all previous
// state.
Restore(io.ReadCloser) error
}
// FSMSnapshot is returned by an FSM in response to a Snapshot call.
// It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
// Persist should dump all necessary state to the WriteCloser 'sink',
// and call sink.Close() when finished or call sink.Cancel() on error.
Persist(sink SnapshotSink) error
// Release is invoked when we are finished with the snapshot.
Release()
}
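To make the contract concrete, here is a toy FSM sketch (the type names sliceFSM and sliceSnapshot are invented for illustration; assumes the sync and io imports). It keeps applied entries in a slice and copies them under a lock in Snapshot, so Persist can run concurrently with Apply as required above:
type sliceFSM struct {
	mu   sync.Mutex
	logs [][]byte
}

func (f *sliceFSM) Apply(l *Log) interface{} {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.logs = append(f.logs, l.Data)
	return len(f.logs) // surfaced via ApplyFuture.Response()
}

func (f *sliceFSM) Snapshot() (FSMSnapshot, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	// Copy the state so Persist can proceed while Apply keeps mutating.
	cp := make([][]byte, len(f.logs))
	copy(cp, f.logs)
	return &sliceSnapshot{logs: cp}, nil
}

func (f *sliceFSM) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	f.mu.Lock()
	defer f.mu.Unlock()
	f.logs = nil // discard all previous state, per the contract
	// Decoding of the snapshot stream is elided in this sketch.
	return nil
}

type sliceSnapshot struct{ logs [][]byte }

func (s *sliceSnapshot) Persist(sink SnapshotSink) error {
	for _, e := range s.logs {
		if _, err := sink.Write(e); err != nil {
			sink.Cancel()
			return err
		}
	}
	return sink.Close()
}

func (s *sliceSnapshot) Release() {}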

203
go/vt/orchestrator/external/raft/future.go vendored Normal file

@@ -0,0 +1,203 @@
package raft
import (
"sync"
"time"
)
// Future is used to represent an action that may occur in the future.
type Future interface {
// Error blocks until the future arrives and then
// returns the error status of the future.
// This may be called any number of times - all
// calls will return the same value.
// Note that it is not OK to call this method
// twice concurrently on the same Future instance.
Error() error
}
// ApplyFuture is used for Apply() and can return the FSM response.
type ApplyFuture interface {
Future
// Response returns the FSM response as returned
// by the FSM.Apply method. This must not be called
// until after the Error method has returned.
Response() interface{}
// Index holds the index of the newly applied log entry.
// This must not be called
// until after the Error method has returned.
Index() uint64
}
// errorFuture is used to return a static error.
type errorFuture struct {
err error
}
func (e errorFuture) Error() error {
return e.err
}
func (e errorFuture) Response() interface{} {
return nil
}
func (e errorFuture) Index() uint64 {
return 0
}
// deferError can be embedded to allow a future
// to provide an error in the future.
type deferError struct {
err error
errCh chan error
responded bool
}
func (d *deferError) init() {
d.errCh = make(chan error, 1)
}
func (d *deferError) Error() error {
if d.err != nil {
// Note that when we've received a nil error, this
// won't trigger, but the channel is closed after
// send so we'll still return nil below.
return d.err
}
if d.errCh == nil {
panic("waiting for response on nil channel")
}
d.err = <-d.errCh
return d.err
}
func (d *deferError) respond(err error) {
if d.errCh == nil {
return
}
if d.responded {
return
}
d.errCh <- err
close(d.errCh)
d.responded = true
}
// logFuture is used to apply a log entry and waits until
// the log is considered committed.
type logFuture struct {
deferError
log Log
policy quorumPolicy
response interface{}
dispatch time.Time
}
func (l *logFuture) Response() interface{} {
return l.response
}
func (l *logFuture) Index() uint64 {
return l.log.Index
}
type peerFuture struct {
deferError
peers []string
}
type shutdownFuture struct {
raft *Raft
}
func (s *shutdownFuture) Error() error {
if s.raft == nil {
return nil
}
s.raft.waitShutdown()
if closeable, ok := s.raft.trans.(WithClose); ok {
closeable.Close()
}
return nil
}
// snapshotFuture is used for waiting on a snapshot to complete.
type snapshotFuture struct {
deferError
}
// reqSnapshotFuture is used for requesting a snapshot start.
// It is only used internally.
type reqSnapshotFuture struct {
deferError
// snapshot details provided by the FSM runner before responding
index uint64
term uint64
peers []string
snapshot FSMSnapshot
}
// restoreFuture is used for requesting an FSM to perform a
// snapshot restore. Used internally only.
type restoreFuture struct {
deferError
ID string
}
// verifyFuture is used to verify the current node is still
// the leader. This is to prevent a stale read.
type verifyFuture struct {
deferError
notifyCh chan *verifyFuture
quorumSize int
votes int
voteLock sync.Mutex
}
// vote is used to respond to a verifyFuture.
// This may block when responding on the notifyCh.
func (v *verifyFuture) vote(leader bool) {
v.voteLock.Lock()
defer v.voteLock.Unlock()
// Guard against having notified already
if v.notifyCh == nil {
return
}
if leader {
v.votes++
if v.votes >= v.quorumSize {
v.notifyCh <- v
v.notifyCh = nil
}
} else {
v.notifyCh <- v
v.notifyCh = nil
}
}
// appendFuture is used for waiting on a pipelined append
// entries RPC.
type appendFuture struct {
deferError
start time.Time
args *AppendEntriesRequest
resp *AppendEntriesResponse
}
func (a *appendFuture) Start() time.Time {
return a.start
}
func (a *appendFuture) Request() *AppendEntriesRequest {
return a.args
}
func (a *appendFuture) Response() *AppendEntriesResponse {
return a.resp
}
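From the caller's side, the Future contract reduces to: block on Error, then read the results. A hedged sketch against Raft.Apply (the payload and timeout values are illustrative):
func applyAndWait(r *Raft) (interface{}, uint64, error) {
	f := r.Apply([]byte("set x=1"), 50*time.Millisecond)
	if err := f.Error(); err != nil { // blocks until committed or failed
		return nil, 0, err
	}
	// Response and Index are only valid after Error has returned.
	return f.Response(), f.Index(), nil
}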

42
go/vt/orchestrator/external/raft/future_test.go vendored Normal file

@@ -0,0 +1,42 @@
package raft
import (
"errors"
"testing"
)
func TestDeferFutureSuccess(t *testing.T) {
var f deferError
f.init()
f.respond(nil)
if err := f.Error(); err != nil {
t.Fatalf("unexpected error result; got %#v want nil", err)
}
if err := f.Error(); err != nil {
t.Fatalf("unexpected error result; got %#v want nil", err)
}
}
func TestDeferFutureError(t *testing.T) {
want := errors.New("x")
var f deferError
f.init()
f.respond(want)
if got := f.Error(); got != want {
t.Fatalf("unexpected error result; got %#v want %#v", got, want)
}
if got := f.Error(); got != want {
t.Fatalf("unexpected error result; got %#v want %#v", got, want)
}
}
func TestDeferFutureConcurrent(t *testing.T) {
// Food for the race detector.
want := errors.New("x")
var f deferError
f.init()
go f.respond(want)
if got := f.Error(); got != want {
t.Errorf("unexpected error result; got %#v want %#v", got, want)
}
}

213
go/vt/orchestrator/external/raft/inflight.go vendored Normal file

@@ -0,0 +1,213 @@
package raft
import (
"container/list"
"sync"
)
// quorumPolicy allows individual logFutures to have different
// commitment rules while still using the inflight mechanism.
type quorumPolicy interface {
// Checks if a commit from a given peer is enough to
// satisfy the commitment rules
Commit() bool
// Checks if a commit is committed
IsCommitted() bool
}
// majorityQuorum is used by Apply transactions and requires
// a simple majority of nodes.
type majorityQuorum struct {
count int
votesNeeded int
}
func newMajorityQuorum(clusterSize int) *majorityQuorum {
votesNeeded := (clusterSize / 2) + 1
return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
}
func (m *majorityQuorum) Commit() bool {
m.count++
return m.count >= m.votesNeeded
}
func (m *majorityQuorum) IsCommitted() bool {
return m.count >= m.votesNeeded
}
// inflight is used to track operations that are still in-flight.
type inflight struct {
sync.Mutex
committed *list.List
commitCh chan struct{}
minCommit uint64
maxCommit uint64
operations map[uint64]*logFuture
stopCh chan struct{}
}
// newInflight returns an inflight struct that notifies
// the provided channel when logs are finished committing.
func newInflight(commitCh chan struct{}) *inflight {
return &inflight{
committed: list.New(),
commitCh: commitCh,
minCommit: 0,
maxCommit: 0,
operations: make(map[uint64]*logFuture),
stopCh: make(chan struct{}),
}
}
// Start is used to mark a logFuture as being inflight. It
// also commits the entry, as it is assumed the leader is
// starting.
func (i *inflight) Start(l *logFuture) {
i.Lock()
defer i.Unlock()
i.start(l)
}
// StartAll is used to mark a list of logFuture's as being
// inflight. It also commits each entry as the leader is
// assumed to be starting.
func (i *inflight) StartAll(logs []*logFuture) {
i.Lock()
defer i.Unlock()
for _, l := range logs {
i.start(l)
}
}
// start is used to mark a single entry as inflight,
// must be invoked with the lock held.
func (i *inflight) start(l *logFuture) {
idx := l.log.Index
i.operations[idx] = l
if idx > i.maxCommit {
i.maxCommit = idx
}
if i.minCommit == 0 {
i.minCommit = idx
}
i.commit(idx)
}
// Cancel is used to cancel all in-flight operations.
// This is done when the leader steps down, and all futures
// are sent the given error.
func (i *inflight) Cancel(err error) {
// Close the channel first to unblock any pending commits
close(i.stopCh)
// Lock after close to avoid deadlock
i.Lock()
defer i.Unlock()
// Respond to all inflight operations
for _, op := range i.operations {
op.respond(err)
}
// Clear all the committed but not processed
for e := i.committed.Front(); e != nil; e = e.Next() {
e.Value.(*logFuture).respond(err)
}
// Clear the map
i.operations = make(map[uint64]*logFuture)
// Clear the list of committed
i.committed = list.New()
// Close the commitCh
close(i.commitCh)
// Reset indexes
i.minCommit = 0
i.maxCommit = 0
}
// Committed returns all the committed operations in order.
func (i *inflight) Committed() (l *list.List) {
i.Lock()
l, i.committed = i.committed, list.New()
i.Unlock()
return l
}
// Commit is used by leader replication routines to indicate that
// a follower was finished committing a log to disk.
func (i *inflight) Commit(index uint64) {
i.Lock()
defer i.Unlock()
i.commit(index)
}
// CommitRange is used to commit a range of indexes inclusively.
// It is optimized to avoid commits for indexes that are not tracked.
func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
i.Lock()
defer i.Unlock()
// Update the minimum index
minIndex = max(i.minCommit, minIndex)
// Commit each index
for idx := minIndex; idx <= maxIndex; idx++ {
i.commit(idx)
}
}
// commit is used to commit a single index. Must be called with the lock held.
func (i *inflight) commit(index uint64) {
op, ok := i.operations[index]
if !ok {
// Ignore if not in the map, as it may be committed already
return
}
// Check if we've satisfied the commit
if !op.policy.Commit() {
return
}
// Cannot commit if this is not the minimum inflight. This can happen
// if the quorum size changes, meaning a previous commit requires a larger
// quorum than this commit. We MUST block until the previous log is committed,
// otherwise logs will be applied out of order.
if index != i.minCommit {
return
}
NOTIFY:
// Add the operation to the committed list
i.committed.PushBack(op)
// Stop tracking since it is committed
delete(i.operations, index)
// Update the indexes
if index == i.maxCommit {
i.minCommit = 0
i.maxCommit = 0
} else {
i.minCommit++
}
// Check if the next in-flight operation is ready
if i.minCommit != 0 {
op = i.operations[i.minCommit]
if op.policy.IsCommitted() {
index = i.minCommit
goto NOTIFY
}
}
// Async notify of ready operations
asyncNotifyCh(i.commitCh)
}
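The tests that follow exercise the full inflight flow; the quorum arithmetic on its own is worth spelling out. In a 5-node cluster, votesNeeded = 5/2 + 1 = 3, so only the third Commit reports the entry committed. A sketch, assuming fmt is imported:
func quorumDemo() {
	q := newMajorityQuorum(5) // votesNeeded = 5/2 + 1 = 3
	fmt.Println(q.Commit())   // false: 1 vote
	fmt.Println(q.Commit())   // false: 2 votes
	fmt.Println(q.Commit())   // true: 3 votes reach the majority
}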

150
go/vt/orchestrator/external/raft/inflight_test.go vendored Normal file

@@ -0,0 +1,150 @@
package raft
import (
"fmt"
"testing"
)
func TestInflight_StartCommit(t *testing.T) {
commitCh := make(chan struct{}, 1)
in := newInflight(commitCh)
// Commit a transaction as being in flight
l := &logFuture{log: Log{Index: 1}}
l.policy = newMajorityQuorum(5)
in.Start(l)
// Commit 3 times
in.Commit(1)
if in.Committed().Len() != 0 {
t.Fatalf("should not be commited")
}
in.Commit(1)
if in.Committed().Len() != 1 {
t.Fatalf("should be commited")
}
// Already committed but should work anyways
in.Commit(1)
}
func TestInflight_Cancel(t *testing.T) {
commitCh := make(chan struct{}, 1)
in := newInflight(commitCh)
// Commit a transaction as being in flight
l := &logFuture{
log: Log{Index: 1},
}
l.init()
l.policy = newMajorityQuorum(3)
in.Start(l)
// Cancel with an error
err := fmt.Errorf("error 1")
in.Cancel(err)
// Should get an error return
if l.Error() != err {
t.Fatalf("expected error")
}
}
func TestInflight_StartAll(t *testing.T) {
commitCh := make(chan struct{}, 1)
in := newInflight(commitCh)
// Commit a few transactions as being in flight
l1 := &logFuture{log: Log{Index: 2}}
l1.policy = newMajorityQuorum(5)
l2 := &logFuture{log: Log{Index: 3}}
l2.policy = newMajorityQuorum(5)
l3 := &logFuture{log: Log{Index: 4}}
l3.policy = newMajorityQuorum(5)
// Start all the entries
in.StartAll([]*logFuture{l1, l2, l3})
// Commit ranges
in.CommitRange(1, 5)
in.CommitRange(1, 4)
in.CommitRange(1, 10)
// Should get 3 back
if in.Committed().Len() != 3 {
t.Fatalf("expected all 3 to commit")
}
}
func TestInflight_CommitRange(t *testing.T) {
commitCh := make(chan struct{}, 1)
in := newInflight(commitCh)
// Commit a few transactions as being in flight
l1 := &logFuture{log: Log{Index: 2}}
l1.policy = newMajorityQuorum(5)
in.Start(l1)
l2 := &logFuture{log: Log{Index: 3}}
l2.policy = newMajorityQuorum(5)
in.Start(l2)
l3 := &logFuture{log: Log{Index: 4}}
l3.policy = newMajorityQuorum(5)
in.Start(l3)
// Commit ranges
in.CommitRange(1, 5)
in.CommitRange(1, 4)
in.CommitRange(1, 10)
// Should get 3 back
if in.Committed().Len() != 3 {
t.Fatalf("expected all 3 to commit")
}
}
// Should panic if we commit non-contiguously!
func TestInflight_NonContiguous(t *testing.T) {
commitCh := make(chan struct{}, 1)
in := newInflight(commitCh)
// Commit a few transactions as being in flight
l1 := &logFuture{log: Log{Index: 2}}
l1.policy = newMajorityQuorum(5)
in.Start(l1)
l2 := &logFuture{log: Log{Index: 3}}
l2.policy = newMajorityQuorum(5)
in.Start(l2)
in.Commit(3)
in.Commit(3)
in.Commit(3) // panic!
if in.Committed().Len() != 0 {
t.Fatalf("should not commit")
}
in.Commit(2)
in.Commit(2)
in.Commit(2) // panic!
committed := in.Committed()
if committed.Len() != 2 {
t.Fatalf("should commit both")
}
current := committed.Front()
l := current.Value.(*logFuture)
if l.log.Index != 2 {
t.Fatalf("bad: %v", *l)
}
current = current.Next()
l = current.Value.(*logFuture)
if l.log.Index != 3 {
t.Fatalf("bad: %v", *l)
}
}

116
go/vt/orchestrator/external/raft/inmem_store.go vendored Normal file

@@ -0,0 +1,116 @@
package raft
import (
"sync"
)
// InmemStore implements the LogStore and StableStore interface.
// It should NOT EVER be used for production. It is used only for
// unit tests. Use the MDBStore implementation instead.
type InmemStore struct {
l sync.RWMutex
lowIndex uint64
highIndex uint64
logs map[uint64]*Log
kv map[string][]byte
kvInt map[string]uint64
}
// NewInmemStore returns a new in-memory backend. Do not ever
// use for production. Only for testing.
func NewInmemStore() *InmemStore {
i := &InmemStore{
logs: make(map[uint64]*Log),
kv: make(map[string][]byte),
kvInt: make(map[string]uint64),
}
return i
}
// FirstIndex implements the LogStore interface.
func (i *InmemStore) FirstIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.lowIndex, nil
}
// LastIndex implements the LogStore interface.
func (i *InmemStore) LastIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.highIndex, nil
}
// GetLog implements the LogStore interface.
func (i *InmemStore) GetLog(index uint64, log *Log) error {
i.l.RLock()
defer i.l.RUnlock()
l, ok := i.logs[index]
if !ok {
return ErrLogNotFound
}
*log = *l
return nil
}
// StoreLog implements the LogStore interface.
func (i *InmemStore) StoreLog(log *Log) error {
return i.StoreLogs([]*Log{log})
}
// StoreLogs implements the LogStore interface.
func (i *InmemStore) StoreLogs(logs []*Log) error {
i.l.Lock()
defer i.l.Unlock()
for _, l := range logs {
i.logs[l.Index] = l
if i.lowIndex == 0 {
i.lowIndex = l.Index
}
if l.Index > i.highIndex {
i.highIndex = l.Index
}
}
return nil
}
// DeleteRange implements the LogStore interface.
func (i *InmemStore) DeleteRange(min, max uint64) error {
i.l.Lock()
defer i.l.Unlock()
for j := min; j <= max; j++ {
delete(i.logs, j)
}
i.lowIndex = max + 1
return nil
}
// Set implements the StableStore interface.
func (i *InmemStore) Set(key []byte, val []byte) error {
i.l.Lock()
defer i.l.Unlock()
i.kv[string(key)] = val
return nil
}
// Get implements the StableStore interface.
func (i *InmemStore) Get(key []byte) ([]byte, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kv[string(key)], nil
}
// SetUint64 implements the StableStore interface.
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
i.l.Lock()
defer i.l.Unlock()
i.kvInt[string(key)] = val
return nil
}
// GetUint64 implements the StableStore interface.
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kvInt[string(key)], nil
}
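A short sketch exercising both interfaces the store implements; the "CurrentTerm" key is illustrative, not an API contract:
func inmemStoreDemo() error {
	s := NewInmemStore()
	// LogStore side: store and fetch an entry, then compact.
	if err := s.StoreLog(&Log{Index: 1, Term: 1, Type: LogCommand, Data: []byte("a")}); err != nil {
		return err
	}
	var out Log
	if err := s.GetLog(1, &out); err != nil {
		return err
	}
	if err := s.DeleteRange(1, 1); err != nil { // range is inclusive
		return err
	}
	// StableStore side: small key/value state such as vote metadata.
	if err := s.SetUint64([]byte("CurrentTerm"), 5); err != nil {
		return err
	}
	_, err := s.GetUint64([]byte("CurrentTerm"))
	return err
}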

324
go/vt/orchestrator/external/raft/inmem_transport.go vendored Normal file

@@ -0,0 +1,324 @@
package raft
import (
"fmt"
"io"
"sync"
"time"
)
// NewInmemAddr returns a new in-memory addr with
// a randomly generated UUID as the ID.
func NewInmemAddr() string {
return generateUUID()
}
// inmemPipeline is used to pipeline requests for the in-mem transport.
type inmemPipeline struct {
trans *InmemTransport
peer *InmemTransport
peerAddr string
doneCh chan AppendFuture
inprogressCh chan *inmemPipelineInflight
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
type inmemPipelineInflight struct {
future *appendFuture
respCh <-chan RPCResponse
}
// InmemTransport implements the Transport interface, to allow Raft to be
// tested in-memory without going over a network.
type InmemTransport struct {
sync.RWMutex
consumerCh chan RPC
localAddr string
peers map[string]*InmemTransport
pipelines []*inmemPipeline
timeout time.Duration
}
// NewInmemTransport is used to initialize a new transport
// and generates a random local address if none is specified.
func NewInmemTransport(addr string) (string, *InmemTransport) {
if addr == "" {
addr = NewInmemAddr()
}
trans := &InmemTransport{
consumerCh: make(chan RPC, 16),
localAddr: addr,
peers: make(map[string]*InmemTransport),
timeout: 50 * time.Millisecond,
}
return addr, trans
}
// SetHeartbeatHandler is used to set optional fast-path for
// heartbeats, not supported for this transport.
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
}
// Consumer implements the Transport interface.
func (i *InmemTransport) Consumer() <-chan RPC {
return i.consumerCh
}
// LocalAddr implements the Transport interface.
func (i *InmemTransport) LocalAddr() string {
return i.localAddr
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
return nil, fmt.Errorf("failed to connect to peer: %v", target)
}
pipeline := newInmemPipeline(i, peer, target)
i.Lock()
i.pipelines = append(i.pipelines, pipeline)
i.Unlock()
return pipeline, nil
}
// AppendEntries implements the Transport interface.
func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*AppendEntriesResponse)
*resp = *out
return nil
}
// RequestVote implements the Transport interface.
func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*RequestVoteResponse)
*resp = *out
return nil
}
// InstallSnapshot implements the Transport interface.
func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*InstallSnapshotResponse)
*resp = *out
return nil
}
func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
err = fmt.Errorf("failed to connect to peer: %v", target)
return
}
// Send the RPC over
respCh := make(chan RPCResponse)
peer.consumerCh <- RPC{
Command: args,
Reader: r,
RespChan: respCh,
}
// Wait for a response
select {
case rpcResp = <-respCh:
if rpcResp.Error != nil {
err = rpcResp.Error
}
case <-time.After(timeout):
err = fmt.Errorf("command timed out")
}
return
}
// EncodePeer implements the Transport interface. It uses the UUID as the
// address directly.
func (i *InmemTransport) EncodePeer(p string) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface. It returns the raw
// bytes as the UUID address string.
func (i *InmemTransport) DecodePeer(buf []byte) string {
return string(buf)
}
// Connect is used to connect this transport to another transport for
// a given peer name. This allows for local routing.
func (i *InmemTransport) Connect(peer string, t Transport) {
trans := t.(*InmemTransport)
i.Lock()
defer i.Unlock()
i.peers[peer] = trans
}
// Disconnect is used to remove the ability to route to a given peer.
func (i *InmemTransport) Disconnect(peer string) {
i.Lock()
defer i.Unlock()
delete(i.peers, peer)
// Disconnect any pipelines
n := len(i.pipelines)
for idx := 0; idx < n; idx++ {
if i.pipelines[idx].peerAddr == peer {
i.pipelines[idx].Close()
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
idx--
n--
}
}
i.pipelines = i.pipelines[:n]
}
// DisconnectAll is used to remove all routes to peers.
func (i *InmemTransport) DisconnectAll() {
i.Lock()
defer i.Unlock()
i.peers = make(map[string]*InmemTransport)
// Handle pipelines
for _, pipeline := range i.pipelines {
pipeline.Close()
}
i.pipelines = nil
}
// Close is used to permanently disable the transport
func (i *InmemTransport) Close() error {
i.DisconnectAll()
return nil
}
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline {
i := &inmemPipeline{
trans: trans,
peer: peer,
peerAddr: addr,
doneCh: make(chan AppendFuture, 16),
inprogressCh: make(chan *inmemPipelineInflight, 16),
shutdownCh: make(chan struct{}),
}
go i.decodeResponses()
return i
}
func (i *inmemPipeline) decodeResponses() {
timeout := i.trans.timeout
for {
select {
case inp := <-i.inprogressCh:
var timeoutCh <-chan time.Time
if timeout > 0 {
timeoutCh = time.After(timeout)
}
select {
case rpcResp := <-inp.respCh:
// Copy the result back
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
inp.future.respond(rpcResp.Error)
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-timeoutCh:
inp.future.respond(fmt.Errorf("command timed out"))
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
}
}
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Handle a timeout
var timeout <-chan time.Time
if i.trans.timeout > 0 {
timeout = time.After(i.trans.timeout)
}
// Send the RPC over
respCh := make(chan RPCResponse, 1)
rpc := RPC{
Command: args,
RespChan: respCh,
}
select {
case i.peer.consumerCh <- rpc:
case <-timeout:
return nil, fmt.Errorf("command enqueue timeout")
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
// Send to be decoded
select {
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
return future, nil
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
}
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
return i.doneCh
}
func (i *inmemPipeline) Close() error {
i.shutdownLock.Lock()
defer i.shutdownLock.Unlock()
if i.shutdown {
return nil
}
i.shutdown = true
close(i.shutdownCh)
return nil
}
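Connect is one-directional, so a usable pair registers each side with the other before RPCs flow. A minimal pairing sketch (the function name is illustrative):
func pairedTransports() (*InmemTransport, *InmemTransport) {
	addr1, t1 := NewInmemTransport("") // "" picks a random UUID address
	addr2, t2 := NewInmemTransport("")
	t1.Connect(addr2, t2) // t1 can now route RPCs to t2...
	t2.Connect(addr1, t1) // ...and t2 can reply with its own calls
	return t1, t2
}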

18
go/vt/orchestrator/external/raft/inmem_transport_test.go vendored Normal file

@@ -0,0 +1,18 @@
package raft
import (
"testing"
)
func TestInmemTransportImpl(t *testing.T) {
var inm interface{} = &InmemTransport{}
if _, ok := inm.(Transport); !ok {
t.Fatalf("InmemTransport is not a Transport")
}
if _, ok := inm.(LoopbackTransport); !ok {
t.Fatalf("InmemTransport is not a Loopback Transport")
}
if _, ok := inm.(WithPeers); !ok {
t.Fatalf("InmemTransport is not a WithPeers Transport")
}
}

268
go/vt/orchestrator/external/raft/integ_test.go vendored Normal file

@@ -0,0 +1,268 @@
package raft
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"testing"
"time"
)
// CheckInteg will skip a test if integration testing is not enabled.
func CheckInteg(t *testing.T) {
if !IsInteg() {
t.SkipNow()
}
}
// IsInteg returns a boolean telling you if we're in integ testing mode.
func IsInteg() bool {
return os.Getenv("INTEG_TESTS") != ""
}
type RaftEnv struct {
dir string
conf *Config
fsm *MockFSM
store *InmemStore
snapshot *FileSnapshotStore
peers *JSONPeers
trans *NetworkTransport
raft *Raft
logger *log.Logger
}
func (r *RaftEnv) Release() {
r.logger.Printf("[WARN] Release node at %v", r.raft.localAddr)
f := r.raft.Shutdown()
if err := f.Error(); err != nil {
panic(err)
}
r.trans.Close()
os.RemoveAll(r.dir)
}
func MakeRaft(t *testing.T, conf *Config) *RaftEnv {
// Set the config
if conf == nil {
conf = inmemConfig(t)
}
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
stable := NewInmemStore()
snap, err := NewFileSnapshotStore(dir, 3, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
env := &RaftEnv{
conf: conf,
dir: dir,
store: stable,
snapshot: snap,
fsm: &MockFSM{},
logger: log.New(&testLoggerAdapter{t: t}, "", log.Lmicroseconds),
}
trans, err := NewTCPTransport("127.0.0.1:0", nil, 2, time.Second, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
env.trans = trans
env.peers = NewJSONPeers(dir, trans)
env.logger.Printf("[INFO] Starting node at %v", trans.LocalAddr())
raft, err := NewRaft(conf, env.fsm, stable, stable, snap, env.peers, trans)
if err != nil {
t.Fatalf("err: %v", err)
}
env.raft = raft
return env
}
func WaitFor(env *RaftEnv, state RaftState) error {
limit := time.Now().Add(200 * time.Millisecond)
for env.raft.State() != state {
if time.Now().Before(limit) {
time.Sleep(10 * time.Millisecond)
} else {
return fmt.Errorf("failed to transition to state %v", state)
}
}
return nil
}
func WaitForAny(state RaftState, envs []*RaftEnv) (*RaftEnv, error) {
limit := time.Now().Add(200 * time.Millisecond)
CHECK:
for _, env := range envs {
if env.raft.State() == state {
return env, nil
}
}
if time.Now().Before(limit) {
goto WAIT
}
return nil, fmt.Errorf("failed to find node in %v state", state)
WAIT:
time.Sleep(10 * time.Millisecond)
goto CHECK
}
func WaitFuture(f Future, t *testing.T) error {
timer := time.AfterFunc(200*time.Millisecond, func() {
panic(fmt.Errorf("timeout waiting for future %v", f))
})
defer timer.Stop()
return f.Error()
}
func NoErr(err error, t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
}
func CheckConsistent(envs []*RaftEnv, t *testing.T) {
limit := time.Now().Add(400 * time.Millisecond)
first := envs[0]
var err error
CHECK:
l1 := len(first.fsm.logs)
for i := 1; i < len(envs); i++ {
env := envs[i]
l2 := len(env.fsm.logs)
if l1 != l2 {
err = fmt.Errorf("log length mismatch %d %d", l1, l2)
goto ERR
}
for idx, log := range first.fsm.logs {
other := env.fsm.logs[idx]
if !bytes.Equal(log, other) {
err = fmt.Errorf("log %d mismatch %v %v", idx, log, other)
goto ERR
}
}
}
return
ERR:
if time.Now().After(limit) {
t.Fatalf("%v", err)
}
time.Sleep(20 * time.Millisecond)
goto CHECK
}
// Tests Raft by creating a cluster, growing it to 5 nodes while
// causing various stressful conditions
func TestRaft_Integ(t *testing.T) {
CheckInteg(t)
conf := DefaultConfig()
conf.HeartbeatTimeout = 50 * time.Millisecond
conf.ElectionTimeout = 50 * time.Millisecond
conf.LeaderLeaseTimeout = 50 * time.Millisecond
conf.CommitTimeout = 5 * time.Millisecond
conf.SnapshotThreshold = 100
conf.TrailingLogs = 10
conf.EnableSingleNode = true
// Create a single node
env1 := MakeRaft(t, conf)
NoErr(WaitFor(env1, Leader), t)
// Do some commits
var futures []Future
for i := 0; i < 100; i++ {
futures = append(futures, env1.raft.Apply([]byte(fmt.Sprintf("test%d", i)), 0))
}
for _, f := range futures {
NoErr(WaitFuture(f, t), t)
env1.logger.Printf("[DEBUG] Applied %v", f)
}
// Do a snapshot
NoErr(WaitFuture(env1.raft.Snapshot(), t), t)
// Join a few nodes!
var envs []*RaftEnv
for i := 0; i < 4; i++ {
env := MakeRaft(t, conf)
addr := env.trans.LocalAddr()
NoErr(WaitFuture(env1.raft.AddPeer(addr), t), t)
envs = append(envs, env)
}
// Wait for a leader
leader, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))
NoErr(err, t)
// Do some more commits
futures = nil
for i := 0; i < 100; i++ {
futures = append(futures, leader.raft.Apply([]byte(fmt.Sprintf("test%d", i)), 0))
}
for _, f := range futures {
NoErr(WaitFuture(f, t), t)
leader.logger.Printf("[DEBUG] Applied %v", f)
}
// Shoot two nodes in the head!
rm1, rm2 := envs[0], envs[1]
rm1.Release()
rm2.Release()
envs = envs[2:]
time.Sleep(10 * time.Millisecond)
// Wait for a leader
leader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))
NoErr(err, t)
// Do some more commits
futures = nil
for i := 0; i < 100; i++ {
futures = append(futures, leader.raft.Apply([]byte(fmt.Sprintf("test%d", i)), 0))
}
for _, f := range futures {
NoErr(WaitFuture(f, t), t)
leader.logger.Printf("[DEBUG] Applied %v", f)
}
// Join a few new nodes!
for i := 0; i < 2; i++ {
env := MakeRaft(t, conf)
addr := env.trans.LocalAddr()
NoErr(WaitFuture(leader.raft.AddPeer(addr), t), t)
envs = append(envs, env)
}
// Remove the old nodes
NoErr(WaitFuture(leader.raft.RemovePeer(rm1.raft.localAddr), t), t)
NoErr(WaitFuture(leader.raft.RemovePeer(rm2.raft.localAddr), t), t)
// Shoot the leader
env1.Release()
time.Sleep(3 * conf.HeartbeatTimeout)
// Wait for a leader
leader, err = WaitForAny(Leader, envs)
NoErr(err, t)
allEnvs := append([]*RaftEnv{env1}, envs...)
CheckConsistent(allEnvs, t)
if len(env1.fsm.logs) != 300 {
t.Fatalf("should apply 300 logs! %d", len(env1.fsm.logs))
}
for _, e := range envs {
e.Release()
}
}

67
go/vt/orchestrator/external/raft/log.go vendored Normal file

@@ -0,0 +1,67 @@
package raft
// LogType describes various types of log entries.
type LogType uint8
const (
// LogCommand is applied to a user FSM.
LogCommand LogType = iota
// LogNoop is used to assert leadership.
LogNoop
// LogAddPeer is used to add a new peer.
LogAddPeer
// LogRemovePeer is used to remove an existing peer.
LogRemovePeer
// LogBarrier is used to ensure all preceding operations have been
// applied to the FSM. It is similar to LogNoop, but instead of returning
// once committed, it only returns once the FSM manager acks it. Otherwise
// it is possible there are operations committed but not yet applied to
// the FSM.
LogBarrier
)
// Log entries are replicated to all members of the Raft cluster
// and form the heart of the replicated state machine.
type Log struct {
// Index holds the index of the log entry.
Index uint64
// Term holds the election term of the log entry.
Term uint64
// Type holds the type of the log entry.
Type LogType
// Data holds the log entry's type-specific data.
Data []byte
// peer is not exported since it is not transmitted, only used
// internally to construct the Data field.
peer string
}
// LogStore is used to provide an interface for storing
// and retrieving logs in a durable fashion.
type LogStore interface {
// FirstIndex returns the first index written. 0 for no entries.
FirstIndex() (uint64, error)
// LastIndex returns the last index written. 0 for no entries.
LastIndex() (uint64, error)
// GetLog gets a log entry at a given index.
GetLog(index uint64, log *Log) error
// StoreLog stores a log entry.
StoreLog(log *Log) error
// StoreLogs stores multiple log entries.
StoreLogs(logs []*Log) error
// DeleteRange deletes a range of log entries. The range is inclusive.
DeleteRange(min, max uint64) error
}

79
go/vt/orchestrator/external/raft/log_cache.go vendored Normal file

@@ -0,0 +1,79 @@
package raft
import (
"fmt"
"sync"
)
// LogCache wraps any LogStore implementation to provide an
// in-memory ring buffer. This is used to cache access to
// the recently written entries. For implementations that do not
// cache themselves, this can provide a substantial boost by
// avoiding disk I/O on recent entries.
type LogCache struct {
store LogStore
cache []*Log
l sync.RWMutex
}
// NewLogCache is used to create a new LogCache with the
// given capacity and backend store.
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
if capacity <= 0 {
return nil, fmt.Errorf("capacity must be positive")
}
c := &LogCache{
store: store,
cache: make([]*Log, capacity),
}
return c, nil
}
func (c *LogCache) GetLog(idx uint64, log *Log) error {
// Check the buffer for an entry
c.l.RLock()
cached := c.cache[idx%uint64(len(c.cache))]
c.l.RUnlock()
// Check if entry is valid
if cached != nil && cached.Index == idx {
*log = *cached
return nil
}
// Forward request on cache miss
return c.store.GetLog(idx, log)
}
func (c *LogCache) StoreLog(log *Log) error {
return c.StoreLogs([]*Log{log})
}
func (c *LogCache) StoreLogs(logs []*Log) error {
// Insert the logs into the ring buffer
c.l.Lock()
for _, l := range logs {
c.cache[l.Index%uint64(len(c.cache))] = l
}
c.l.Unlock()
return c.store.StoreLogs(logs)
}
func (c *LogCache) FirstIndex() (uint64, error) {
return c.store.FirstIndex()
}
func (c *LogCache) LastIndex() (uint64, error) {
return c.store.LastIndex()
}
func (c *LogCache) DeleteRange(min, max uint64) error {
// Invalidate the cache on deletes
c.l.Lock()
c.cache = make([]*Log, len(c.cache))
c.l.Unlock()
return c.store.DeleteRange(min, max)
}
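The slot for an entry is its index modulo the cache capacity, so distinct indexes can collide and evict each other; GetLog guards against this by re-checking cached.Index before trusting the slot. A sketch of a collision:
func cacheCollisionDemo() {
	c, _ := NewLogCache(16, NewInmemStore())
	_ = c.StoreLog(&Log{Index: 17}) // slot 17 % 16 == 1
	_ = c.StoreLog(&Log{Index: 33}) // slot 33 % 16 == 1, evicts entry 17
	var out Log
	_ = c.GetLog(17, &out) // index check misses; falls through to the store
}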

88
go/vt/orchestrator/external/raft/log_cache_test.go vendored Normal file

@@ -0,0 +1,88 @@
package raft
import (
"testing"
)
func TestLogCache(t *testing.T) {
store := NewInmemStore()
c, _ := NewLogCache(16, store)
// Insert into the in-mem store
for i := 0; i < 32; i++ {
log := &Log{Index: uint64(i) + 1}
store.StoreLog(log)
}
// Check the indexes
if idx, _ := c.FirstIndex(); idx != 1 {
t.Fatalf("bad: %d", idx)
}
if idx, _ := c.LastIndex(); idx != 32 {
t.Fatalf("bad: %d", idx)
}
// Try get log with a miss
var out Log
err := c.GetLog(1, &out)
if err != nil {
t.Fatalf("err: %v", err)
}
if out.Index != 1 {
t.Fatalf("bad: %#v", out)
}
// Store logs
l1 := &Log{Index: 33}
l2 := &Log{Index: 34}
err = c.StoreLogs([]*Log{l1, l2})
if err != nil {
t.Fatalf("err: %v", err)
}
if idx, _ := c.LastIndex(); idx != 34 {
t.Fatalf("bad: %d", idx)
}
// Check that it wrote-through
err = store.GetLog(33, &out)
if err != nil {
t.Fatalf("err: %v", err)
}
err = store.GetLog(34, &out)
if err != nil {
t.Fatalf("err: %v", err)
}
// Delete in the backend
err = store.DeleteRange(33, 34)
if err != nil {
t.Fatalf("err: %v", err)
}
// Should be in the ring buffer
err = c.GetLog(33, &out)
if err != nil {
t.Fatalf("err: %v", err)
}
err = c.GetLog(34, &out)
if err != nil {
t.Fatalf("err: %v", err)
}
// Purge the ring buffer
err = c.DeleteRange(33, 34)
if err != nil {
t.Fatalf("err: %v", err)
}
// Should not be in the ring buffer
err = c.GetLog(33, &out)
if err != ErrLogNotFound {
t.Fatalf("err: %v", err)
}
err = c.GetLog(34, &out)
if err != ErrLogNotFound {
t.Fatalf("err: %v", err)
}
}

623
go/vt/orchestrator/external/raft/net_transport.go vendored Normal file

@@ -0,0 +1,623 @@
package raft
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"net"
"os"
"sync"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
const (
rpcAppendEntries uint8 = iota
rpcRequestVote
rpcInstallSnapshot
// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
DefaultTimeoutScale = 256 * 1024 // 256KB
// rpcMaxPipeline controls the maximum number of outstanding
// AppendEntries RPC calls.
rpcMaxPipeline = 128
)
var (
// ErrTransportShutdown is returned when operations on a transport are
// invoked after it's been terminated.
ErrTransportShutdown = errors.New("transport shutdown")
// ErrPipelineShutdown is returned when the pipeline is closed.
ErrPipelineShutdown = errors.New("append pipeline closed")
)
/*
NetworkTransport provides a network based transport that can be
used to communicate with Raft on remote machines. It requires
an underlying stream layer to provide a stream abstraction, which can
be simple TCP, TLS, etc.
This transport is very simple and lightweight. Each RPC request is
framed by sending a byte that indicates the message type, followed
by the MsgPack encoded request.
The response is an error string followed by the response object,
both are encoded using MsgPack.
InstallSnapshot is special, in that after the RPC request we stream
the entire state. That socket is not re-used as the connection state
is not known if there is an error.
*/
type NetworkTransport struct {
connPool map[string][]*netConn
connPoolLock sync.Mutex
consumeCh chan RPC
heartbeatFn func(RPC)
heartbeatFnLock sync.Mutex
logger *log.Logger
maxPool int
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
stream StreamLayer
timeout time.Duration
TimeoutScale int
}
// StreamLayer is used with the NetworkTransport to provide
// the low level stream abstraction.
type StreamLayer interface {
net.Listener
// Dial is used to create a new outgoing connection
Dial(address string, timeout time.Duration) (net.Conn, error)
}
type netConn struct {
target string
conn net.Conn
r *bufio.Reader
w *bufio.Writer
dec *codec.Decoder
enc *codec.Encoder
}
func (n *netConn) Release() error {
return n.conn.Close()
}
type netPipeline struct {
conn *netConn
trans *NetworkTransport
doneCh chan AppendFuture
inprogressCh chan *appendFuture
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
// NewNetworkTransport creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransport(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) *NetworkTransport {
if logOutput == nil {
logOutput = os.Stderr
}
return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
}
// NewNetworkTransportWithLogger creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransportWithLogger(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) *NetworkTransport {
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
trans := &NetworkTransport{
connPool: make(map[string][]*netConn),
consumeCh: make(chan RPC),
logger: logger,
maxPool: maxPool,
shutdownCh: make(chan struct{}),
stream: stream,
timeout: timeout,
TimeoutScale: DefaultTimeoutScale,
}
go trans.listen()
return trans
}
// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast-pass. This is to avoid head-of-line blocking from
// disk IO.
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
n.heartbeatFnLock.Lock()
defer n.heartbeatFnLock.Unlock()
n.heartbeatFn = cb
}
// Close is used to stop the network transport.
func (n *NetworkTransport) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if !n.shutdown {
close(n.shutdownCh)
n.stream.Close()
n.shutdown = true
}
return nil
}
// Consumer implements the Transport interface.
func (n *NetworkTransport) Consumer() <-chan RPC {
return n.consumeCh
}
// LocalAddr implements the Transport interface.
func (n *NetworkTransport) LocalAddr() string {
return n.stream.Addr().String()
}
// IsShutdown is used to check if the transport is shutdown.
func (n *NetworkTransport) IsShutdown() bool {
select {
case <-n.shutdownCh:
return true
default:
return false
}
}
// getPooledConn is used to grab a pooled connection.
func (n *NetworkTransport) getPooledConn(target string) *netConn {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
conns, ok := n.connPool[target]
if !ok || len(conns) == 0 {
return nil
}
var conn *netConn
num := len(conns)
conn, conns[num-1] = conns[num-1], nil
n.connPool[target] = conns[:num-1]
return conn
}
// getConn is used to get a connection from the pool.
func (n *NetworkTransport) getConn(target string) (*netConn, error) {
// Check for a pooled conn
if conn := n.getPooledConn(target); conn != nil {
return conn, nil
}
// Dial a new connection
conn, err := n.stream.Dial(target, n.timeout)
if err != nil {
return nil, err
}
// Wrap the conn
netConn := &netConn{
target: target,
conn: conn,
r: bufio.NewReader(conn),
w: bufio.NewWriter(conn),
}
// Setup encoder/decoders
netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
// Done
return netConn, nil
}
// returnConn returns a connection back to the pool.
func (n *NetworkTransport) returnConn(conn *netConn) {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
key := conn.target
conns := n.connPool[key]
if !n.IsShutdown() && len(conns) < n.maxPool {
n.connPool[key] = append(conns, conn)
} else {
conn.Release()
}
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
// Get a connection
conn, err := n.getConn(target)
if err != nil {
return nil, err
}
// Create the pipeline
return newNetPipeline(n, conn), nil
}
// AppendEntries implements the Transport interface.
func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
return n.genericRPC(target, rpcAppendEntries, args, resp)
}
// RequestVote implements the Transport interface.
func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
return n.genericRPC(target, rpcRequestVote, args, resp)
}
// genericRPC handles a simple request/response RPC.
func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error {
// Get a conn
conn, err := n.getConn(target)
if err != nil {
return err
}
// Set a deadline
if n.timeout > 0 {
conn.conn.SetDeadline(time.Now().Add(n.timeout))
}
// Send the RPC
if err = sendRPC(conn, rpcType, args); err != nil {
return err
}
// Decode the response
canReturn, err := decodeResponse(conn, resp)
if canReturn {
n.returnConn(conn)
}
return err
}
// InstallSnapshot implements the Transport interface.
func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
// Get a conn, always close for InstallSnapshot
conn, err := n.getConn(target)
if err != nil {
return err
}
defer conn.Release()
// Set a deadline, scaled by request size
if n.timeout > 0 {
timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
if timeout < n.timeout {
timeout = n.timeout
}
conn.conn.SetDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err = sendRPC(conn, rpcInstallSnapshot, args); err != nil {
return err
}
// Stream the state
if _, err = io.Copy(conn.w, data); err != nil {
return err
}
// Flush
if err = conn.w.Flush(); err != nil {
return err
}
// Decode the response, do not return conn
_, err = decodeResponse(conn, resp)
return err
}
// EncodePeer implements the Transport interface.
func (n *NetworkTransport) EncodePeer(p string) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface.
func (n *NetworkTransport) DecodePeer(buf []byte) string {
return string(buf)
}
// listen is used to handle incoming connections.
func (n *NetworkTransport) listen() {
for {
// Accept incoming connections
conn, err := n.stream.Accept()
if err != nil {
if n.IsShutdown() {
return
}
n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
continue
}
n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
// Handle the connection in dedicated routine
go n.handleConn(conn)
}
}
// handleConn is used to handle an inbound connection for its lifespan.
func (n *NetworkTransport) handleConn(conn net.Conn) {
defer conn.Close()
r := bufio.NewReader(conn)
w := bufio.NewWriter(conn)
dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
for {
if err := n.handleCommand(r, dec, enc); err != nil {
if err != io.EOF {
n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
}
return
}
if err := w.Flush(); err != nil {
n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
return
}
}
}
// handleCommand is used to decode and dispatch a single command.
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
// Get the rpc type
rpcType, err := r.ReadByte()
if err != nil {
return err
}
// Create the RPC object
respCh := make(chan RPCResponse, 1)
rpc := RPC{
RespChan: respCh,
}
// Decode the command
isHeartbeat := false
switch rpcType {
case rpcAppendEntries:
var req AppendEntriesRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
// Check if this is a heartbeat
if req.Term != 0 && req.Leader != nil &&
req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
isHeartbeat = true
}
case rpcRequestVote:
var req RequestVoteRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
case rpcInstallSnapshot:
var req InstallSnapshotRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
rpc.Reader = io.LimitReader(r, req.Size)
default:
return fmt.Errorf("unknown rpc type %d", rpcType)
}
// Check for heartbeat fast-path
if isHeartbeat {
n.heartbeatFnLock.Lock()
fn := n.heartbeatFn
n.heartbeatFnLock.Unlock()
if fn != nil {
fn(rpc)
goto RESP
}
}
// Dispatch the RPC
select {
case n.consumeCh <- rpc:
case <-n.shutdownCh:
return ErrTransportShutdown
}
// Wait for response
RESP:
select {
case resp := <-respCh:
// Send the error first
respErr := ""
if resp.Error != nil {
respErr = resp.Error.Error()
}
if err := enc.Encode(respErr); err != nil {
return err
}
// Send the response
if err := enc.Encode(resp.Response); err != nil {
return err
}
case <-n.shutdownCh:
return ErrTransportShutdown
}
return nil
}
// decodeResponse is used to decode an RPC response and reports whether
// the connection can be reused.
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
// Decode the error if any
var rpcError string
if err := conn.dec.Decode(&rpcError); err != nil {
conn.Release()
return false, err
}
// Decode the response
if err := conn.dec.Decode(resp); err != nil {
conn.Release()
return false, err
}
// Format an error if any
if rpcError != "" {
return true, errors.New(rpcError) // not fmt.Errorf: the error text is not a format string
}
return true, nil
}
// sendRPC is used to encode and send the RPC.
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
// Write the request type
if err := conn.w.WriteByte(rpcType); err != nil {
conn.Release()
return err
}
// Send the request
if err := conn.enc.Encode(args); err != nil {
conn.Release()
return err
}
// Flush
if err := conn.w.Flush(); err != nil {
conn.Release()
return err
}
return nil
}
// newNetPipeline is used to construct a netPipeline from a given
// transport and connection.
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
n := &netPipeline{
conn: conn,
trans: trans,
doneCh: make(chan AppendFuture, rpcMaxPipeline),
inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
shutdownCh: make(chan struct{}),
}
go n.decodeResponses()
return n
}
// decodeResponses is a long running routine that decodes the responses
// sent on the connection.
func (n *netPipeline) decodeResponses() {
timeout := n.trans.timeout
for {
select {
case future := <-n.inprogressCh:
if timeout > 0 {
n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
}
_, err := decodeResponse(n.conn, future.resp)
future.respond(err)
select {
case n.doneCh <- future:
case <-n.shutdownCh:
return
}
case <-n.shutdownCh:
return
}
}
}
// AppendEntries is used to pipeline a new append entries request.
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Add a send timeout
if timeout := n.trans.timeout; timeout > 0 {
n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
return nil, err
}
// Hand-off for decoding, this can also cause back-pressure
// to prevent too many inflight requests
select {
case n.inprogressCh <- future:
return future, nil
case <-n.shutdownCh:
return nil, ErrPipelineShutdown
}
}
// Consumer returns a channel that can be used to consume complete futures.
func (n *netPipeline) Consumer() <-chan AppendFuture {
return n.doneCh
}
// Close is used to shut down the pipeline connection.
func (n *netPipeline) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if n.shutdown {
return nil
}
// Release the connection
n.conn.Release()
n.shutdown = true
close(n.shutdownCh)
return nil
}
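The framing described at the top of this file (one rpc-type byte, then the msgpack-encoded request, answered by a msgpack error string plus the response object) is exactly what sendRPC and decodeResponse implement. A client-side sketch of the request half, illustrative only; real callers go through getConn:
func encodeRequest(w *bufio.Writer, req *RequestVoteRequest) error {
	if err := w.WriteByte(rpcRequestVote); err != nil {
		return err
	}
	enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
	if err := enc.Encode(req); err != nil {
		return err
	}
	return w.Flush()
}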

449
go/vt/orchestrator/external/raft/net_transport_test.go vendored Normal file

@@ -0,0 +1,449 @@
package raft
import (
"bytes"
"reflect"
"sync"
"testing"
"time"
)
func TestNetworkTransport_StartStop(t *testing.T) {
trans, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
trans.Close()
}
func TestNetworkTransport_Heartbeat_FastPath(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
invoked := false
fastpath := func(rpc RPC) {
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
invoked = true
}
trans1.SetHeartbeatHandler(fastpath)
// Transport 2 makes outbound request
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
var out AppendEntriesResponse
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
// Ensure fast-path is used
if !invoked {
t.Fatalf("fast-path not used")
}
}
func TestNetworkTransport_AppendEntries(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
PrevLogEntry: 100,
PrevLogTerm: 4,
Entries: []*Log{
{
Index: 101,
Term: 4,
Type: LogNoop,
},
},
LeaderCommitIndex: 90,
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
var out AppendEntriesResponse
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
PrevLogEntry: 100,
PrevLogTerm: 4,
Entries: []*Log{
{
Index: 101,
Term: 4,
Type: LogNoop,
},
},
LeaderCommitIndex: 90,
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
// Listen for a request
go func() {
for i := 0; i < 10; i++ {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}
}()
// Transport 2 makes outbound request
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
defer pipeline.Close()
for i := 0; i < 10; i++ {
out := new(AppendEntriesResponse)
if _, err := pipeline.AppendEntries(&args, out); err != nil {
t.Fatalf("err: %v", err)
}
}
respCh := pipeline.Consumer()
for i := 0; i < 10; i++ {
select {
case ready := <-respCh:
// Verify the response
if !reflect.DeepEqual(&resp, ready.Response()) {
t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response())
}
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}
}
func TestNetworkTransport_RequestVote(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := RequestVoteRequest{
Term: 20,
Candidate: []byte("butters"),
LastLogIndex: 100,
LastLogTerm: 19,
}
resp := RequestVoteResponse{
Term: 100,
Peers: []byte("blah"),
Granted: false,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*RequestVoteRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
var out RequestVoteResponse
if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
func TestNetworkTransport_InstallSnapshot(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := InstallSnapshotRequest{
Term: 10,
Leader: []byte("kyle"),
LastLogIndex: 100,
LastLogTerm: 9,
Peers: []byte("blah blah"),
Size: 10,
}
resp := InstallSnapshotResponse{
Term: 10,
Success: true,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*InstallSnapshotRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
// Try to read the bytes
buf := make([]byte, 10)
rpc.Reader.Read(buf)
// Compare
if !bytes.Equal(buf, []byte("0123456789")) {
t.Fatalf("bad buf %v", buf)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
// Create a buffer
buf := bytes.NewBuffer([]byte("0123456789"))
var out InstallSnapshotResponse
if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
func TestNetworkTransport_EncodeDecode(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
local := trans1.LocalAddr()
enc := trans1.EncodePeer(local)
dec := trans1.DecodePeer(enc)
if dec != local {
t.Fatalf("enc/dec fail: %v %v", dec, local)
}
}
func TestNetworkTransport_PooledConn(t *testing.T) {
// Transport 1 is consumer
trans1, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 2, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
PrevLogEntry: 100,
PrevLogTerm: 4,
Entries: []*Log{
{
Index: 101,
Term: 4,
Type: LogNoop,
},
},
LeaderCommitIndex: 90,
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
// Listen for a request
go func() {
for {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
return
}
}
}()
// Transport 2 makes outbound request, 3 conn pool
trans2, err := NewTCPTransportWithLogger("127.0.0.1:0", nil, 3, time.Second, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
defer trans2.Close()
// Create wait group
wg := &sync.WaitGroup{}
wg.Add(5)
appendFunc := func() {
defer wg.Done()
var out AppendEntriesResponse
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
// Try to do parallel appends, should stress the conn pool
for i := 0; i < 5; i++ {
go appendFunc()
}
// Wait for the routines to finish
wg.Wait()
// Check the conn pool size
addr := trans1.LocalAddr()
if len(trans2.connPool[addr]) != 3 {
t.Fatalf("Expected 2 pooled conns!")
}
}

go/vt/orchestrator/external/raft/observer.go (vendored, new file)
@@ -0,0 +1,120 @@
package raft
import (
"sync/atomic"
)
// Observation is sent along the given channel to observers when an event occurs.
type Observation struct {
// Raft holds the Raft instance generating the observation.
Raft *Raft
// Data holds observation-specific data. Possible types are
// *RequestVoteRequest, RaftState and LeaderObservation.
Data interface{}
}
// LeaderObservation is used in Observation.Data when leadership changes.
type LeaderObservation struct {
Leader string
}
// nextObserverID is used to provide a unique ID for each observer to aid in
// deregistration.
var nextObserverID uint64
// FilterFn is a function that can be registered in order to filter observations.
// The function reports whether the observation should be included - if
// it returns false, the observation will be filtered out.
type FilterFn func(o *Observation) bool
// Observer describes what to do with a given observation.
type Observer struct {
// channel receives observations.
channel chan Observation
// blocking, if true, will cause Raft to block when sending an observation
// to this observer. This should generally be set to false.
blocking bool
// filter will be called to determine if an observation should be sent to
// the channel.
filter FilterFn
// id is the ID of this observer in the Raft map.
id uint64
// numObserved and numDropped are performance counters for this observer.
numObserved uint64
numDropped uint64
}
// NewObserver creates a new observer that can be registered
// to make observations on a Raft instance. Observations
// will be sent on the given channel if they satisfy the
// given filter.
//
// If blocking is true, the observer will block when it can't
// send on the channel, otherwise it may discard events.
func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer {
return &Observer{
channel: channel,
blocking: blocking,
filter: filter,
id: atomic.AddUint64(&nextObserverID, 1),
}
}
// GetNumObserved returns the number of observations.
func (or *Observer) GetNumObserved() uint64 {
return atomic.LoadUint64(&or.numObserved)
}
// GetNumDropped returns the number of dropped observations due to blocking.
func (or *Observer) GetNumDropped() uint64 {
return atomic.LoadUint64(&or.numDropped)
}
// RegisterObserver registers a new observer.
func (r *Raft) RegisterObserver(or *Observer) {
r.observersLock.Lock()
defer r.observersLock.Unlock()
r.observers[or.id] = or
}
// DeregisterObserver deregisters an observer.
func (r *Raft) DeregisterObserver(or *Observer) {
r.observersLock.Lock()
defer r.observersLock.Unlock()
delete(r.observers, or.id)
}
// observe sends an observation to every observer.
func (r *Raft) observe(o interface{}) {
// In general observers should not block. But in any case this isn't
// disastrous as we only hold a read lock, which merely prevents
// registration / deregistration of observers.
r.observersLock.RLock()
defer r.observersLock.RUnlock()
for _, or := range r.observers {
// It's wasteful to do this in the loop, but for the common case
// where there are no observers we won't create any objects.
ob := Observation{Raft: r, Data: o}
if or.filter != nil && !or.filter(&ob) {
continue
}
if or.channel == nil {
continue
}
if or.blocking {
or.channel <- ob
atomic.AddUint64(&or.numObserved, 1)
} else {
select {
case or.channel <- ob:
atomic.AddUint64(&or.numObserved, 1)
default:
atomic.AddUint64(&or.numDropped, 1)
}
}
}
}
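A short usage sketch makes the observer API above concrete. The following is illustrative only (it is not part of the vendored file) and assumes an existing *Raft instance plus the standard log package:

// watchLeadership registers a non-blocking observer that logs leadership
// changes. Illustrative sketch; r is assumed to come from the package's
// Raft constructor.
func watchLeadership(r *Raft) *Observer {
	obsCh := make(chan Observation, 16)
	filter := func(o *Observation) bool {
		_, ok := o.Data.(LeaderObservation)
		return ok
	}
	// blocking=false: if obsCh is full the event is dropped and
	// numDropped is incremented instead of stalling Raft.
	or := NewObserver(obsCh, false, filter)
	r.RegisterObserver(or)
	go func() {
		for o := range obsCh {
			lo := o.Data.(LeaderObservation)
			log.Printf("leader changed to %q", lo.Leader)
		}
	}()
	return or // pass to DeregisterObserver when done
}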

go/vt/orchestrator/external/raft/peer.go (vendored, new file)
@@ -0,0 +1,122 @@
package raft
import (
"bytes"
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"sync"
)
const (
jsonPeerPath = "peers.json"
)
// PeerStore provides an interface for persistent storage and
// retrieval of peers. We use a separate interface than StableStore
// since the peers may need to be edited by a human operator. For example,
// in a two node cluster, the failure of either node requires human intervention
// since consensus is impossible.
type PeerStore interface {
// Peers returns the list of known peers.
Peers() ([]string, error)
// SetPeers sets the list of known peers. This is invoked when a peer is
// added or removed.
SetPeers([]string) error
}
// StaticPeers is used to provide a static list of peers.
type StaticPeers struct {
StaticPeers []string
l sync.Mutex
}
// Peers implements the PeerStore interface.
func (s *StaticPeers) Peers() ([]string, error) {
s.l.Lock()
peers := s.StaticPeers
s.l.Unlock()
return peers, nil
}
// SetPeers implements the PeerStore interface.
func (s *StaticPeers) SetPeers(p []string) error {
s.l.Lock()
s.StaticPeers = p
s.l.Unlock()
return nil
}
// JSONPeers is used to provide peer persistence on disk in the form
// of a JSON file. This allows human operators to manipulate the file.
type JSONPeers struct {
l sync.Mutex
path string
trans Transport
}
// NewJSONPeers creates a new JSONPeers store. Requires a transport
// to handle the serialization of network addresses.
func NewJSONPeers(base string, trans Transport) *JSONPeers {
path := filepath.Join(base, jsonPeerPath)
store := &JSONPeers{
path: path,
trans: trans,
}
return store
}
// Peers implements the PeerStore interface.
func (j *JSONPeers) Peers() ([]string, error) {
j.l.Lock()
defer j.l.Unlock()
// Read the file
buf, err := ioutil.ReadFile(j.path)
if err != nil && !os.IsNotExist(err) {
return nil, err
}
// Check for no peers
if len(buf) == 0 {
return nil, nil
}
// Decode the peers
var peerSet []string
dec := json.NewDecoder(bytes.NewReader(buf))
if err := dec.Decode(&peerSet); err != nil {
return nil, err
}
// Deserialize each peer
var peers []string
for _, p := range peerSet {
peers = append(peers, j.trans.DecodePeer([]byte(p)))
}
return peers, nil
}
// SetPeers implements the PeerStore interface.
func (j *JSONPeers) SetPeers(peers []string) error {
j.l.Lock()
defer j.l.Unlock()
// Encode each peer
var peerSet []string
for _, p := range peers {
peerSet = append(peerSet, string(j.trans.EncodePeer(p)))
}
// Convert to JSON
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
if err := enc.Encode(peerSet); err != nil {
return err
}
// Write out as JSON
return ioutil.WriteFile(j.path, buf.Bytes(), 0755)
}
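Since JSONPeers exists precisely so a human operator can hand-edit the peer set, it helps to see the on-disk format. SetPeers encodes each peer through the transport (for a TCP transport this is simply the address string) and writes one JSON array, so a three-node cluster's peers.json would look like the following (addresses are illustrative):

["10.0.0.1:8300","10.0.0.2:8300","10.0.0.3:8300"]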

go/vt/orchestrator/external/raft/peer_test.go (vendored, new file)
@@ -0,0 +1,44 @@
package raft
import (
"io/ioutil"
"os"
"testing"
)
func TestJSONPeers(t *testing.T) {
// Create a test dir
dir, err := ioutil.TempDir("", "raft")
if err != nil {
t.Fatalf("err: %v ", err)
}
defer os.RemoveAll(dir)
// Create the store
_, trans := NewInmemTransport("")
store := NewJSONPeers(dir, trans)
// Try a read, should get nothing
peers, err := store.Peers()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(peers) != 0 {
t.Fatalf("peers: %v", peers)
}
// Initialize some peers
newPeers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
if err := store.SetPeers(newPeers); err != nil {
t.Fatalf("err: %v", err)
}
// Try a read, should return the peers
peers, err = store.Peers()
if err != nil {
t.Fatalf("err: %v", err)
}
if len(peers) != 3 {
t.Fatalf("peers: %v", peers)
}
}

go/vt/orchestrator/external/raft/raft.go (vendored, new file, 1958 lines)
Diff not shown because of its size.

go/vt/orchestrator/external/raft/raft_test.go (vendored, new file, 1845 lines)
Diff not shown because of its size.

go/vt/orchestrator/external/raft/replication.go (vendored, new file)
@@ -0,0 +1,522 @@
package raft
import (
"errors"
"fmt"
"sync"
"time"
"github.com/armon/go-metrics"
)
const (
maxFailureScale = 12
failureWait = 10 * time.Millisecond
)
var (
// ErrLogNotFound indicates a given log entry is not available.
ErrLogNotFound = errors.New("log not found")
// ErrPipelineReplicationNotSupported can be returned by the transport to
// signal that pipeline replication is not supported in general, and that
// no error message should be produced.
ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
)
type followerReplication struct {
peer string
inflight *inflight
stopCh chan uint64
triggerCh chan struct{}
currentTerm uint64
matchIndex uint64
nextIndex uint64
lastContact time.Time
lastContactLock sync.RWMutex
failures uint64
notifyCh chan struct{}
notify []*verifyFuture
notifyLock sync.Mutex
// stepDown is used to indicate to the leader that we
// should step down based on information from a follower.
stepDown chan struct{}
// allowPipeline is used to control whether
// pipeline replication should be enabled.
allowPipeline bool
}
// notifyAll is used to notify all the waiting verify futures
// if the follower believes we are still the leader.
func (s *followerReplication) notifyAll(leader bool) {
// Clear the waiting notifies minimizing lock time
s.notifyLock.Lock()
n := s.notify
s.notify = nil
s.notifyLock.Unlock()
// Submit our votes
for _, v := range n {
v.vote(leader)
}
}
// LastContact returns the time of last contact.
func (s *followerReplication) LastContact() time.Time {
s.lastContactLock.RLock()
last := s.lastContact
s.lastContactLock.RUnlock()
return last
}
// setLastContact sets the last contact to the current time.
func (s *followerReplication) setLastContact() {
s.lastContactLock.Lock()
s.lastContact = time.Now()
s.lastContactLock.Unlock()
}
// replicate is a long running routine that is used to manage
// the process of replicating logs to our followers.
func (r *Raft) replicate(s *followerReplication) {
// Start an async heartbeating routine
stopHeartbeat := make(chan struct{})
defer close(stopHeartbeat)
r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
RPC:
shouldStop := false
for !shouldStop {
select {
case maxIndex := <-s.stopCh:
// Make a best effort to replicate up to this index
if maxIndex > 0 {
r.replicateTo(s, maxIndex)
}
return
case <-s.triggerCh:
lastLogIdx, _ := r.getLastLog()
shouldStop = r.replicateTo(s, lastLogIdx)
case <-randomTimeout(r.conf.CommitTimeout):
lastLogIdx, _ := r.getLastLog()
shouldStop = r.replicateTo(s, lastLogIdx)
}
// If things look healthy, switch to pipeline mode
if !shouldStop && s.allowPipeline {
goto PIPELINE
}
}
return
PIPELINE:
// Disable until re-enabled
s.allowPipeline = false
// Replicates using a pipeline for high performance. This method
// is not able to gracefully recover from errors, and so we fall back
// to standard mode on failure.
if err := r.pipelineReplicate(s); err != nil {
if err != ErrPipelineReplicationNotSupported {
r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
}
}
goto RPC
}
// replicateTo is used to replicate the logs up to a given last index.
// If the follower log is behind, we take care to bring them up to date.
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
// Create the base request
var req AppendEntriesRequest
var resp AppendEntriesResponse
var start time.Time
START:
// Prevent an excessive retry rate on errors
if s.failures > 0 {
select {
case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
case <-r.shutdownCh:
}
}
// Setup the request
if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
goto SEND_SNAP
} else if err != nil {
return
}
// Make the RPC call
start = time.Now()
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
s.failures++
return
}
appendStats(s.peer, start, float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true
}
// Update the last contact
s.setLastContact()
// Update s based on success
if resp.Success {
// Update our replication state
updateLastAppended(s, &req)
// Clear any failures, allow pipelining
s.failures = 0
s.allowPipeline = true
} else {
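// The follower rejected the entries: back nextIndex off by one, but jump
// straight to just past the follower's last log when that is smaller,
// never going below 1. E.g. nextIndex=100 with resp.LastLog=42 yields
// nextIndex=43.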
s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
s.matchIndex = s.nextIndex - 1
if resp.NoRetryBackoff {
s.failures = 0
} else {
s.failures++
}
r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
}
CHECK_MORE:
// Check if there are more logs to replicate
if s.nextIndex <= lastIndex {
goto START
}
return
// SEND_SNAP is used when we fail to get a log, usually because the follower
// is too far behind, and we must ship a snapshot down instead
SEND_SNAP:
if stop, err := r.sendLatestSnapshot(s); stop {
return true
} else if err != nil {
r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
return
}
// Check if there is more to replicate
goto CHECK_MORE
}
// sendLatestSnapshot is used to send the latest snapshot we have
// down to our follower.
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
// Get the snapshots
snapshots, err := r.snapshots.List()
if err != nil {
r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
return false, err
}
// Check we have at least a single snapshot
if len(snapshots) == 0 {
return false, fmt.Errorf("no snapshots found")
}
// Open the most recent snapshot
snapID := snapshots[0].ID
meta, snapshot, err := r.snapshots.Open(snapID)
if err != nil {
r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
return false, err
}
defer snapshot.Close()
// Setup the request
req := InstallSnapshotRequest{
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
LastLogIndex: meta.Index,
LastLogTerm: meta.Term,
Peers: meta.Peers,
Size: meta.Size,
}
// Make the call
start := time.Now()
var resp InstallSnapshotResponse
if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil {
r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
s.failures++
return false, err
}
metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start)
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true, nil
}
// Update the last contact
s.setLastContact()
// Check for success
if resp.Success {
// Mark any inflight logs as committed
s.inflight.CommitRange(s.matchIndex+1, meta.Index)
// Update the indexes
s.matchIndex = meta.Index
s.nextIndex = s.matchIndex + 1
// Clear any failures
s.failures = 0
// Notify we are still leader
s.notifyAll(true)
} else {
s.failures++
r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
}
return false, nil
}
// heartbeat is used to periodically invoke AppendEntries on a peer
// to ensure it doesn't time out. This runs asynchronously from replicate(),
// since that routine could potentially be blocked on disk IO.
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
var failures uint64
req := AppendEntriesRequest{
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
}
var resp AppendEntriesResponse
for {
// Wait for the next heartbeat interval or forced notify
select {
case <-s.notifyCh:
case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
case <-stopCh:
return
}
start := time.Now()
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err)
failures++
select {
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
case <-stopCh:
}
} else {
s.setLastContact()
failures = 0
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start)
s.notifyAll(resp.Success)
}
}
}
// pipelineReplicate is used when we have synchronized our state with the follower,
// and want to switch to a higher performance pipeline mode of replication.
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
// back to the standard replication which can handle more complex situations.
func (r *Raft) pipelineReplicate(s *followerReplication) error {
// Create a new pipeline
pipeline, err := r.trans.AppendEntriesPipeline(s.peer)
if err != nil {
return err
}
defer pipeline.Close()
// Log start and stop of pipeline
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
// Create a shutdown and finish channel
stopCh := make(chan struct{})
finishCh := make(chan struct{})
// Start a dedicated decoder
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
// Start pipeline sends at the last good nextIndex
nextIndex := s.nextIndex
shouldStop := false
SEND:
for !shouldStop {
select {
case <-finishCh:
break SEND
case maxIndex := <-s.stopCh:
if maxIndex > 0 {
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
}
break SEND
case <-s.triggerCh:
lastLogIdx, _ := r.getLastLog()
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
case <-randomTimeout(r.conf.CommitTimeout):
lastLogIdx, _ := r.getLastLog()
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
}
}
// Stop our decoder, and wait for it to finish
close(stopCh)
select {
case <-finishCh:
case <-r.shutdownCh:
}
return nil
}
// pipelineSend is used to send data over a pipeline.
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
// Create a new append request
req := new(AppendEntriesRequest)
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
return true
}
// Pipeline the append entries
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
return true
}
// Increase the next send log to avoid re-sending old logs
if n := len(req.Entries); n > 0 {
last := req.Entries[n-1]
*nextIdx = last.Index + 1
}
return false
}
// pipelineDecode is used to decode the responses of pipelined requests.
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
defer close(finishCh)
respCh := p.Consumer()
for {
select {
case ready := <-respCh:
req, resp := ready.Request(), ready.Response()
appendStats(s.peer, ready.Start(), float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return
}
// Update the last contact
s.setLastContact()
// Abort pipeline if not successful
if !resp.Success {
return
}
// Update our replication state
updateLastAppended(s, req)
case <-stopCh:
return
}
}
}
// setupAppendEntries is used to setup an append entries request.
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
req.Term = s.currentTerm
req.Leader = r.trans.EncodePeer(r.localAddr)
req.LeaderCommitIndex = r.getCommitIndex()
if err := r.setPreviousLog(req, nextIndex); err != nil {
return err
}
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
return err
}
return nil
}
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
// AppendEntriesRequest given the next index to replicate.
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
// Guard for the first index, since there is no 0 log entry
// Guard against the previous index being a snapshot as well
lastSnapIdx, lastSnapTerm := r.getLastSnapshot()
if nextIndex == 1 {
req.PrevLogEntry = 0
req.PrevLogTerm = 0
} else if (nextIndex - 1) == lastSnapIdx {
req.PrevLogEntry = lastSnapIdx
req.PrevLogTerm = lastSnapTerm
} else {
var l Log
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
nextIndex-1, err)
return err
}
// Set the previous index and term (0 if nextIndex is 1)
req.PrevLogEntry = l.Index
req.PrevLogTerm = l.Term
}
return nil
}
// setNewLogs is used to setup the logs which should be appended for a request.
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
// Append up to MaxAppendEntries or up to the lastIndex
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
for i := nextIndex; i <= maxIndex; i++ {
oldLog := new(Log)
if err := r.logs.GetLog(i, oldLog); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
return err
}
req.Entries = append(req.Entries, oldLog)
}
return nil
}
// appendStats is used to emit stats about an AppendEntries invocation.
func appendStats(peer string, start time.Time, logs float32) {
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
}
// handleStaleTerm is used when a follower indicates that we have a stale term.
func (r *Raft) handleStaleTerm(s *followerReplication) {
r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
s.notifyAll(false) // No longer leader
asyncNotifyCh(s.stepDown)
}
// updateLastAppended is used to update follower replication state after a successful
// AppendEntries RPC.
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
// Mark any inflight logs as committed
if logs := req.Entries; len(logs) > 0 {
first := logs[0]
last := logs[len(logs)-1]
s.inflight.CommitRange(first.Index, last.Index)
// Update the indexes
s.matchIndex = last.Index
s.nextIndex = last.Index + 1
}
// Notify still leader
s.notifyAll(true)
}

go/vt/orchestrator/external/raft/snapshot.go (vendored, new file)
@@ -0,0 +1,40 @@
package raft
import (
"io"
)
// SnapshotMeta is for metadata of a snapshot.
type SnapshotMeta struct {
ID string // ID is opaque to the store, and is used for opening
Index uint64
Term uint64
Peers []byte
Size int64
}
// SnapshotStore interface is used to allow for flexible implementations
// of snapshot storage and retrieval. For example, a client could implement
// a shared state store such as S3, allowing new nodes to restore snapshots
// without streaming from the leader.
type SnapshotStore interface {
// Create is used to begin a snapshot at a given index and term,
// with the current peer set already encoded.
Create(index, term uint64, peers []byte) (SnapshotSink, error)
// List is used to list the available snapshots in the store.
// It should return them in descending order, with the highest index first.
List() ([]*SnapshotMeta, error)
// Open takes a snapshot ID and provides a ReadCloser. Once close is
// called it is assumed the snapshot is no longer needed.
Open(id string) (*SnapshotMeta, io.ReadCloser, error)
}
// SnapshotSink is returned by StartSnapshot. The FSM will Write state
// to the sink and call Close on completion. On error, Cancel will be invoked.
type SnapshotSink interface {
io.WriteCloser
ID() string
Cancel() error
}
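To make the two interfaces concrete, here is a deliberately minimal in-memory store that retains only the most recent snapshot. It is an illustrative sketch for tests, not part of the vendored file, and assumes the bytes, fmt, io, and io/ioutil imports:

// memSnapshotStore keeps at most one snapshot in memory.
type memSnapshotStore struct {
	meta *SnapshotMeta
	data []byte
}

// memSnapshotSink buffers writes and commits them to the store on Close.
type memSnapshotSink struct {
	store *memSnapshotStore
	meta  SnapshotMeta
	buf   bytes.Buffer
}

func (s *memSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
	return &memSnapshotSink{
		store: s,
		meta:  SnapshotMeta{ID: fmt.Sprintf("%d-%d", term, index), Index: index, Term: term, Peers: peers},
	}, nil
}

func (s *memSnapshotStore) List() ([]*SnapshotMeta, error) {
	if s.meta == nil {
		return nil, nil // no snapshots yet
	}
	return []*SnapshotMeta{s.meta}, nil
}

func (s *memSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
	if s.meta == nil || s.meta.ID != id {
		return nil, nil, fmt.Errorf("snapshot %q not found", id)
	}
	return s.meta, ioutil.NopCloser(bytes.NewReader(s.data)), nil
}

func (k *memSnapshotSink) Write(p []byte) (int, error) { return k.buf.Write(p) }
func (k *memSnapshotSink) ID() string                  { return k.meta.ID }
func (k *memSnapshotSink) Cancel() error               { return nil } // discard without committing

func (k *memSnapshotSink) Close() error {
	k.meta.Size = int64(k.buf.Len())
	k.store.data = k.buf.Bytes()
	k.store.meta = &k.meta
	return nil
}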

go/vt/orchestrator/external/raft/stable.go (vendored, new file)
@@ -0,0 +1,15 @@
package raft
// StableStore is used to provide stable storage
// of key configurations to ensure safety.
type StableStore interface {
Set(key []byte, val []byte) error
// Get returns the value for key, or an empty byte slice if key was not found.
Get(key []byte) ([]byte, error)
SetUint64(key []byte, val uint64) error
// GetUint64 returns the uint64 value for key, or 0 if key was not found.
GetUint64(key []byte) (uint64, error)
}
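A minimal in-memory implementation makes this contract concrete. The sketch below is not part of the vendored file (production deployments would use a durable store) and assumes the sync and strconv imports:

// memStableStore is an illustrative, map-backed StableStore.
type memStableStore struct {
	mu sync.Mutex
	kv map[string][]byte
}

func newMemStableStore() *memStableStore {
	return &memStableStore{kv: map[string][]byte{}}
}

func (s *memStableStore) Set(key []byte, val []byte) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.kv[string(key)] = val
	return nil
}

func (s *memStableStore) Get(key []byte) ([]byte, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	return s.kv[string(key)], nil // empty when missing, per the contract
}

func (s *memStableStore) SetUint64(key []byte, val uint64) error {
	return s.Set(key, []byte(strconv.FormatUint(val, 10)))
}

func (s *memStableStore) GetUint64(key []byte) (uint64, error) {
	buf, err := s.Get(key)
	if err != nil || len(buf) == 0 {
		return 0, err // 0 when missing, per the contract
	}
	return strconv.ParseUint(string(buf), 10, 64)
}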

go/vt/orchestrator/external/raft/state.go (vendored, new file)
@@ -0,0 +1,167 @@
package raft
import (
"sync"
"sync/atomic"
)
// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
// or Shutdown.
type RaftState uint32
const (
// Follower is the initial state of a Raft node.
Follower RaftState = iota
// Candidate is one of the valid states of a Raft node.
Candidate
// Leader is one of the valid states of a Raft node.
Leader
// Shutdown is the terminal state of a Raft node.
Shutdown
)
func (s RaftState) String() string {
switch s {
case Follower:
return "Follower"
case Candidate:
return "Candidate"
case Leader:
return "Leader"
case Shutdown:
return "Shutdown"
default:
return "Unknown"
}
}
// raftState is used to maintain various state variables
// and provides an interface to set/get the variables in a
// thread-safe manner.
type raftState struct {
// The current term, cache of StableStore
currentTerm uint64
// Highest committed log entry
commitIndex uint64
// Last applied log to the FSM
lastApplied uint64
// protects the next four fields
lastLock sync.Mutex
// Cache the latest snapshot index/term
lastSnapshotIndex uint64
lastSnapshotTerm uint64
// Cache the latest log from LogStore
lastLogIndex uint64
lastLogTerm uint64
// Tracks running goroutines
routinesGroup sync.WaitGroup
// The current state
state RaftState
}
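// getState and setState below treat the state field as a uint32 so it can
// be read and written atomically; RaftState's underlying type is uint32,
// which makes the pointer cast safe.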
func (r *raftState) getState() RaftState {
stateAddr := (*uint32)(&r.state)
return RaftState(atomic.LoadUint32(stateAddr))
}
func (r *raftState) setState(s RaftState) {
stateAddr := (*uint32)(&r.state)
atomic.StoreUint32(stateAddr, uint32(s))
}
func (r *raftState) getCurrentTerm() uint64 {
return atomic.LoadUint64(&r.currentTerm)
}
func (r *raftState) setCurrentTerm(term uint64) {
atomic.StoreUint64(&r.currentTerm, term)
}
func (r *raftState) getLastLog() (index, term uint64) {
r.lastLock.Lock()
index = r.lastLogIndex
term = r.lastLogTerm
r.lastLock.Unlock()
return
}
func (r *raftState) setLastLog(index, term uint64) {
r.lastLock.Lock()
r.lastLogIndex = index
r.lastLogTerm = term
r.lastLock.Unlock()
}
func (r *raftState) getLastSnapshot() (index, term uint64) {
r.lastLock.Lock()
index = r.lastSnapshotIndex
term = r.lastSnapshotTerm
r.lastLock.Unlock()
return
}
func (r *raftState) setLastSnapshot(index, term uint64) {
r.lastLock.Lock()
r.lastSnapshotIndex = index
r.lastSnapshotTerm = term
r.lastLock.Unlock()
}
func (r *raftState) getCommitIndex() uint64 {
return atomic.LoadUint64(&r.commitIndex)
}
func (r *raftState) setCommitIndex(index uint64) {
atomic.StoreUint64(&r.commitIndex, index)
}
func (r *raftState) getLastApplied() uint64 {
return atomic.LoadUint64(&r.lastApplied)
}
func (r *raftState) setLastApplied(index uint64) {
atomic.StoreUint64(&r.lastApplied, index)
}
// Start a goroutine and properly handle the race between a routine
// starting and incrementing, and exiting and decrementing.
func (r *raftState) goFunc(f func()) {
r.routinesGroup.Add(1)
go func() {
defer r.routinesGroup.Done()
f()
}()
}
func (r *raftState) waitShutdown() {
r.routinesGroup.Wait()
}
// getLastIndex returns the last index in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastIndex() uint64 {
r.lastLock.Lock()
defer r.lastLock.Unlock()
return max(r.lastLogIndex, r.lastSnapshotIndex)
}
// getLastEntry returns the last index and term in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastEntry() (uint64, uint64) {
r.lastLock.Lock()
defer r.lastLock.Unlock()
if r.lastLogIndex >= r.lastSnapshotIndex {
return r.lastLogIndex, r.lastLogTerm
}
return r.lastSnapshotIndex, r.lastSnapshotTerm
}

go/vt/orchestrator/external/raft/tcp_transport.go (vendored, new file)
@@ -0,0 +1,105 @@
package raft
import (
"errors"
"io"
"log"
"net"
"time"
)
var (
errNotAdvertisable = errors.New("local bind address is not advertisable")
errNotTCP = errors.New("local address is not a TCP address")
)
// TCPStreamLayer implements StreamLayer interface for plain TCP.
type TCPStreamLayer struct {
advertise net.Addr
listener *net.TCPListener
}
// NewTCPTransport returns a NetworkTransport that is built on top of
// a TCP streaming transport layer.
func NewTCPTransport(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransport(stream, maxPool, timeout, logOutput)
})
}
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
// a TCP streaming transport layer, with log output going to the supplied Logger
func NewTCPTransportWithLogger(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
})
}
func newTCPTransport(bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
// Try to bind
list, err := net.Listen("tcp", bindAddr)
if err != nil {
return nil, err
}
// Create stream
stream := &TCPStreamLayer{
advertise: advertise,
listener: list.(*net.TCPListener),
}
// Verify that we have a usable advertise address
addr, ok := stream.Addr().(*net.TCPAddr)
if !ok {
list.Close()
return nil, errNotTCP
}
if addr.IP.IsUnspecified() {
list.Close()
return nil, errNotAdvertisable
}
// Create the network transport
trans := transportCreator(stream)
return trans, nil
}
// Dial implements the StreamLayer interface.
func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("tcp", address, timeout)
}
// Accept implements the net.Listener interface.
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
return t.listener.Accept()
}
// Close implements the net.Listener interface.
func (t *TCPStreamLayer) Close() (err error) {
return t.listener.Close()
}
// Addr implements the net.Listener interface.
func (t *TCPStreamLayer) Addr() net.Addr {
// Use an advertise addr if provided
if t.advertise != nil {
return t.advertise
}
return t.listener.Addr()
}
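As an illustrative sketch (not part of the vendored file), constructing a usable transport takes one call; inbound RPCs then arrive on the transport's Consumer() channel. The os and time imports are assumed:

// newLoopbackTransport binds on loopback with a pool of 3 connections and
// a 10s I/O timeout. Illustrative only; real code would pick a routable
// bind address or pass an advertise address.
func newLoopbackTransport() (*NetworkTransport, error) {
	return NewTCPTransport("127.0.0.1:7000", nil, 3, 10*time.Second, os.Stderr)
}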

go/vt/orchestrator/external/raft/tcp_transport_test.go (vendored, new file)
@@ -0,0 +1,24 @@
package raft
import (
"net"
"testing"
)
func TestTCPTransport_BadAddr(t *testing.T) {
_, err := NewTCPTransportWithLogger("0.0.0.0:0", nil, 1, 0, newTestLogger(t))
if err != errNotAdvertisable {
t.Fatalf("err: %v", err)
}
}
func TestTCPTransport_WithAdvertise(t *testing.T) {
addr := &net.TCPAddr{IP: []byte{127, 0, 0, 1}, Port: 12345}
trans, err := NewTCPTransportWithLogger("0.0.0.0:0", addr, 1, 0, newTestLogger(t))
if err != nil {
t.Fatalf("err: %v", err)
}
if trans.LocalAddr() != "127.0.0.1:12345" {
t.Fatalf("bad: %v", trans.LocalAddr())
}
}

go/vt/orchestrator/external/raft/transport.go (vendored, new file)
@@ -0,0 +1,124 @@
package raft
import (
"io"
"time"
)
// RPCResponse captures both a response and a potential error.
type RPCResponse struct {
Response interface{}
Error error
}
// RPC has a command, and provides a response mechanism.
type RPC struct {
Command interface{}
Reader io.Reader // Set only for InstallSnapshot
RespChan chan<- RPCResponse
}
// Respond is used to respond with a response, error or both
func (r *RPC) Respond(resp interface{}, err error) {
r.RespChan <- RPCResponse{resp, err}
}
// Transport provides an interface for network transports
// to allow Raft to communicate with other nodes.
type Transport interface {
// Consumer returns a channel that can be used to
// consume and respond to RPC requests.
Consumer() <-chan RPC
// LocalAddr is used to return our local address to distinguish from our peers.
LocalAddr() string
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
AppendEntriesPipeline(target string) (AppendPipeline, error)
// AppendEntries sends the appropriate RPC to the target node.
AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
// RequestVote sends the appropriate RPC to the target node.
RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error
// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
// the ReadCloser and streamed to the client.
InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
// EncodePeer is used to serialize a peer name.
EncodePeer(string) []byte
// DecodePeer is used to deserialize a peer name.
DecodePeer([]byte) string
// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast-pass. This is to avoid head-of-line blocking from
// disk IO. If a Transport does not support this, it can simply
// ignore the call, and push the heartbeat onto the Consumer channel.
SetHeartbeatHandler(cb func(rpc RPC))
}
// WithClose is an interface that a transport may provide which
// allows a transport to be shut down cleanly when a Raft instance
// shuts down.
//
// It is defined separately from Transport as unfortunately it wasn't in the
// original interface specification.
type WithClose interface {
// Close permanently closes a transport, stopping
// any associated goroutines and freeing other resources.
Close() error
}
// LoopbackTransport is an interface that provides a loopback transport suitable for testing
// e.g. InmemTransport. It's there so we don't have to rewrite tests.
type LoopbackTransport interface {
Transport // Embedded transport reference
WithPeers // Embedded peer management
WithClose // with a close routine
}
// WithPeers is an interface that a transport may provide which allows for connection and
// disconnection. Unless the transport is a loopback transport, the transport specified to
// "Connect" is likely to be nil.
type WithPeers interface {
Connect(peer string, t Transport) // Connect a peer
Disconnect(peer string) // Disconnect a given peer
DisconnectAll() // Disconnect all peers, possibly to reconnect them later
}
// AppendPipeline is used for pipelining AppendEntries requests. It is used
// to increase the replication throughput by masking latency and better
// utilizing bandwidth.
type AppendPipeline interface {
// AppendEntries is used to add another request to the pipeline.
// The send may block which is an effective form of back-pressure.
AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
// Consumer returns a channel that can be used to consume
// response futures when they are ready.
Consumer() <-chan AppendFuture
// Close closes the pipeline and cancels all inflight RPCs
Close() error
}
// AppendFuture is used to return information about a pipelined AppendEntries request.
type AppendFuture interface {
Future
// Start returns the time that the append request was started.
// It is always OK to call this method.
Start() time.Time
// Request holds the parameters of the AppendEntries call.
// It is always OK to call this method.
Request() *AppendEntriesRequest
// Response holds the results of the AppendEntries call.
// This method must only be called after the Error
// method returns, and will only be valid on success.
Response() *AppendEntriesResponse
}

go/vt/orchestrator/external/raft/transport_test.go (vendored, new file)
@@ -0,0 +1,313 @@
package raft
import (
"bytes"
"reflect"
"testing"
"time"
)
const (
TT_Inmem = iota
// NOTE: must be last
numTestTransports
)
func NewTestTransport(ttype int, addr string) (string, LoopbackTransport) {
switch ttype {
case TT_Inmem:
addr, lt := NewInmemTransport(addr)
return addr, lt
default:
panic("Unknown transport type")
}
}
func TestTransport_StartStop(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
_, trans := NewTestTransport(ttype, "")
if err := trans.Close(); err != nil {
t.Fatalf("err: %v", err)
}
}
}
func TestTransport_AppendEntries(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
addr1, trans1 := NewTestTransport(ttype, "")
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
PrevLogEntry: 100,
PrevLogTerm: 4,
Entries: []*Log{
{
Index: 101,
Term: 4,
Type: LogNoop,
},
},
LeaderCommitIndex: 90,
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
addr2, trans2 := NewTestTransport(ttype, "")
defer trans2.Close()
trans1.Connect(addr2, trans2)
trans2.Connect(addr1, trans1)
var out AppendEntriesResponse
if err := trans2.AppendEntries(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
}
func TestTransport_AppendEntriesPipeline(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
addr1, trans1 := NewTestTransport(ttype, "")
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := AppendEntriesRequest{
Term: 10,
Leader: []byte("cartman"),
PrevLogEntry: 100,
PrevLogTerm: 4,
Entries: []*Log{
{
Index: 101,
Term: 4,
Type: LogNoop,
},
},
LeaderCommitIndex: 90,
}
resp := AppendEntriesResponse{
Term: 4,
LastLog: 90,
Success: true,
}
// Listen for a request
go func() {
for i := 0; i < 10; i++ {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*AppendEntriesRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}
}()
// Transport 2 makes outbound request
addr2, trans2 := NewTestTransport(ttype, "")
defer trans2.Close()
trans1.Connect(addr2, trans2)
trans2.Connect(addr1, trans1)
pipeline, err := trans2.AppendEntriesPipeline(trans1.LocalAddr())
if err != nil {
t.Fatalf("err: %v", err)
}
defer pipeline.Close()
for i := 0; i < 10; i++ {
out := new(AppendEntriesResponse)
if _, err := pipeline.AppendEntries(&args, out); err != nil {
t.Fatalf("err: %v", err)
}
}
respCh := pipeline.Consumer()
for i := 0; i < 10; i++ {
select {
case ready := <-respCh:
// Verify the response
if !reflect.DeepEqual(&resp, ready.Response()) {
t.Fatalf("command mismatch: %#v %#v", &resp, ready.Response())
}
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}
}
}
func TestTransport_RequestVote(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
addr1, trans1 := NewTestTransport(ttype, "")
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := RequestVoteRequest{
Term: 20,
Candidate: []byte("butters"),
LastLogIndex: 100,
LastLogTerm: 19,
}
resp := RequestVoteResponse{
Term: 100,
Peers: []byte("blah"),
Granted: false,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*RequestVoteRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
addr2, trans2 := NewTestTransport(ttype, "")
defer trans2.Close()
trans1.Connect(addr2, trans2)
trans2.Connect(addr1, trans1)
var out RequestVoteResponse
if err := trans2.RequestVote(trans1.LocalAddr(), &args, &out); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
}
func TestTransport_InstallSnapshot(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
addr1, trans1 := NewTestTransport(ttype, "")
defer trans1.Close()
rpcCh := trans1.Consumer()
// Make the RPC request
args := InstallSnapshotRequest{
Term: 10,
Leader: []byte("kyle"),
LastLogIndex: 100,
LastLogTerm: 9,
Peers: []byte("blah blah"),
Size: 10,
}
resp := InstallSnapshotResponse{
Term: 10,
Success: true,
}
// Listen for a request
go func() {
select {
case rpc := <-rpcCh:
// Verify the command
req := rpc.Command.(*InstallSnapshotRequest)
if !reflect.DeepEqual(req, &args) {
t.Fatalf("command mismatch: %#v %#v", *req, args)
}
// Try to read the bytes
buf := make([]byte, 10)
rpc.Reader.Read(buf)
// Compare
if !bytes.Equal(buf, []byte("0123456789")) {
t.Fatalf("bad buf %v", buf)
}
rpc.Respond(&resp, nil)
case <-time.After(200 * time.Millisecond):
t.Fatalf("timeout")
}
}()
// Transport 2 makes outbound request
addr2, trans2 := NewTestTransport(ttype, "")
defer trans2.Close()
trans1.Connect(addr2, trans2)
trans2.Connect(addr1, trans1)
// Create a buffer
buf := bytes.NewBuffer([]byte("0123456789"))
var out InstallSnapshotResponse
if err := trans2.InstallSnapshot(trans1.LocalAddr(), &args, &out, buf); err != nil {
t.Fatalf("err: %v", err)
}
// Verify the response
if !reflect.DeepEqual(resp, out) {
t.Fatalf("command mismatch: %#v %#v", resp, out)
}
}
}
func TestTransport_EncodeDecode(t *testing.T) {
for ttype := 0; ttype < numTestTransports; ttype++ {
_, trans1 := NewTestTransport(ttype, "")
defer trans1.Close()
local := trans1.LocalAddr()
enc := trans1.EncodePeer(local)
dec := trans1.DecodePeer(enc)
if dec != local {
t.Fatalf("enc/dec fail: %v %v", dec, local)
}
}
}

go/vt/orchestrator/external/raft/util.go (vendored, new file)
@@ -0,0 +1,179 @@
package raft
import (
"bytes"
crand "crypto/rand"
"fmt"
"math"
"math/big"
"math/rand"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
func init() {
// Ensure we use a high-entropy seed for the pseudo-random generator
rand.Seed(newSeed())
}
// newSeed returns an int64 from a crypto random source;
// it can be used to seed math/rand.
func newSeed() int64 {
r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
if err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return r.Int64()
}
// randomTimeout returns a channel that fires after a random duration between minVal and 2x minVal.
func randomTimeout(minVal time.Duration) <-chan time.Time {
if minVal == 0 {
return nil
}
extra := (time.Duration(rand.Int63()) % minVal)
return time.After(minVal + extra)
}
// min returns the minimum.
func min(a, b uint64) uint64 {
if a <= b {
return a
}
return b
}
// max returns the maximum.
func max(a, b uint64) uint64 {
if a >= b {
return a
}
return b
}
// generateUUID is used to generate a random UUID.
func generateUUID() string {
buf := make([]byte, 16)
if _, err := crand.Read(buf); err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
buf[0:4],
buf[4:6],
buf[6:8],
buf[8:10],
buf[10:16])
}
// asyncNotifyCh is used to do an async channel send
// to a single channel without blocking.
func asyncNotifyCh(ch chan struct{}) {
select {
case ch <- struct{}{}:
default:
}
}
// asyncNotifyBool is used to do an async notification
// on a bool channel.
func asyncNotifyBool(ch chan bool, v bool) {
select {
case ch <- v:
default:
}
}
// ExcludePeer is used to exclude a single peer from a list of peers.
func ExcludePeer(peers []string, peer string) []string {
otherPeers := make([]string, 0, len(peers))
for _, p := range peers {
if p != peer {
otherPeers = append(otherPeers, p)
}
}
return otherPeers
}
// PeerContained checks if a given peer is contained in a list.
func PeerContained(peers []string, peer string) bool {
for _, p := range peers {
if p == peer {
return true
}
}
return false
}
// AddUniquePeer is used to add a peer to a list of existing
// peers only if it is not already contained.
func AddUniquePeer(peers []string, peer string) []string {
if PeerContained(peers, peer) {
return peers
}
return append(peers, peer)
}
// encodePeers is used to serialize a list of peers.
func encodePeers(peers []string, trans Transport) []byte {
// Encode each peer
var encPeers [][]byte
for _, p := range peers {
encPeers = append(encPeers, trans.EncodePeer(p))
}
// Encode the entire array
buf, err := encodeMsgPack(encPeers)
if err != nil {
panic(fmt.Errorf("failed to encode peers: %v", err))
}
return buf.Bytes()
}
// decodePeers is used to deserialize a list of peers.
func decodePeers(buf []byte, trans Transport) []string {
// Decode the buffer first
var encPeers [][]byte
if err := decodeMsgPack(buf, &encPeers); err != nil {
panic(fmt.Errorf("failed to decode peers: %v", err))
}
// Deserialize each peer
var peers []string
for _, enc := range encPeers {
peers = append(peers, trans.DecodePeer(enc))
}
return peers
}
// Decode reverses the encode operation on a byte slice input.
func decodeMsgPack(buf []byte, out interface{}) error {
r := bytes.NewBuffer(buf)
hd := codec.MsgpackHandle{}
dec := codec.NewDecoder(r, &hd)
return dec.Decode(out)
}
// Encode writes an encoded object to a new bytes buffer.
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
buf := bytes.NewBuffer(nil)
hd := codec.MsgpackHandle{}
enc := codec.NewEncoder(buf, &hd)
err := enc.Encode(in)
return buf, err
}
// backoff is used to compute an exponential backoff
// duration. Base time is scaled by the current round,
// up to some maximum scale factor.
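// For example, backoff(10*time.Millisecond, 8, 8) doubles the base six
// times and returns 640ms (see TestBackoff in util_test.go).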
func backoff(base time.Duration, round, limit uint64) time.Duration {
power := min(round, limit)
for power > 2 {
base *= 2
power--
}
return base
}

go/vt/orchestrator/external/raft/util_test.go (vendored, new file)
@@ -0,0 +1,152 @@
package raft
import (
"reflect"
"regexp"
"testing"
"time"
)
func TestRandomTimeout(t *testing.T) {
start := time.Now()
timeout := randomTimeout(time.Millisecond)
select {
case <-timeout:
diff := time.Since(start)
if diff < time.Millisecond {
t.Fatalf("fired early")
}
case <-time.After(3 * time.Millisecond):
t.Fatalf("timeout")
}
}
func TestNewSeed(t *testing.T) {
vals := make(map[int64]bool)
for i := 0; i < 1000; i++ {
seed := newSeed()
if _, exists := vals[seed]; exists {
t.Fatal("newSeed() return a value it'd previously returned")
}
vals[seed] = true
}
}
func TestRandomTimeout_NoTime(t *testing.T) {
timeout := randomTimeout(0)
if timeout != nil {
t.Fatalf("expected nil channel")
}
}
func TestMin(t *testing.T) {
if min(1, 1) != 1 {
t.Fatalf("bad min")
}
if min(2, 1) != 1 {
t.Fatalf("bad min")
}
if min(1, 2) != 1 {
t.Fatalf("bad min")
}
}
func TestMax(t *testing.T) {
if max(1, 1) != 1 {
t.Fatalf("bad max")
}
if max(2, 1) != 2 {
t.Fatalf("bad max")
}
if max(1, 2) != 2 {
t.Fatalf("bad max")
}
}
func TestGenerateUUID(t *testing.T) {
prev := generateUUID()
for i := 0; i < 100; i++ {
id := generateUUID()
if prev == id {
t.Fatalf("Should get a new ID!")
}
matched, err := regexp.MatchString(
`[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}`, id)
if !matched || err != nil {
t.Fatalf("expected match %s %v %s", id, matched, err)
}
}
}
func TestExcludePeer(t *testing.T) {
peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
peer := peers[2]
after := ExcludePeer(peers, peer)
if len(after) != 2 {
t.Fatalf("Bad length")
}
if after[0] == peer || after[1] == peer {
t.Fatalf("should not contain peer")
}
}
func TestPeerContained(t *testing.T) {
peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
if !PeerContained(peers, peers[2]) {
t.Fatalf("Expect contained")
}
if PeerContained(peers, NewInmemAddr()) {
t.Fatalf("unexpected contained")
}
}
func TestAddUniquePeer(t *testing.T) {
peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
after := AddUniquePeer(peers, peers[2])
if !reflect.DeepEqual(after, peers) {
t.Fatalf("unexpected append")
}
after = AddUniquePeer(peers, NewInmemAddr())
if len(after) != 4 {
t.Fatalf("expected append")
}
}
func TestEncodeDecodePeers(t *testing.T) {
peers := []string{NewInmemAddr(), NewInmemAddr(), NewInmemAddr()}
_, trans := NewInmemTransport("")
// Try to encode/decode
buf := encodePeers(peers, trans)
decoded := decodePeers(buf, trans)
if !reflect.DeepEqual(peers, decoded) {
t.Fatalf("mismatch %v %v", peers, decoded)
}
}
func TestBackoff(t *testing.T) {
b := backoff(10*time.Millisecond, 1, 8)
if b != 10*time.Millisecond {
t.Fatalf("bad: %v", b)
}
b = backoff(20*time.Millisecond, 2, 8)
if b != 20*time.Millisecond {
t.Fatalf("bad: %v", b)
}
b = backoff(10*time.Millisecond, 8, 8)
if b != 640*time.Millisecond {
t.Fatalf("bad: %v", b)
}
b = backoff(10*time.Millisecond, 9, 8)
if b != 640*time.Millisecond {
t.Fatalf("bad: %v", b)
}
}

404
go/vt/orchestrator/external/zk/zk.go поставляемый Normal file

@@ -0,0 +1,404 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package zk provides higher-level commands over the lower-level zookeeper connector
package zk
import (
"bytes"
"errors"
"fmt"
"math"
gopath "path"
"sort"
"strconv"
"strings"
"time"
"github.com/samuel/go-zookeeper/zk"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
type ZooKeeper struct {
servers []string
authScheme string
authExpression []byte
// We assume complete access to all
flags int32
acl []zk.ACL
}
// NewZooKeeper returns a ZooKeeper client with default flags and a world-accessible ACL.
func NewZooKeeper() *ZooKeeper {
return &ZooKeeper{
flags: int32(0),
acl: zk.WorldACL(zk.PermAll),
}
}
// SetServers sets the list of servers for the zookeeper client to connect to.
// Each element in the array should be in either of the following forms:
// - "servername"
// - "servername:port"
func (zook *ZooKeeper) SetServers(serversArray []string) {
zook.servers = serversArray
}
func (zook *ZooKeeper) SetAuth(scheme string, auth []byte) {
log.Debug("Setting Auth ")
zook.authScheme = scheme
zook.authExpression = auth
}
// BuildACL parses a comma-delimited list of numeric permissions and returns the corresponding digest ACLs for the given user and password.
func (zook *ZooKeeper) BuildACL(authScheme string, user string, pwd string, acls string) (perms []zk.ACL, err error) {
aclsList := strings.Split(acls, ",")
for _, elem := range aclsList {
acl, err := strconv.ParseInt(elem, 10, 32)
if err != nil {
break
}
perm := zk.DigestACL(int32(acl), user, pwd)
perms = append(perms, perm[0])
}
return perms, err
}
type infoLogger struct{}
func (_ infoLogger) Printf(format string, a ...interface{}) {
log.Infof(format, a...)
}
// connect establishes a new zookeeper connection, adding auth credentials when configured
func (zook *ZooKeeper) connect() (*zk.Conn, error) {
zk.DefaultLogger = &infoLogger{}
conn, _, err := zk.Connect(zook.servers, time.Second)
if err == nil && zook.authScheme != "" {
log.Debugf("Add Auth %s %s", zook.authScheme, zook.authExpression)
err = conn.AddAuth(zook.authScheme, zook.authExpression)
}
return conn, err
}
// Exists returns true when the given path exists
func (zook *ZooKeeper) Exists(path string) (bool, error) {
connection, err := zook.connect()
if err != nil {
return false, err
}
defer connection.Close()
exists, _, err := connection.Exists(path)
return exists, err
}
// Get returns value associated with given path, or error if path does not exist
func (zook *ZooKeeper) Get(path string) ([]byte, error) {
connection, err := zook.connect()
if err != nil {
return []byte{}, err
}
defer connection.Close()
data, _, err := connection.Get(path)
return data, err
}
func (zook *ZooKeeper) GetACL(path string) (data []string, err error) {
connection, err := zook.connect()
if err != nil {
return nil, err
}
defer connection.Close()
perms, _, err := connection.GetACL(path)
return zook.aclsToString(perms), err
}
func (zook *ZooKeeper) aclsToString(acls []zk.ACL) (result []string) {
for _, acl := range acls {
var buffer bytes.Buffer
buffer.WriteString(fmt.Sprintf("%v:%v:", acl.Scheme, acl.ID))
if acl.Perms&zk.PermCreate != 0 {
buffer.WriteString("c")
}
if acl.Perms&zk.PermDelete != 0 {
buffer.WriteString("d")
}
if acl.Perms&zk.PermRead != 0 {
buffer.WriteString("r")
}
if acl.Perms&zk.PermWrite != 0 {
buffer.WriteString("w")
}
if acl.Perms&zk.PermAdmin != 0 {
buffer.WriteString("a")
}
result = append(result, buffer.String())
}
return result
}
// Children returns the sub-paths of the given path (possibly an empty array), or an error if the path does not exist
func (zook *ZooKeeper) Children(path string) ([]string, error) {
connection, err := zook.connect()
if err != nil {
return []string{}, err
}
defer connection.Close()
children, _, err := connection.Children(path)
return children, err
}
// childrenRecursiveInternal: internal implementation of recursive-children query.
func (zook *ZooKeeper) childrenRecursiveInternal(connection *zk.Conn, path string, incrementalPath string) ([]string, error) {
children, _, err := connection.Children(path)
if err != nil {
return children, err
}
sort.Sort(sort.StringSlice(children))
recursiveChildren := []string{}
for _, child := range children {
incrementalChild := gopath.Join(incrementalPath, child)
recursiveChildren = append(recursiveChildren, incrementalChild)
log.Debugf("incremental child: %+v", incrementalChild)
incrementalChildren, err := zook.childrenRecursiveInternal(connection, gopath.Join(path, child), incrementalChild)
if err != nil {
return children, err
}
recursiveChildren = append(recursiveChildren, incrementalChildren...)
}
return recursiveChildren, err
}
// ChildrenRecursive returns a list of all descendants of the given path (possibly empty), or an error if the path
// does not exist.
// Every element in the result list is a subpath relative to the given path.
func (zook *ZooKeeper) ChildrenRecursive(path string) ([]string, error) {
connection, err := zook.connect()
if err != nil {
return []string{}, err
}
defer connection.Close()
result, err := zook.childrenRecursiveInternal(connection, path, "")
return result, err
}
// createInternal: create a new path
func (zook *ZooKeeper) createInternal(connection *zk.Conn, path string, data []byte, acl []zk.ACL, force bool) (string, error) {
if path == "/" {
return "/", nil
}
log.Debugf("creating: %s", path)
attempts := 0
for {
attempts += 1
returnValue, err := connection.Create(path, data, zook.flags, zook.acl)
log.Debugf("create status for %s: %s, %+v", path, returnValue, err)
if err != nil && force && attempts < 2 {
parentPath := gopath.Dir(path)
if parentPath == path {
return returnValue, err
}
returnValue, err = zook.createInternal(connection, parentPath, []byte("zookeepercli auto-generated"), acl, force)
} else {
return returnValue, err
}
}
return "", nil
}
// createInternalWithACL: create a new path with acl
func (zook *ZooKeeper) createInternalWithACL(connection *zk.Conn, path string, data []byte, force bool, perms []zk.ACL) (string, error) {
if path == "/" {
return "/", nil
}
log.Debugf("creating: %s with acl ", path)
attempts := 0
for {
attempts += 1
returnValue, err := connection.Create(path, data, zook.flags, perms)
log.Debugf("create status for %s: %s, %+v", path, returnValue, err)
if err != nil && force && attempts < 2 {
returnValue, err = zook.createInternalWithACL(connection, gopath.Dir(path), []byte("zookeepercli auto-generated"), force, perms)
} else {
return returnValue, err
}
}
return "", nil
}
// Create will create a new path, or return an error should the path already exist.
// The "force" param controls the behavior when the path's parent directory does not exist.
// When "force" is false, the function returns with an error. When "force" is true, it recursively
// attempts to create the required parent directories.
func (zook *ZooKeeper) Create(path string, data []byte, aclstr string, force bool) (string, error) {
connection, err := zook.connect()
if err != nil {
return "", err
}
defer connection.Close()
if len(aclstr) > 0 {
zook.acl, err = zook.parseACLString(aclstr)
if err != nil {
return "", err
}
}
return zook.createInternal(connection, path, data, zook.acl, force)
}
func (zook *ZooKeeper) CreateWithACL(path string, data []byte, force bool, perms []zk.ACL) (string, error) {
connection, err := zook.connect()
if err != nil {
return "", err
}
defer connection.Close()
return zook.createInternalWithACL(connection, path, data, force, perms)
}
// Set updates a value for a given path, or returns with error if the path does not exist
func (zook *ZooKeeper) Set(path string, data []byte) (*zk.Stat, error) {
connection, err := zook.connect()
if err != nil {
return nil, err
}
defer connection.Close()
return connection.Set(path, data, -1)
}
// SetACL updates the ACL on a given path; with "force", the path is first created if it does not exist
func (zook *ZooKeeper) SetACL(path string, aclstr string, force bool) (string, error) {
connection, err := zook.connect()
if err != nil {
return "", err
}
defer connection.Close()
acl, err := zook.parseACLString(aclstr)
if err != nil {
return "", err
}
if force {
exists, _, err := connection.Exists(path)
if err != nil {
return "", err
}
if !exists {
return zook.createInternal(connection, path, []byte(""), acl, force)
}
}
_, err = connection.SetACL(path, acl, -1)
return path, err
}
func (zook *ZooKeeper) parseACLString(aclstr string) (acl []zk.ACL, err error) {
aclsList := strings.Split(aclstr, ",")
for _, entry := range aclsList {
parts := strings.Split(entry, ":")
var scheme, id string
var perms int32
if len(parts) > 3 && parts[0] == "digest" {
scheme = parts[0]
id = fmt.Sprintf("%s:%s", parts[1], parts[2])
perms, err = zook.parsePermsString(parts[3])
} else {
scheme, id = parts[0], parts[1]
perms, err = zook.parsePermsString(parts[2])
}
if err == nil {
perm := zk.ACL{Scheme: scheme, ID: id, Perms: perms}
acl = append(acl, perm)
}
}
return acl, err
}
func (zook *ZooKeeper) parsePermsString(permstr string) (perms int32, err error) {
if x, e := strconv.ParseFloat(permstr, 64); e == nil {
perms = int32(math.Min(x, 31))
} else {
		for _, ch := range strings.Split(permstr, "") {
			switch ch {
			case "r":
				perms |= zk.PermRead
			case "w":
				perms |= zk.PermWrite
			case "c":
				perms |= zk.PermCreate
			case "d":
				perms |= zk.PermDelete
			case "a":
				perms |= zk.PermAdmin
			default:
				err = errors.New("invalid ACL string specified")
			}
if err != nil {
break
}
}
}
return perms, err
}
// Delete removes a path entry. It returns an error if the path does not exist or has children.
func (zook *ZooKeeper) Delete(path string) error {
connection, err := zook.connect()
if err != nil {
return err
}
defer connection.Close()
return connection.Delete(path, -1)
}
// DeleteRecursive removes the given path and, recursively, all of its children.
func (zook *ZooKeeper) DeleteRecursive(path string) error {
result, err := zook.ChildrenRecursive(path)
if err != nil {
log.Fatale(err)
}
for i := len(result) - 1; i >= 0; i-- {
znode := path + "/" + result[i]
if err = zook.Delete(znode); err != nil {
log.Fatale(err)
}
}
return zook.Delete(path)
}
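
A minimal usage sketch for the above, assuming a local ZooKeeper reachable at localhost:2181 (the address is an assumption; the import path mirrors this vendored location). It shows the "force" semantics documented on Create: with force=true, missing parents such as /demo are created before /demo/app.

package main

import (
	"fmt"

	"vitess.io/vitess/go/vt/orchestrator/external/zk"
)

func main() {
	zook := zk.NewZooKeeper()
	zook.SetServers([]string{"localhost:2181"}) // assumed server address

	// force=true recursively creates missing parent znodes.
	if _, err := zook.Create("/demo/app", []byte("hello"), "", true); err != nil {
		fmt.Println("create failed:", err)
		return
	}
	data, err := zook.Get("/demo/app")
	fmt.Println(string(data), err)
}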

76
go/vt/orchestrator/external/zk/zk_test.go vendored Normal file

@@ -0,0 +1,76 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package zk provides higher-level commands over the lower-level zookeeper connector
package zk
import (
"testing"
"github.com/samuel/go-zookeeper/zk"
)
func TestParseACLString(t *testing.T) {
cases := []struct {
aclstr string
want []zk.ACL
}{
{"world:anyone:cdrwa", []zk.ACL{{Scheme: "world", ID: "anyone", Perms: 31}}},
{"world:anyone:rw", []zk.ACL{{Scheme: "world", ID: "anyone", Perms: 3}}},
{"world:anyone:3", []zk.ACL{{Scheme: "world", ID: "anyone", Perms: 3}}},
{"host:example.com:cdrw", []zk.ACL{{Scheme: "host", ID: "example.com", Perms: 15}}},
{"ip:10.2.1.15/32:cdrwa", []zk.ACL{{Scheme: "ip", ID: "10.2.1.15/32", Perms: 31}}},
{"digest:username:pwhash:cd", []zk.ACL{{Scheme: "digest", ID: "username:pwhash", Perms: 12}}},
{"auth::cdrwa", []zk.ACL{{Scheme: "auth", ID: "", Perms: 31}}},
}
for _, c := range cases {
zook := NewZooKeeper()
got, _ := zook.parseACLString(c.aclstr)
if !aclsEqual(got, c.want) {
t.Errorf("parseACLString(%q) == %q, want %q", c.aclstr, got, c.want)
}
}
}
func TestParseInvalidACLString(t *testing.T) {
aclstr := "world:anyone:rwb"
want := "invalid ACL string specified"
zook := NewZooKeeper()
_, err := zook.parseACLString(aclstr)
if err == nil {
t.Error("No error returned")
} else {
if err.Error() != want {
t.Errorf("parseACLString(%q) error %q, want %q", aclstr, err.Error(), want)
}
}
}
func aclsEqual(a, b []zk.ACL) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
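
The expected Perms values in the cases above are just ORed permission bits. A small sketch reproducing the arithmetic; the constants mirror those defined by github.com/samuel/go-zookeeper/zk:

package main

import "fmt"

// Permission bits as defined by github.com/samuel/go-zookeeper/zk.
const (
	PermRead = 1 << iota // 1
	PermWrite            // 2
	PermCreate           // 4
	PermDelete           // 8
	PermAdmin            // 16
)

func main() {
	fmt.Println(PermCreate | PermDelete | PermRead | PermWrite | PermAdmin) // "cdrwa" -> 31
	fmt.Println(PermRead | PermWrite)                                       // "rw"    -> 3
	fmt.Println(PermCreate | PermDelete)                                    // "cd"    -> 12
}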


@@ -0,0 +1,133 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package http
import (
"fmt"
"net/http"
"strconv"
"strings"
"github.com/go-martini/martini"
"github.com/martini-contrib/render"
"vitess.io/vitess/go/vt/orchestrator/agent"
"vitess.io/vitess/go/vt/orchestrator/attributes"
)
type HttpAgentsAPI struct {
URLPrefix string
}
var AgentsAPI HttpAgentsAPI = HttpAgentsAPI{}
// SubmitAgent registers an agent. It is initiated by an agent to register itself.
func (this *HttpAgentsAPI) SubmitAgent(params martini.Params, r render.Render) {
port, err := strconv.Atoi(params["port"])
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()})
return
}
output, err := agent.SubmitAgent(params["host"], port, params["token"])
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()})
return
}
r.JSON(200, output)
}
// SetHostAttribute is a utility method that allows per-host key-value store.
func (this *HttpAgentsAPI) SetHostAttribute(params martini.Params, r render.Render, req *http.Request) {
err := attributes.SetHostAttributes(params["host"], params["attrVame"], params["attrValue"])
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return
}
r.JSON(200, (err == nil))
}
// GetHostAttributeByAttributeName returns a host attribute
func (this *HttpAgentsAPI) GetHostAttributeByAttributeName(params martini.Params, r render.Render, req *http.Request) {
output, err := attributes.GetHostAttributesByAttribute(params["attr"], req.URL.Query().Get("valueMatch"))
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return
}
r.JSON(200, output)
}
// AgentsHosts provides list of agent host names
func (this *HttpAgentsAPI) AgentsHosts(params martini.Params, r render.Render, req *http.Request) string {
agents, err := agent.ReadAgents()
hostnames := []string{}
for _, agent := range agents {
hostnames = append(hostnames, agent.Hostname)
}
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return ""
}
if req.URL.Query().Get("format") == "txt" {
return strings.Join(hostnames, "\n")
} else {
r.JSON(200, hostnames)
}
return ""
}
// AgentsInstances provides list of assumed MySQL instances (host:port)
func (this *HttpAgentsAPI) AgentsInstances(params martini.Params, r render.Render, req *http.Request) string {
agents, err := agent.ReadAgents()
hostnames := []string{}
for _, agent := range agents {
hostnames = append(hostnames, fmt.Sprintf("%s:%d", agent.Hostname, agent.MySQLPort))
}
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return ""
}
if req.URL.Query().Get("format") == "txt" {
return strings.Join(hostnames, "\n")
} else {
r.JSON(200, hostnames)
}
return ""
}
func (this *HttpAgentsAPI) AgentPing(params martini.Params, r render.Render, req *http.Request) {
r.JSON(200, "OK")
}
// RegisterRequests serves as the de-facto list of known API calls
func (this *HttpAgentsAPI) RegisterRequests(m *martini.ClassicMartini) {
m.Get(this.URLPrefix+"/api/submit-agent/:host/:port/:token", this.SubmitAgent)
m.Get(this.URLPrefix+"/api/host-attribute/:host/:attrVame/:attrValue", this.SetHostAttribute)
m.Get(this.URLPrefix+"/api/host-attribute/attr/:attr/", this.GetHostAttributeByAttributeName)
m.Get(this.URLPrefix+"/api/agents-hosts", this.AgentsHosts)
m.Get(this.URLPrefix+"/api/agents-instances", this.AgentsInstances)
m.Get(this.URLPrefix+"/api/agent-ping", this.AgentPing)
}
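
A hedged sketch of mounting this API on a martini app. The import alias, assumed package path, and listen address are illustrative assumptions; render.Renderer() is required because the handlers take a render.Render argument.

package main

import (
	"github.com/go-martini/martini"
	"github.com/martini-contrib/render"

	orchttp "vitess.io/vitess/go/vt/orchestrator/http" // assumed import path for this package
)

func main() {
	m := martini.Classic()
	m.Use(render.Renderer())
	// Registers /api/submit-agent/:host/:port/:token and the other agent routes.
	orchttp.AgentsAPI.RegisterRequests(m)
	m.RunOnAddr(":3001") // assumed port
}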

Diff for this file not shown because of its large size.


@@ -0,0 +1,55 @@
package http
import (
"strings"
"testing"
"github.com/go-martini/martini"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
func init() {
config.Config.HostnameResolveMethod = "none"
config.MarkConfigurationLoaded()
log.SetLevel(log.ERROR)
}
func TestGetSynonymPath(t *testing.T) {
api := HttpAPI{}
{
path := "relocate-slaves"
synonym := api.getSynonymPath(path)
test.S(t).ExpectEquals(synonym, "relocate-replicas")
}
{
path := "relocate-slaves/:host/:port"
synonym := api.getSynonymPath(path)
test.S(t).ExpectEquals(synonym, "relocate-replicas/:host/:port")
}
}
func TestKnownPaths(t *testing.T) {
m := martini.Classic()
api := HttpAPI{}
api.RegisterRequests(m)
pathsMap := make(map[string]bool)
for _, path := range registeredPaths {
pathBase := strings.Split(path, "/")[0]
pathsMap[pathBase] = true
}
test.S(t).ExpectTrue(pathsMap["health"])
test.S(t).ExpectTrue(pathsMap["lb-check"])
test.S(t).ExpectTrue(pathsMap["relocate"])
test.S(t).ExpectTrue(pathsMap["relocate-slaves"])
for path, synonym := range apiSynonyms {
test.S(t).ExpectTrue(pathsMap[path])
test.S(t).ExpectTrue(pathsMap[synonym])
}
}


@@ -0,0 +1,176 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package http
import (
"fmt"
"net/http"
"strings"
"github.com/martini-contrib/auth"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/inst"
"vitess.io/vitess/go/vt/orchestrator/os"
"vitess.io/vitess/go/vt/orchestrator/process"
orcraft "vitess.io/vitess/go/vt/orchestrator/raft"
)
func getProxyAuthUser(req *http.Request) string {
for _, user := range req.Header[config.Config.AuthUserHeader] {
return user
}
return ""
}
// isAuthorizedForAction checks req to see whether authenticated user has write-privileges.
// This depends on configured authentication method.
func isAuthorizedForAction(req *http.Request, user auth.User) bool {
if config.Config.ReadOnly {
return false
}
if orcraft.IsRaftEnabled() && !orcraft.IsLeader() {
// A raft member that is not a leader is unauthorized.
return false
}
switch strings.ToLower(config.Config.AuthenticationMethod) {
case "basic":
{
// The mere fact we're here means the user has passed authentication
return true
}
case "multi":
{
if string(user) == "readonly" {
// read only
return false
}
// passed authentication ==> writeable
return true
}
case "proxy":
{
authUser := getProxyAuthUser(req)
for _, configPowerAuthUser := range config.Config.PowerAuthUsers {
if configPowerAuthUser == "*" || configPowerAuthUser == authUser {
return true
}
}
// check the user's group is one of those listed here
if len(config.Config.PowerAuthGroups) > 0 && os.UserInGroups(authUser, config.Config.PowerAuthGroups) {
return true
}
return false
}
case "token":
{
cookie, err := req.Cookie("access-token")
if err != nil {
return false
}
publicToken := strings.Split(cookie.Value, ":")[0]
secretToken := strings.Split(cookie.Value, ":")[1]
result, _ := process.TokenIsValid(publicToken, secretToken)
return result
}
case "oauth":
{
return false
}
default:
{
// Default: no authentication method
return true
}
}
}
func authenticateToken(publicToken string, resp http.ResponseWriter) error {
secretToken, err := process.AcquireAccessToken(publicToken)
if err != nil {
return err
}
cookieValue := fmt.Sprintf("%s:%s", publicToken, secretToken)
cookie := &http.Cookie{Name: "access-token", Value: cookieValue, Path: "/"}
http.SetCookie(resp, cookie)
return nil
}
// getUserId returns the authenticated user id, if available, depending on the authentication method.
func getUserId(req *http.Request, user auth.User) string {
if config.Config.ReadOnly {
return ""
}
switch strings.ToLower(config.Config.AuthenticationMethod) {
case "basic":
{
return string(user)
}
case "multi":
{
return string(user)
}
case "proxy":
{
return getProxyAuthUser(req)
}
case "token":
{
return ""
}
default:
{
return ""
}
}
}
func getClusterHint(params map[string]string) string {
if params["clusterHint"] != "" {
return params["clusterHint"]
}
if params["clusterName"] != "" {
return params["clusterName"]
}
if params["host"] != "" && params["port"] != "" {
return fmt.Sprintf("%s:%s", params["host"], params["port"])
}
return ""
}
// figureClusterName is a convenience function to get a cluster name from hints
func figureClusterName(hint string) (clusterName string, err error) {
if hint == "" {
return "", fmt.Errorf("Unable to determine cluster name by empty hint")
}
instanceKey, _ := inst.ParseRawInstanceKey(hint)
return inst.FigureClusterName(hint, instanceKey, nil)
}
// getClusterNameIfExists returns a cluster name by params hint, or an empty cluster name
// if no hint is given
func getClusterNameIfExists(params map[string]string) (clusterName string, err error) {
if clusterHint := getClusterHint(params); clusterHint == "" {
return "", nil
} else {
return figureClusterName(clusterHint)
}
}
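
For the "proxy" method above, the authenticated user is simply read from a trusted header set by the fronting proxy. A standalone sketch mirroring the unexported getProxyAuthUser; the header name X-Forwarded-User is an assumption, standing in for whatever AuthUserHeader is configured to.

package main

import (
	"fmt"
	"net/http"
)

// proxyAuthUser mirrors the unexported getProxyAuthUser above: it returns the
// first value of the given auth header, or "" when the header is absent.
func proxyAuthUser(req *http.Request, header string) string {
	for _, user := range req.Header[header] {
		return user
	}
	return ""
}

func main() {
	req, _ := http.NewRequest("GET", "http://orchestrator/web/clusters", nil)
	req.Header.Set("X-Forwarded-User", "alice") // assumed AuthUserHeader value
	fmt.Println(proxyAuthUser(req, "X-Forwarded-User")) // alice
}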


@@ -0,0 +1,48 @@
package http
import (
"net/http"
"net/http/httputil"
"net/url"
"strings"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
orcraft "vitess.io/vitess/go/vt/orchestrator/raft"
"github.com/go-martini/martini"
"vitess.io/vitess/go/vt/orchestrator/config"
)
func raftReverseProxy(w http.ResponseWriter, r *http.Request, c martini.Context) {
if !orcraft.IsRaftEnabled() {
// No raft, so no reverse proxy to the leader
return
}
if orcraft.IsLeader() {
// I am the leader. I will handle the request directly.
return
}
if orcraft.GetLeader() == "" {
return
}
if orcraft.LeaderURI.IsThisLeaderURI() {
		// Although I'm not the leader, the value I see for LeaderURI is my own.
		// I'm probably not up-to-date with my raft transaction log and don't have the latest information.
		// Either way, I'm obviously not going to redirect to myself.
		// Returning here isn't ideal, because I'm not really the leader: if the user tries to
		// run an operation, it will fail.
return
}
url, err := url.Parse(orcraft.LeaderURI.Get())
if err != nil {
log.Errore(err)
return
}
r.Header.Del("Accept-Encoding")
switch strings.ToLower(config.Config.AuthenticationMethod) {
case "basic", "multi":
r.SetBasicAuth(config.Config.HTTPAuthUser, config.Config.HTTPAuthPassword)
}
proxy := httputil.NewSingleHostReverseProxy(url)
proxy.ServeHTTP(w, r)
}
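
Stripped of the raft checks, the forwarding at the end is plain net/http/httputil. A minimal sketch under assumed addresses (leader URI and listen port are illustrative):

package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Forward every incoming request verbatim to a fixed leader URI.
	leader, err := url.Parse("http://leader.example:3000") // assumed leader address
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(leader)
	log.Fatal(http.ListenAndServe(":3000", proxy))
}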


@@ -0,0 +1,476 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package http
import (
"expvar"
"fmt"
"net/http"
"net/http/pprof"
"strconv"
"text/template"
"github.com/go-martini/martini"
"github.com/martini-contrib/auth"
"github.com/martini-contrib/render"
"github.com/rcrowley/go-metrics"
"github.com/rcrowley/go-metrics/exp"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/inst"
)
// HttpWeb is the web requests server, mapping each request to a web page
type HttpWeb struct {
URLPrefix string
}
var Web HttpWeb = HttpWeb{}
func (this *HttpWeb) getInstanceKey(host string, port string) (inst.InstanceKey, error) {
instanceKey := inst.InstanceKey{Hostname: host}
var err error
if instanceKey.Port, err = strconv.Atoi(port); err != nil {
return instanceKey, fmt.Errorf("Invalid port: %s", port)
}
return instanceKey, err
}
func (this *HttpWeb) AccessToken(params martini.Params, r render.Render, req *http.Request, resp http.ResponseWriter, user auth.User) {
publicToken := template.JSEscapeString(req.URL.Query().Get("publicToken"))
err := authenticateToken(publicToken, resp)
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return
}
r.Redirect(this.URLPrefix + "/")
}
func (this *HttpWeb) Index(params martini.Params, r render.Render, req *http.Request, user auth.User) {
// Redirect index so that all web URLs begin with "/web/".
// We also redirect /web/ to /web/clusters so that
// the Clusters page has a single canonical URL.
r.Redirect(this.URLPrefix + "/web/clusters")
}
func (this *HttpWeb) Clusters(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/clusters", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "clusters",
"autoshow_problems": false,
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"removeTextFromHostnameDisplay": config.Config.RemoveTextFromHostnameDisplay,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) ClustersAnalysis(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/clusters_analysis", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "clusters",
"autoshow_problems": false,
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"removeTextFromHostnameDisplay": config.Config.RemoveTextFromHostnameDisplay,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Cluster(params martini.Params, r render.Render, req *http.Request, user auth.User) {
clusterName, _ := figureClusterName(params["clusterName"])
r.HTML(200, "templates/cluster", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "cluster",
"clusterName": clusterName,
"autoshow_problems": true,
"contextMenuVisible": true,
"pseudoGTIDModeEnabled": (config.Config.PseudoGTIDPattern != ""),
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"removeTextFromHostnameDisplay": config.Config.RemoveTextFromHostnameDisplay,
"compactDisplay": template.JSEscapeString(req.URL.Query().Get("compact")),
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) ClusterByAlias(params martini.Params, r render.Render, req *http.Request, user auth.User) {
clusterName, err := inst.GetClusterByAlias(params["clusterAlias"])
// Willing to accept the case of multiple clusters; we just present one
if clusterName == "" && err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return
}
params["clusterName"] = clusterName
this.Cluster(params, r, req, user)
}
func (this *HttpWeb) ClusterByInstance(params martini.Params, r render.Render, req *http.Request, user auth.User) {
instanceKey, err := this.getInstanceKey(params["host"], params["port"])
if err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()})
return
}
instance, found, err := inst.ReadInstance(&instanceKey)
if (!found) || (err != nil) {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("Cannot read instance: %+v", instanceKey)})
return
}
// Willing to accept the case of multiple clusters; we just present one
if instance.ClusterName == "" && err != nil {
r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("%+v", err)})
return
}
params["clusterName"] = instance.ClusterName
this.Cluster(params, r, req, user)
}
func (this *HttpWeb) ClusterPools(params martini.Params, r render.Render, req *http.Request, user auth.User) {
clusterName, _ := figureClusterName(params["clusterName"])
r.HTML(200, "templates/cluster_pools", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "cluster pools",
"clusterName": clusterName,
"autoshow_problems": false, // because pool screen by default expands all hosts
"contextMenuVisible": true,
"pseudoGTIDModeEnabled": (config.Config.PseudoGTIDPattern != ""),
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"removeTextFromHostnameDisplay": config.Config.RemoveTextFromHostnameDisplay,
"compactDisplay": template.JSEscapeString(req.URL.Query().Get("compact")),
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Search(params martini.Params, r render.Render, req *http.Request, user auth.User) {
searchString := params["searchString"]
if searchString == "" {
searchString = req.URL.Query().Get("s")
}
searchString = template.JSEscapeString(searchString)
r.HTML(200, "templates/search", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "search",
"searchString": searchString,
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Discover(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/discover", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "discover",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Audit(params martini.Params, r render.Render, req *http.Request, user auth.User) {
page, err := strconv.Atoi(params["page"])
if err != nil {
page = 0
}
r.HTML(200, "templates/audit", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "audit",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"page": page,
"auditHostname": params["host"],
"auditPort": params["port"],
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) AuditRecovery(params martini.Params, r render.Render, req *http.Request, user auth.User) {
page, err := strconv.Atoi(params["page"])
if err != nil {
page = 0
}
recoveryId, err := strconv.ParseInt(params["id"], 10, 0)
if err != nil {
recoveryId = 0
}
recoveryUid := params["uid"]
clusterAlias := params["clusterAlias"]
clusterName, _ := figureClusterName(params["clusterName"])
r.HTML(200, "templates/audit_recovery", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "audit-recovery",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"page": page,
"clusterName": clusterName,
"clusterAlias": clusterAlias,
"recoveryId": recoveryId,
"recoveryUid": recoveryUid,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) AuditFailureDetection(params martini.Params, r render.Render, req *http.Request, user auth.User) {
page, err := strconv.Atoi(params["page"])
if err != nil {
page = 0
}
detectionId, err := strconv.ParseInt(params["id"], 10, 0)
if err != nil {
detectionId = 0
}
clusterAlias := params["clusterAlias"]
r.HTML(200, "templates/audit_failure_detection", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "audit-failure-detection",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"page": page,
"detectionId": detectionId,
"clusterAlias": clusterAlias,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Agents(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/agents", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "agents",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Agent(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/agent", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "agent",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"agentHost": params["host"],
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) AgentSeedDetails(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/agent_seed_details", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "agent seed details",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"seedId": params["seedId"],
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Seeds(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/seeds", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "seeds",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Home(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/home", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "home",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) About(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/about", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "about",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) KeepCalm(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/keep-calm", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "Keep Calm",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) FAQ(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/faq", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "FAQ",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) Status(params martini.Params, r render.Render, req *http.Request, user auth.User) {
r.HTML(200, "templates/status", map[string]interface{}{
"agentsHttpActive": config.Config.ServeAgentsHttp,
"title": "status",
"authorizedForAction": isAuthorizedForAction(req, user),
"userId": getUserId(req, user),
"autoshow_problems": false,
"prefix": this.URLPrefix,
"webMessage": config.Config.WebMessage,
})
}
func (this *HttpWeb) registerWebRequest(m *martini.ClassicMartini, path string, handler martini.Handler) {
fullPath := fmt.Sprintf("%s/web/%s", this.URLPrefix, path)
if path == "/" {
fullPath = fmt.Sprintf("%s/", this.URLPrefix)
}
if config.Config.RaftEnabled {
m.Get(fullPath, raftReverseProxy, handler)
} else {
m.Get(fullPath, handler)
}
}
// RegisterRequests serves as the de-facto list of known Web calls
func (this *HttpWeb) RegisterRequests(m *martini.ClassicMartini) {
this.registerWebRequest(m, "access-token", this.AccessToken)
this.registerWebRequest(m, "", this.Index)
this.registerWebRequest(m, "/", this.Index)
this.registerWebRequest(m, "home", this.About)
this.registerWebRequest(m, "about", this.About)
this.registerWebRequest(m, "keep-calm", this.KeepCalm)
this.registerWebRequest(m, "faq", this.FAQ)
this.registerWebRequest(m, "status", this.Status)
this.registerWebRequest(m, "clusters", this.Clusters)
this.registerWebRequest(m, "clusters-analysis", this.ClustersAnalysis)
this.registerWebRequest(m, "cluster/:clusterName", this.Cluster)
this.registerWebRequest(m, "cluster/alias/:clusterAlias", this.ClusterByAlias)
this.registerWebRequest(m, "cluster/instance/:host/:port", this.ClusterByInstance)
this.registerWebRequest(m, "cluster-pools/:clusterName", this.ClusterPools)
this.registerWebRequest(m, "search/:searchString", this.Search)
this.registerWebRequest(m, "search", this.Search)
this.registerWebRequest(m, "discover", this.Discover)
this.registerWebRequest(m, "audit", this.Audit)
this.registerWebRequest(m, "audit/:page", this.Audit)
this.registerWebRequest(m, "audit/instance/:host/:port", this.Audit)
this.registerWebRequest(m, "audit/instance/:host/:port/:page", this.Audit)
this.registerWebRequest(m, "audit-recovery", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/:page", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/id/:id", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/uid/:uid", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/cluster/:clusterName", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/cluster/:clusterName/:page", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/alias/:clusterAlias", this.AuditRecovery)
this.registerWebRequest(m, "audit-recovery/alias/:clusterAlias/:page", this.AuditRecovery)
this.registerWebRequest(m, "audit-failure-detection", this.AuditFailureDetection)
this.registerWebRequest(m, "audit-failure-detection/:page", this.AuditFailureDetection)
this.registerWebRequest(m, "audit-failure-detection/id/:id", this.AuditFailureDetection)
this.registerWebRequest(m, "audit-failure-detection/alias/:clusterAlias", this.AuditFailureDetection)
this.registerWebRequest(m, "audit-failure-detection/alias/:clusterAlias/:page", this.AuditFailureDetection)
this.registerWebRequest(m, "audit-recovery-steps/:uid", this.AuditRecovery)
this.registerWebRequest(m, "agents", this.Agents)
this.registerWebRequest(m, "agent/:host", this.Agent)
this.registerWebRequest(m, "seed-details/:seedId", this.AgentSeedDetails)
this.registerWebRequest(m, "seeds", this.Seeds)
this.RegisterDebug(m)
}
// RegisterDebug adds handlers for /debug/vars (expvar) and /debug/pprof (net/http/pprof) support
func (this *HttpWeb) RegisterDebug(m *martini.ClassicMartini) {
m.Get(this.URLPrefix+"/debug/vars", func(w http.ResponseWriter, r *http.Request) {
// from expvar.go, since the expvarHandler isn't exported :(
w.Header().Set("Content-Type", "application/json; charset=utf-8")
fmt.Fprintf(w, "{\n")
first := true
expvar.Do(func(kv expvar.KeyValue) {
if !first {
fmt.Fprintf(w, ",\n")
}
first = false
fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value)
})
fmt.Fprintf(w, "\n}\n")
})
// list all the /debug/ endpoints we want
m.Get(this.URLPrefix+"/debug/pprof", pprof.Index)
m.Get(this.URLPrefix+"/debug/pprof/cmdline", pprof.Cmdline)
m.Get(this.URLPrefix+"/debug/pprof/profile", pprof.Profile)
m.Get(this.URLPrefix+"/debug/pprof/symbol", pprof.Symbol)
m.Post(this.URLPrefix+"/debug/pprof/symbol", pprof.Symbol)
m.Get(this.URLPrefix+"/debug/pprof/block", pprof.Handler("block").ServeHTTP)
m.Get(this.URLPrefix+"/debug/pprof/heap", pprof.Handler("heap").ServeHTTP)
m.Get(this.URLPrefix+"/debug/pprof/goroutine", pprof.Handler("goroutine").ServeHTTP)
m.Get(this.URLPrefix+"/debug/pprof/threadcreate", pprof.Handler("threadcreate").ServeHTTP)
// go-metrics
m.Get(this.URLPrefix+"/debug/metrics", exp.ExpHandler(metrics.DefaultRegistry))
}
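
As the comment in RegisterDebug notes, the JSON writer is copied from expvar.go because expvarHandler was unexported. Since Go 1.8 the stdlib exposes expvar.Handler(), so outside of martini routing a sketch like this suffices (port is an assumption):

package main

import (
	"expvar"
	"log"
	"net/http"
)

func main() {
	// expvar.Handler() (Go 1.8+) serves the same JSON dump of published vars.
	http.Handle("/debug/vars", expvar.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}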


@@ -0,0 +1,226 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"encoding/json"
"fmt"
"strings"
"vitess.io/vitess/go/vt/orchestrator/config"
)
type AnalysisCode string
type StructureAnalysisCode string
const (
NoProblem AnalysisCode = "NoProblem"
DeadMasterWithoutReplicas = "DeadMasterWithoutReplicas"
DeadMaster = "DeadMaster"
DeadMasterAndReplicas = "DeadMasterAndReplicas"
DeadMasterAndSomeReplicas = "DeadMasterAndSomeReplicas"
UnreachableMasterWithLaggingReplicas = "UnreachableMasterWithLaggingReplicas"
UnreachableMaster = "UnreachableMaster"
MasterSingleReplicaNotReplicating = "MasterSingleReplicaNotReplicating"
MasterSingleReplicaDead = "MasterSingleReplicaDead"
AllMasterReplicasNotReplicating = "AllMasterReplicasNotReplicating"
AllMasterReplicasNotReplicatingOrDead = "AllMasterReplicasNotReplicatingOrDead"
LockedSemiSyncMasterHypothesis = "LockedSemiSyncMasterHypothesis"
LockedSemiSyncMaster = "LockedSemiSyncMaster"
MasterWithoutReplicas = "MasterWithoutReplicas"
DeadCoMaster = "DeadCoMaster"
DeadCoMasterAndSomeReplicas = "DeadCoMasterAndSomeReplicas"
UnreachableCoMaster = "UnreachableCoMaster"
AllCoMasterReplicasNotReplicating = "AllCoMasterReplicasNotReplicating"
DeadIntermediateMaster = "DeadIntermediateMaster"
DeadIntermediateMasterWithSingleReplica = "DeadIntermediateMasterWithSingleReplica"
DeadIntermediateMasterWithSingleReplicaFailingToConnect = "DeadIntermediateMasterWithSingleReplicaFailingToConnect"
DeadIntermediateMasterAndSomeReplicas = "DeadIntermediateMasterAndSomeReplicas"
DeadIntermediateMasterAndReplicas = "DeadIntermediateMasterAndReplicas"
UnreachableIntermediateMasterWithLaggingReplicas = "UnreachableIntermediateMasterWithLaggingReplicas"
UnreachableIntermediateMaster = "UnreachableIntermediateMaster"
AllIntermediateMasterReplicasFailingToConnectOrDead = "AllIntermediateMasterReplicasFailingToConnectOrDead"
AllIntermediateMasterReplicasNotReplicating = "AllIntermediateMasterReplicasNotReplicating"
FirstTierReplicaFailingToConnectToMaster = "FirstTierReplicaFailingToConnectToMaster"
BinlogServerFailingToConnectToMaster = "BinlogServerFailingToConnectToMaster"
)
const (
StatementAndMixedLoggingReplicasStructureWarning StructureAnalysisCode = "StatementAndMixedLoggingReplicasStructureWarning"
StatementAndRowLoggingReplicasStructureWarning = "StatementAndRowLoggingReplicasStructureWarning"
MixedAndRowLoggingReplicasStructureWarning = "MixedAndRowLoggingReplicasStructureWarning"
MultipleMajorVersionsLoggingReplicasStructureWarning = "MultipleMajorVersionsLoggingReplicasStructureWarning"
NoLoggingReplicasStructureWarning = "NoLoggingReplicasStructureWarning"
DifferentGTIDModesStructureWarning = "DifferentGTIDModesStructureWarning"
ErrantGTIDStructureWarning = "ErrantGTIDStructureWarning"
NoFailoverSupportStructureWarning = "NoFailoverSupportStructureWarning"
NoWriteableMasterStructureWarning = "NoWriteableMasterStructureWarning"
NotEnoughValidSemiSyncReplicasStructureWarning = "NotEnoughValidSemiSyncReplicasStructureWarning"
)
type InstanceAnalysis struct {
key *InstanceKey
analysis AnalysisCode
}
func NewInstanceAnalysis(instanceKey *InstanceKey, analysis AnalysisCode) *InstanceAnalysis {
return &InstanceAnalysis{
key: instanceKey,
analysis: analysis,
}
}
func (instanceAnalysis *InstanceAnalysis) String() string {
return fmt.Sprintf("%s/%s", instanceAnalysis.key.StringCode(), string(instanceAnalysis.analysis))
}
// PeerAnalysisMap indicates the number of peers agreeing on an analysis.
// The key of this map is an InstanceAnalysis.String()
type PeerAnalysisMap map[string]int
type ReplicationAnalysisHints struct {
IncludeDowntimed bool
IncludeNoProblem bool
AuditAnalysis bool
}
const (
ForceMasterFailoverCommandHint string = "force-master-failover"
ForceMasterTakeoverCommandHint string = "force-master-takeover"
GracefulMasterTakeoverCommandHint string = "graceful-master-takeover"
)
type AnalysisInstanceType string
const (
AnalysisInstanceTypeMaster AnalysisInstanceType = "master"
AnalysisInstanceTypeCoMaster AnalysisInstanceType = "co-master"
AnalysisInstanceTypeIntermediateMaster AnalysisInstanceType = "intermediate-master"
)
// ReplicationAnalysis notes analysis on replication chain status, per instance
type ReplicationAnalysis struct {
AnalyzedInstanceKey InstanceKey
AnalyzedInstanceMasterKey InstanceKey
ClusterDetails ClusterInfo
AnalyzedInstanceDataCenter string
AnalyzedInstanceRegion string
AnalyzedInstancePhysicalEnvironment string
AnalyzedInstanceBinlogCoordinates BinlogCoordinates
IsMaster bool
IsCoMaster bool
LastCheckValid bool
LastCheckPartialSuccess bool
CountReplicas uint
CountValidReplicas uint
CountValidReplicatingReplicas uint
CountReplicasFailingToConnectToMaster uint
CountDowntimedReplicas uint
ReplicationDepth uint
Replicas InstanceKeyMap
SlaveHosts InstanceKeyMap // for backwards compatibility. Equals `Replicas`
IsFailingToConnectToMaster bool
Analysis AnalysisCode
Description string
StructureAnalysis []StructureAnalysisCode
IsDowntimed bool
IsReplicasDowntimed bool // as good as downtimed because all replicas are downtimed AND analysis is all about the replicas (e.g. AllMasterReplicasNotReplicating)
DowntimeEndTimestamp string
DowntimeRemainingSeconds int
IsBinlogServer bool
PseudoGTIDImmediateTopology bool
OracleGTIDImmediateTopology bool
MariaDBGTIDImmediateTopology bool
BinlogServerImmediateTopology bool
SemiSyncMasterEnabled bool
SemiSyncMasterStatus bool
SemiSyncMasterWaitForReplicaCount uint
SemiSyncMasterClients uint
CountSemiSyncReplicasEnabled uint
CountLoggingReplicas uint
CountStatementBasedLoggingReplicas uint
CountMixedBasedLoggingReplicas uint
CountRowBasedLoggingReplicas uint
CountDistinctMajorVersionsLoggingReplicas uint
CountDelayedReplicas uint
CountLaggingReplicas uint
IsActionableRecovery bool
ProcessingNodeHostname string
ProcessingNodeToken string
CountAdditionalAgreeingNodes int
StartActivePeriod string
SkippableDueToDowntime bool
GTIDMode string
MinReplicaGTIDMode string
MaxReplicaGTIDMode string
MaxReplicaGTIDErrant string
CommandHint string
IsReadOnly bool
}
type AnalysisMap map[string](*ReplicationAnalysis)
type ReplicationAnalysisChangelog struct {
AnalyzedInstanceKey InstanceKey
Changelog []string
}
func (this *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
i := struct {
ReplicationAnalysis
}{}
i.ReplicationAnalysis = *this
// backwards compatibility
i.SlaveHosts = i.Replicas
return json.Marshal(i)
}
// ReadReplicaHostsFromString parses and reads replica keys from comma delimited string
func (this *ReplicationAnalysis) ReadReplicaHostsFromString(replicaHostsString string) error {
this.Replicas = *NewInstanceKeyMap()
return this.Replicas.ReadCommaDelimitedList(replicaHostsString)
}
// AnalysisString returns a human friendly description of all analysis issues
func (this *ReplicationAnalysis) AnalysisString() string {
result := []string{}
if this.Analysis != NoProblem {
result = append(result, string(this.Analysis))
}
for _, structureAnalysis := range this.StructureAnalysis {
result = append(result, string(structureAnalysis))
}
return strings.Join(result, ", ")
}
// GetAnalysisInstanceType returns a string description of the analyzed instance type (master / co-master / intermediate-master)
func (this *ReplicationAnalysis) GetAnalysisInstanceType() AnalysisInstanceType {
if this.IsCoMaster {
return AnalysisInstanceTypeCoMaster
}
if this.IsMaster {
return AnalysisInstanceTypeMaster
}
return AnalysisInstanceTypeIntermediateMaster
}
// ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
// between last_attempted_check and last_checked before we consider the instance as invalid.
func ValidSecondsFromSeenToLastAttemptedCheck() uint {
return config.Config.InstancePollSeconds + 1
}
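
MarshalJSON above keeps the deprecated SlaveHosts field populated in emitted JSON by copying Replicas into it at encode time. A self-contained sketch of the same idea, here using a method-less defined type to avoid marshaling recursion (field types simplified to strings):

package main

import (
	"encoding/json"
	"fmt"
)

type analysis struct {
	Replicas   []string
	SlaveHosts []string // deprecated; populated from Replicas for backwards compatibility
}

func (a analysis) MarshalJSON() ([]byte, error) {
	type alias analysis // alias has no methods, so marshaling it cannot recurse
	out := alias(a)
	out.SlaveHosts = out.Replicas
	return json.Marshal(out)
}

func main() {
	b, _ := json.Marshal(analysis{Replicas: []string{"db1:3306"}})
	fmt.Println(string(b)) // {"Replicas":["db1:3306"],"SlaveHosts":["db1:3306"]}
}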


@@ -0,0 +1,875 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"regexp"
"time"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/process"
orcraft "vitess.io/vitess/go/vt/orchestrator/raft"
"vitess.io/vitess/go/vt/orchestrator/util"
"github.com/patrickmn/go-cache"
"github.com/rcrowley/go-metrics"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)
var analysisChangeWriteAttemptCounter = metrics.NewCounter()
var analysisChangeWriteCounter = metrics.NewCounter()
var recentInstantAnalysis *cache.Cache
func init() {
metrics.Register("analysis.change.write.attempt", analysisChangeWriteAttemptCounter)
metrics.Register("analysis.change.write", analysisChangeWriteCounter)
go initializeAnalysisDaoPostConfiguration()
}
func initializeAnalysisDaoPostConfiguration() {
config.WaitForConfigurationToBeLoaded()
recentInstantAnalysis = cache.New(time.Duration(config.RecoveryPollSeconds*2)*time.Second, time.Second)
}
// GetReplicationAnalysis will check for replication problems (dead master; unreachable master; etc)
func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) ([]ReplicationAnalysis, error) {
result := []ReplicationAnalysis{}
args := sqlutils.Args(config.Config.ReasonableReplicationLagSeconds, ValidSecondsFromSeenToLastAttemptedCheck(), config.Config.ReasonableReplicationLagSeconds, clusterName)
analysisQueryReductionClause := ``
if config.Config.ReduceReplicationAnalysisCount {
analysisQueryReductionClause = `
HAVING
(
MIN(
master_instance.last_checked <= master_instance.last_seen
and master_instance.last_attempted_check <= master_instance.last_seen + interval ? second
) = 1
/* AS is_last_check_valid */
) = 0
OR (
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.slave_io_running = 0
AND replica_instance.last_io_error like '%error %connecting to master%'
AND replica_instance.slave_sql_running = 1
),
0
)
/* AS count_replicas_failing_to_connect_to_master */
> 0
)
OR (
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
),
0
)
/* AS count_valid_replicas */
< COUNT(replica_instance.server_id)
/* AS count_replicas */
)
OR (
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.slave_io_running != 0
AND replica_instance.slave_sql_running != 0
),
0
)
/* AS count_valid_replicating_replicas */
< COUNT(replica_instance.server_id)
/* AS count_replicas */
)
OR (
MIN(
master_instance.slave_sql_running = 1
AND master_instance.slave_io_running = 0
AND master_instance.last_io_error like '%error %connecting to master%'
)
/* AS is_failing_to_connect_to_master */
)
OR (
COUNT(replica_instance.server_id)
/* AS count_replicas */
> 0
)
`
args = append(args, ValidSecondsFromSeenToLastAttemptedCheck())
}
// "OR count_replicas > 0" above is a recent addition, which, granted, makes some previous conditions redundant.
// It gives more output, and more "NoProblem" messages that I am now interested in for the purpose of auditing in database_instance_analysis_changelog
query := fmt.Sprintf(`
SELECT
master_instance.hostname,
master_instance.port,
master_instance.read_only AS read_only,
MIN(master_instance.data_center) AS data_center,
MIN(master_instance.region) AS region,
MIN(master_instance.physical_environment) AS physical_environment,
MIN(master_instance.master_host) AS master_host,
MIN(master_instance.master_port) AS master_port,
MIN(master_instance.cluster_name) AS cluster_name,
MIN(master_instance.binary_log_file) AS binary_log_file,
MIN(master_instance.binary_log_pos) AS binary_log_pos,
MIN(
IFNULL(
master_instance.binary_log_file = database_instance_stale_binlog_coordinates.binary_log_file
AND master_instance.binary_log_pos = database_instance_stale_binlog_coordinates.binary_log_pos
AND database_instance_stale_binlog_coordinates.first_seen < NOW() - interval ? second,
0
)
) AS is_stale_binlog_coordinates,
MIN(
IFNULL(
cluster_alias.alias,
master_instance.cluster_name
)
) AS cluster_alias,
MIN(
IFNULL(
cluster_domain_name.domain_name,
master_instance.cluster_name
)
) AS cluster_domain,
MIN(
master_instance.last_checked <= master_instance.last_seen
and master_instance.last_attempted_check <= master_instance.last_seen + interval ? second
) = 1 AS is_last_check_valid,
/* To be considered a master, traditional async replication must not be present/valid AND the host should either */
/* not be a replication group member OR be the primary of the replication group */
MIN(master_instance.last_check_partial_success) as last_check_partial_success,
MIN(
(
master_instance.master_host IN ('', '_')
OR master_instance.master_port = 0
OR substr(master_instance.master_host, 1, 2) = '//'
)
AND (
master_instance.replication_group_name = ''
OR master_instance.replication_group_member_role = 'PRIMARY'
)
) AS is_master,
MIN(master_instance.is_co_master) AS is_co_master,
MIN(
CONCAT(
master_instance.hostname,
':',
master_instance.port
) = master_instance.cluster_name
) AS is_cluster_master,
MIN(master_instance.gtid_mode) AS gtid_mode,
COUNT(replica_instance.server_id) AS count_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
),
0
) AS count_valid_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.slave_io_running != 0
AND replica_instance.slave_sql_running != 0
),
0
) AS count_valid_replicating_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.slave_io_running = 0
AND replica_instance.last_io_error like '%%error %%connecting to master%%'
AND replica_instance.slave_sql_running = 1
),
0
) AS count_replicas_failing_to_connect_to_master,
MIN(master_instance.replication_depth) AS replication_depth,
GROUP_CONCAT(
concat(
replica_instance.Hostname,
':',
replica_instance.Port
)
) as slave_hosts,
MIN(
master_instance.slave_sql_running = 1
AND master_instance.slave_io_running = 0
AND master_instance.last_io_error like '%%error %%connecting to master%%'
) AS is_failing_to_connect_to_master,
MIN(
master_downtime.downtime_active is not null
and ifnull(master_downtime.end_timestamp, now()) > now()
) AS is_downtimed,
MIN(
IFNULL(master_downtime.end_timestamp, '')
) AS downtime_end_timestamp,
MIN(
IFNULL(
unix_timestamp() - unix_timestamp(master_downtime.end_timestamp),
0
)
) AS downtime_remaining_seconds,
MIN(
master_instance.binlog_server
) AS is_binlog_server,
MIN(master_instance.pseudo_gtid) AS is_pseudo_gtid,
MIN(
master_instance.supports_oracle_gtid
) AS supports_oracle_gtid,
MIN(
master_instance.semi_sync_master_enabled
) AS semi_sync_master_enabled,
MIN(
master_instance.semi_sync_master_wait_for_slave_count
) AS semi_sync_master_wait_for_slave_count,
MIN(
master_instance.semi_sync_master_clients
) AS semi_sync_master_clients,
MIN(
master_instance.semi_sync_master_status
) AS semi_sync_master_status,
SUM(replica_instance.is_co_master) AS count_co_master_replicas,
SUM(replica_instance.oracle_gtid) AS count_oracle_gtid_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.oracle_gtid != 0
),
0
) AS count_valid_oracle_gtid_replicas,
SUM(
replica_instance.binlog_server
) AS count_binlog_server_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.binlog_server != 0
),
0
) AS count_valid_binlog_server_replicas,
SUM(
replica_instance.semi_sync_replica_enabled
) AS count_semi_sync_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.semi_sync_replica_enabled != 0
),
0
) AS count_valid_semi_sync_replicas,
MIN(
master_instance.mariadb_gtid
) AS is_mariadb_gtid,
SUM(replica_instance.mariadb_gtid) AS count_mariadb_gtid_replicas,
IFNULL(
SUM(
replica_instance.last_checked <= replica_instance.last_seen
AND replica_instance.mariadb_gtid != 0
),
0
) AS count_valid_mariadb_gtid_replicas,
IFNULL(
SUM(
replica_instance.log_bin
AND replica_instance.log_slave_updates
),
0
) AS count_logging_replicas,
IFNULL(
SUM(
replica_instance.log_bin
AND replica_instance.log_slave_updates
AND replica_instance.binlog_format = 'STATEMENT'
),
0
) AS count_statement_based_logging_replicas,
IFNULL(
SUM(
replica_instance.log_bin
AND replica_instance.log_slave_updates
AND replica_instance.binlog_format = 'MIXED'
),
0
) AS count_mixed_based_logging_replicas,
IFNULL(
SUM(
replica_instance.log_bin
AND replica_instance.log_slave_updates
AND replica_instance.binlog_format = 'ROW'
),
0
) AS count_row_based_logging_replicas,
IFNULL(
SUM(replica_instance.sql_delay > 0),
0
) AS count_delayed_replicas,
IFNULL(
SUM(replica_instance.slave_lag_seconds > ?),
0
) AS count_lagging_replicas,
IFNULL(MIN(replica_instance.gtid_mode), '') AS min_replica_gtid_mode,
IFNULL(MAX(replica_instance.gtid_mode), '') AS max_replica_gtid_mode,
IFNULL(
MAX(
CASE WHEN replica_downtime.downtime_active IS NOT NULL
AND IFNULL(replica_downtime.end_timestamp, NOW()) > NOW() THEN '' ELSE replica_instance.gtid_errant END
),
''
) AS max_replica_gtid_errant,
IFNULL(
SUM(
replica_downtime.downtime_active IS NOT NULL
AND IFNULL(replica_downtime.end_timestamp, NOW()) > NOW()
),
0
) AS count_downtimed_replicas,
COUNT(
DISTINCT CASE WHEN replica_instance.log_bin
AND replica_instance.log_slave_updates THEN replica_instance.major_version ELSE NULL END
) AS count_distinct_logging_major_versions
FROM
database_instance master_instance
LEFT JOIN hostname_resolve ON (
master_instance.hostname = hostname_resolve.hostname
)
LEFT JOIN database_instance replica_instance ON (
COALESCE(
hostname_resolve.resolved_hostname,
master_instance.hostname
) = replica_instance.master_host
AND master_instance.port = replica_instance.master_port
)
LEFT JOIN database_instance_maintenance ON (
master_instance.hostname = database_instance_maintenance.hostname
AND master_instance.port = database_instance_maintenance.port
AND database_instance_maintenance.maintenance_active = 1
)
LEFT JOIN database_instance_stale_binlog_coordinates ON (
master_instance.hostname = database_instance_stale_binlog_coordinates.hostname
AND master_instance.port = database_instance_stale_binlog_coordinates.port
)
LEFT JOIN database_instance_downtime as master_downtime ON (
master_instance.hostname = master_downtime.hostname
AND master_instance.port = master_downtime.port
AND master_downtime.downtime_active = 1
)
LEFT JOIN database_instance_downtime as replica_downtime ON (
replica_instance.hostname = replica_downtime.hostname
AND replica_instance.port = replica_downtime.port
AND replica_downtime.downtime_active = 1
)
LEFT JOIN cluster_alias ON (
cluster_alias.cluster_name = master_instance.cluster_name
)
LEFT JOIN cluster_domain_name ON (
cluster_domain_name.cluster_name = master_instance.cluster_name
)
WHERE
database_instance_maintenance.database_instance_maintenance_id IS NULL
AND ? IN ('', master_instance.cluster_name)
GROUP BY
master_instance.hostname,
master_instance.port
%s
ORDER BY
is_master DESC,
is_cluster_master DESC,
count_replicas DESC
`,
analysisQueryReductionClause)
err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error {
a := ReplicationAnalysis{
Analysis: NoProblem,
ProcessingNodeHostname: process.ThisHostname,
ProcessingNodeToken: util.ProcessToken.Hash,
}
a.IsMaster = m.GetBool("is_master")
countCoMasterReplicas := m.GetUint("count_co_master_replicas")
a.IsCoMaster = m.GetBool("is_co_master") || (countCoMasterReplicas > 0)
a.AnalyzedInstanceKey = InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")}
a.AnalyzedInstanceMasterKey = InstanceKey{Hostname: m.GetString("master_host"), Port: m.GetInt("master_port")}
a.AnalyzedInstanceDataCenter = m.GetString("data_center")
a.AnalyzedInstanceRegion = m.GetString("region")
a.AnalyzedInstancePhysicalEnvironment = m.GetString("physical_environment")
a.AnalyzedInstanceBinlogCoordinates = BinlogCoordinates{
LogFile: m.GetString("binary_log_file"),
LogPos: m.GetInt64("binary_log_pos"),
Type: BinaryLog,
}
isStaleBinlogCoordinates := m.GetBool("is_stale_binlog_coordinates")
a.ClusterDetails.ClusterName = m.GetString("cluster_name")
a.ClusterDetails.ClusterAlias = m.GetString("cluster_alias")
a.ClusterDetails.ClusterDomain = m.GetString("cluster_domain")
a.GTIDMode = m.GetString("gtid_mode")
a.LastCheckValid = m.GetBool("is_last_check_valid")
a.LastCheckPartialSuccess = m.GetBool("last_check_partial_success")
a.CountReplicas = m.GetUint("count_replicas")
a.CountValidReplicas = m.GetUint("count_valid_replicas")
a.CountValidReplicatingReplicas = m.GetUint("count_valid_replicating_replicas")
a.CountReplicasFailingToConnectToMaster = m.GetUint("count_replicas_failing_to_connect_to_master")
a.CountDowntimedReplicas = m.GetUint("count_downtimed_replicas")
a.ReplicationDepth = m.GetUint("replication_depth")
a.IsFailingToConnectToMaster = m.GetBool("is_failing_to_connect_to_master")
a.IsDowntimed = m.GetBool("is_downtimed")
a.DowntimeEndTimestamp = m.GetString("downtime_end_timestamp")
a.DowntimeRemainingSeconds = m.GetInt("downtime_remaining_seconds")
a.IsBinlogServer = m.GetBool("is_binlog_server")
a.ClusterDetails.ReadRecoveryInfo()
a.Replicas = *NewInstanceKeyMap()
a.Replicas.ReadCommaDelimitedList(m.GetString("slave_hosts"))
countValidOracleGTIDReplicas := m.GetUint("count_valid_oracle_gtid_replicas")
a.OracleGTIDImmediateTopology = countValidOracleGTIDReplicas == a.CountValidReplicas && a.CountValidReplicas > 0
countValidMariaDBGTIDReplicas := m.GetUint("count_valid_mariadb_gtid_replicas")
a.MariaDBGTIDImmediateTopology = countValidMariaDBGTIDReplicas == a.CountValidReplicas && a.CountValidReplicas > 0
countValidBinlogServerReplicas := m.GetUint("count_valid_binlog_server_replicas")
a.BinlogServerImmediateTopology = countValidBinlogServerReplicas == a.CountValidReplicas && a.CountValidReplicas > 0
a.PseudoGTIDImmediateTopology = m.GetBool("is_pseudo_gtid")
a.SemiSyncMasterEnabled = m.GetBool("semi_sync_master_enabled")
a.SemiSyncMasterStatus = m.GetBool("semi_sync_master_status")
a.CountSemiSyncReplicasEnabled = m.GetUint("count_semi_sync_replicas")
// countValidSemiSyncReplicasEnabled := m.GetUint("count_valid_semi_sync_replicas")
a.SemiSyncMasterWaitForReplicaCount = m.GetUint("semi_sync_master_wait_for_slave_count")
a.SemiSyncMasterClients = m.GetUint("semi_sync_master_clients")
a.MinReplicaGTIDMode = m.GetString("min_replica_gtid_mode")
a.MaxReplicaGTIDMode = m.GetString("max_replica_gtid_mode")
a.MaxReplicaGTIDErrant = m.GetString("max_replica_gtid_errant")
a.CountLoggingReplicas = m.GetUint("count_logging_replicas")
a.CountStatementBasedLoggingReplicas = m.GetUint("count_statement_based_logging_replicas")
a.CountMixedBasedLoggingReplicas = m.GetUint("count_mixed_based_logging_replicas")
a.CountRowBasedLoggingReplicas = m.GetUint("count_row_based_logging_replicas")
a.CountDistinctMajorVersionsLoggingReplicas = m.GetUint("count_distinct_logging_major_versions")
a.CountDelayedReplicas = m.GetUint("count_delayed_replicas")
a.CountLaggingReplicas = m.GetUint("count_lagging_replicas")
a.IsReadOnly = m.GetUint("read_only") == 1
if !a.LastCheckValid {
analysisMessage := fmt.Sprintf("analysis: ClusterName: %+v, IsMaster: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v, CountReplicasFailingToConnectToMaster: %+v",
a.ClusterDetails.ClusterName, a.IsMaster, a.LastCheckValid, a.LastCheckPartialSuccess, a.CountReplicas, a.CountValidReplicas, a.CountValidReplicatingReplicas, a.CountLaggingReplicas, a.CountDelayedReplicas, a.CountReplicasFailingToConnectToMaster,
)
if util.ClearToLog("analysis_dao", analysisMessage) {
log.Debugf(analysisMessage)
}
}
if a.IsMaster && !a.LastCheckValid && a.CountReplicas == 0 {
a.Analysis = DeadMasterWithoutReplicas
a.Description = "Master cannot be reached by orchestrator and has no replica"
//
} else if a.IsMaster && !a.LastCheckValid && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadMaster
a.Description = "Master cannot be reached by orchestrator and none of its replicas is replicating"
//
} else if a.IsMaster && !a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicas == 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadMasterAndReplicas
a.Description = "Master cannot be reached by orchestrator and none of its replicas is replicating"
//
} else if a.IsMaster && !a.LastCheckValid && a.CountValidReplicas < a.CountReplicas && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadMasterAndSomeReplicas
a.Description = "Master cannot be reached by orchestrator; some of its replicas are unreachable and none of its reachable replicas is replicating"
//
} else if a.IsMaster && !a.LastCheckValid && a.CountLaggingReplicas == a.CountReplicas && a.CountDelayedReplicas < a.CountReplicas && a.CountValidReplicatingReplicas > 0 {
a.Analysis = UnreachableMasterWithLaggingReplicas
a.Description = "Master cannot be reached by orchestrator and all of its replicas are lagging"
//
} else if a.IsMaster && !a.LastCheckValid && !a.LastCheckPartialSuccess && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 {
// partial success is here to reduce noise
a.Analysis = UnreachableMaster
a.Description = "Master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue"
//
} else if a.IsMaster && !a.LastCheckValid && a.LastCheckPartialSuccess && a.CountReplicasFailingToConnectToMaster > 0 && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 {
// there's partial success, but also at least one replica is failing to connect to master
a.Analysis = UnreachableMaster
a.Description = "Master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue"
//
} else if a.IsMaster && a.SemiSyncMasterEnabled && a.SemiSyncMasterStatus && a.SemiSyncMasterWaitForReplicaCount > 0 && a.SemiSyncMasterClients < a.SemiSyncMasterWaitForReplicaCount {
if isStaleBinlogCoordinates {
a.Analysis = LockedSemiSyncMaster
a.Description = "Semi sync master is locked since it doesn't get enough replica acknowledgements"
} else {
a.Analysis = LockedSemiSyncMasterHypothesis
a.Description = "Semi sync master seems to be locked, more samplings needed to validate"
}
//
} else if a.IsMaster && a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = MasterSingleReplicaNotReplicating
a.Description = "Master is reachable but its single replica is not replicating"
//
} else if a.IsMaster && a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == 0 {
a.Analysis = MasterSingleReplicaDead
a.Description = "Master is reachable but its single replica is dead"
//
} else if a.IsMaster && a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = AllMasterReplicasNotReplicating
a.Description = "Master is reachable but none of its replicas is replicating"
//
} else if a.IsMaster && a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas < a.CountReplicas && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = AllMasterReplicasNotReplicatingOrDead
a.Description = "Master is reachable but none of its replicas is replicating"
//
} else /* co-master */ if a.IsCoMaster && !a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadCoMaster
a.Description = "Co-master cannot be reached by orchestrator and none of its replicas is replicating"
//
} else if a.IsCoMaster && !a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicas < a.CountReplicas && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadCoMasterAndSomeReplicas
a.Description = "Co-master cannot be reached by orchestrator; some of its replicas are unreachable and none of its reachable replicas is replicating"
//
} else if a.IsCoMaster && !a.LastCheckValid && !a.LastCheckPartialSuccess && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 {
a.Analysis = UnreachableCoMaster
a.Description = "Co-master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue"
//
} else if a.IsCoMaster && a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = AllCoMasterReplicasNotReplicating
a.Description = "Co-master is reachable but none of its replicas is replicating"
//
} else /* intermediate-master */ if !a.IsMaster && !a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountReplicasFailingToConnectToMaster == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadIntermediateMasterWithSingleReplicaFailingToConnect
a.Description = "Intermediate master cannot be reached by orchestrator and its (single) replica is failing to connect"
//
} else if !a.IsMaster && !a.LastCheckValid && a.CountReplicas == 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadIntermediateMasterWithSingleReplica
a.Description = "Intermediate master cannot be reached by orchestrator and its (single) replica is not replicating"
//
} else if !a.IsMaster && !a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicas == a.CountReplicas && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadIntermediateMaster
a.Description = "Intermediate master cannot be reached by orchestrator and none of its replicas is replicating"
//
} else if !a.IsMaster && !a.LastCheckValid && a.CountValidReplicas < a.CountReplicas && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = DeadIntermediateMasterAndSomeReplicas
a.Description = "Intermediate master cannot be reached by orchestrator; some of its replicas are unreachable and none of its reachable replicas is replicating"
//
} else if !a.IsMaster && !a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicas == 0 {
a.Analysis = DeadIntermediateMasterAndReplicas
a.Description = "Intermediate master cannot be reached by orchestrator and all of its replicas are unreachable"
//
} else if !a.IsMaster && !a.LastCheckValid && a.CountLaggingReplicas == a.CountReplicas && a.CountDelayedReplicas < a.CountReplicas && a.CountValidReplicatingReplicas > 0 {
a.Analysis = UnreachableIntermediateMasterWithLaggingReplicas
a.Description = "Intermediate master cannot be reached by orchestrator and all of its replicas are lagging"
//
} else if !a.IsMaster && !a.LastCheckValid && !a.LastCheckPartialSuccess && a.CountValidReplicas > 0 && a.CountValidReplicatingReplicas > 0 {
a.Analysis = UnreachableIntermediateMaster
a.Description = "Intermediate master cannot be reached by orchestrator but it has replicating replicas; possibly a network/host issue"
//
} else if !a.IsMaster && a.LastCheckValid && a.CountReplicas > 1 && a.CountValidReplicatingReplicas == 0 &&
a.CountReplicasFailingToConnectToMaster > 0 && a.CountReplicasFailingToConnectToMaster == a.CountValidReplicas {
// All replicas are either failing to connect to the master (and at least one such replica must exist)
// or completely dead.
// Must have at least two replicas to reach such a conclusion -- do note that the intermediate master is
// still reachable by orchestrator, so at this point we base the conclusion on replicas only.
a.Analysis = AllIntermediateMasterReplicasFailingToConnectOrDead
a.Description = "Intermediate master is reachable but all of its replicas are failing to connect"
//
} else if !a.IsMaster && a.LastCheckValid && a.CountReplicas > 0 && a.CountValidReplicatingReplicas == 0 {
a.Analysis = AllIntermediateMasterReplicasNotReplicating
a.Description = "Intermediate master is reachable but none of its replicas is replicating"
//
} else if a.IsBinlogServer && a.IsFailingToConnectToMaster {
a.Analysis = BinlogServerFailingToConnectToMaster
a.Description = "Binlog server is unable to connect to its master"
//
} else if a.ReplicationDepth == 1 && a.IsFailingToConnectToMaster {
a.Analysis = FirstTierReplicaFailingToConnectToMaster
a.Description = "1st tier replica (directly replicating from topology master) is unable to connect to the master"
//
}
// else if a.IsMaster && a.CountReplicas == 0 {
// a.Analysis = MasterWithoutReplicas
// a.Description = "Master has no replicas"
// }
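// Illustrative walkthrough (sketch): a master failing its health check while all of its
// replicas are reachable but none is replicating matches the DeadMaster branch above; if
// some replicas are unreachable as well, the analysis degrades to DeadMasterAndSomeReplicas.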
appendAnalysis := func(analysis *ReplicationAnalysis) {
if a.Analysis == NoProblem && len(a.StructureAnalysis) == 0 && !hints.IncludeNoProblem {
return
}
for _, filter := range config.Config.RecoveryIgnoreHostnameFilters {
if matched, _ := regexp.MatchString(filter, a.AnalyzedInstanceKey.Hostname); matched {
return
}
}
if a.IsDowntimed {
a.SkippableDueToDowntime = true
}
if a.CountReplicas == a.CountDowntimedReplicas {
switch a.Analysis {
case AllMasterReplicasNotReplicating,
AllMasterReplicasNotReplicatingOrDead,
MasterSingleReplicaDead,
AllCoMasterReplicasNotReplicating,
DeadIntermediateMasterWithSingleReplica,
DeadIntermediateMasterWithSingleReplicaFailingToConnect,
DeadIntermediateMasterAndReplicas,
DeadIntermediateMasterAndSomeReplicas,
AllIntermediateMasterReplicasFailingToConnectOrDead,
AllIntermediateMasterReplicasNotReplicating:
a.IsReplicasDowntimed = true
a.SkippableDueToDowntime = true
}
}
if a.SkippableDueToDowntime && !hints.IncludeDowntimed {
return
}
result = append(result, a)
}
{
// Moving on to structure analysis: structural checks that flag potential danger in promotions
if a.IsMaster && a.CountLoggingReplicas == 0 && a.CountReplicas > 1 {
a.StructureAnalysis = append(a.StructureAnalysis, NoLoggingReplicasStructureWarning)
}
if a.IsMaster && a.CountReplicas > 1 &&
!a.OracleGTIDImmediateTopology &&
!a.MariaDBGTIDImmediateTopology &&
!a.BinlogServerImmediateTopology &&
!a.PseudoGTIDImmediateTopology {
a.StructureAnalysis = append(a.StructureAnalysis, NoFailoverSupportStructureWarning)
}
if a.IsMaster && a.CountStatementBasedLoggingReplicas > 0 && a.CountMixedBasedLoggingReplicas > 0 {
a.StructureAnalysis = append(a.StructureAnalysis, StatementAndMixedLoggingReplicasStructureWarning)
}
if a.IsMaster && a.CountStatementBasedLoggingReplicas > 0 && a.CountRowBasedLoggingReplicas > 0 {
a.StructureAnalysis = append(a.StructureAnalysis, StatementAndRowLoggingReplicasStructureWarning)
}
if a.IsMaster && a.CountMixedBasedLoggingReplicas > 0 && a.CountRowBasedLoggingReplicas > 0 {
a.StructureAnalysis = append(a.StructureAnalysis, MixedAndRowLoggingReplicasStructureWarning)
}
if a.IsMaster && a.CountDistinctMajorVersionsLoggingReplicas > 1 {
a.StructureAnalysis = append(a.StructureAnalysis, MultipleMajorVersionsLoggingReplicasStructureWarning)
}
if a.CountReplicas > 0 && (a.GTIDMode != a.MinReplicaGTIDMode || a.GTIDMode != a.MaxReplicaGTIDMode) {
a.StructureAnalysis = append(a.StructureAnalysis, DifferentGTIDModesStructureWarning)
}
if a.MaxReplicaGTIDErrant != "" {
a.StructureAnalysis = append(a.StructureAnalysis, ErrantGTIDStructureWarning)
}
if a.IsMaster && a.IsReadOnly {
a.StructureAnalysis = append(a.StructureAnalysis, NoWriteableMasterStructureWarning)
}
if a.IsMaster && a.SemiSyncMasterEnabled && !a.SemiSyncMasterStatus && a.SemiSyncMasterWaitForReplicaCount > 0 && a.SemiSyncMasterClients < a.SemiSyncMasterWaitForReplicaCount {
a.StructureAnalysis = append(a.StructureAnalysis, NotEnoughValidSemiSyncReplicasStructureWarning)
}
}
appendAnalysis(&a)
if a.CountReplicas > 0 && hints.AuditAnalysis {
// Interesting enough for analysis
go auditInstanceAnalysisInChangelog(&a.AnalyzedInstanceKey, a.Analysis)
}
return nil
})
if err != nil {
return result, log.Errore(err)
}
// TODO: result, err = getConcensusReplicationAnalysis(result)
return result, log.Errore(err)
}
func getConcensusReplicationAnalysis(analysisEntries []ReplicationAnalysis) ([]ReplicationAnalysis, error) {
if !orcraft.IsRaftEnabled() {
return analysisEntries, nil
}
if !config.Config.ExpectFailureAnalysisConcensus {
return analysisEntries, nil
}
concensusAnalysisEntries := []ReplicationAnalysis{}
peerAnalysisMap, err := ReadPeerAnalysisMap()
if err != nil {
return analysisEntries, err
}
quorumSize, err := orcraft.QuorumSize()
if err != nil {
return analysisEntries, err
}
for _, analysisEntry := range analysisEntries {
instanceAnalysis := NewInstanceAnalysis(&analysisEntry.AnalyzedInstanceKey, analysisEntry.Analysis)
analysisKey := instanceAnalysis.String()
peerAnalysisCount := peerAnalysisMap[analysisKey]
if 1+peerAnalysisCount >= quorumSize {
// this node and enough other nodes in agreement
concensusAnalysisEntries = append(concensusAnalysisEntries, analysisEntry)
}
}
return concensusAnalysisEntries, nil
}
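// Illustrative (sketch): in a 3-node raft deployment QuorumSize() is 2, so an analysis
// entry survives the filter above when this node plus at least one peer report the same
// instance/analysis pair, i.e. 1+peerAnalysisCount >= 2.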
// auditInstanceAnalysisInChangelog will write down an instance's analysis in the database_instance_analysis_changelog table.
// To avoid repeatedly writing recurring analysis codes, the database_instance_last_analysis table is
// used, so that only changes to analysis codes are written.
func auditInstanceAnalysisInChangelog(instanceKey *InstanceKey, analysisCode AnalysisCode) error {
if lastWrittenAnalysis, found := recentInstantAnalysis.Get(instanceKey.DisplayString()); found {
if lastWrittenAnalysis == analysisCode {
// Surely nothing new; extend the cache expiry.
recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
return nil
}
}
// Passed the in-memory cache; but does the database agree that there's a change? The table acts as a
// persistent cache; this check guards against two orchestrator services operating without coordination
// (namely, one dies, the other takes its place and has no familiarity with the former's cache).
analysisChangeWriteAttemptCounter.Inc(1)
lastAnalysisChanged := false
{
sqlResult, err := db.ExecOrchestrator(`
update database_instance_last_analysis set
analysis = ?,
analysis_timestamp = now()
where
hostname = ?
and port = ?
and analysis != ?
`,
string(analysisCode), instanceKey.Hostname, instanceKey.Port, string(analysisCode),
)
if err != nil {
return log.Errore(err)
}
rows, err := sqlResult.RowsAffected()
if err != nil {
return log.Errore(err)
}
lastAnalysisChanged = (rows > 0)
}
if !lastAnalysisChanged {
_, err := db.ExecOrchestrator(`
insert ignore into database_instance_last_analysis (
hostname, port, analysis_timestamp, analysis
) values (
?, ?, now(), ?
)
`,
instanceKey.Hostname, instanceKey.Port, string(analysisCode),
)
if err != nil {
return log.Errore(err)
}
}
recentInstantAnalysis.Set(instanceKey.DisplayString(), analysisCode, cache.DefaultExpiration)
if !lastAnalysisChanged {
return nil
}
_, err := db.ExecOrchestrator(`
insert into database_instance_analysis_changelog (
hostname, port, analysis_timestamp, analysis
) values (
?, ?, now(), ?
)
`,
instanceKey.Hostname, instanceKey.Port, string(analysisCode),
)
if err == nil {
analysisChangeWriteCounter.Inc(1)
}
return log.Errore(err)
}
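// Illustrative flow (sketch): a recurring analysis code is normally short-circuited by the
// in-memory cache above; with a cold cache (e.g. a freshly promoted orchestrator service),
// the UPDATE matches 0 rows for an unchanged code and the INSERT IGNORE is a no-op for a
// known instance, so only true changes reach the changelog.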
// ExpireInstanceAnalysisChangelog removes old-enough analysis entries from the changelog
func ExpireInstanceAnalysisChangelog() error {
_, err := db.ExecOrchestrator(`
delete
from database_instance_analysis_changelog
where
analysis_timestamp < now() - interval ? hour
`,
config.Config.UnseenInstanceForgetHours,
)
return log.Errore(err)
}
// ReadReplicationAnalysisChangelog reads, per instance, the changelog of analysis entries, ordered chronologically
func ReadReplicationAnalysisChangelog() (res [](*ReplicationAnalysisChangelog), err error) {
query := `
select
hostname,
port,
analysis_timestamp,
analysis
from
database_instance_analysis_changelog
order by
hostname, port, changelog_id
`
analysisChangelog := &ReplicationAnalysisChangelog{}
err = db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error {
key := InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")}
if !analysisChangelog.AnalyzedInstanceKey.Equals(&key) {
analysisChangelog = &ReplicationAnalysisChangelog{AnalyzedInstanceKey: key, Changelog: []string{}}
res = append(res, analysisChangelog)
}
analysisEntry := fmt.Sprintf("%s;%s,", m.GetString("analysis_timestamp"), m.GetString("analysis"))
analysisChangelog.Changelog = append(analysisChangelog.Changelog, analysisEntry)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// ReadPeerAnalysisMap reads raft-peer failure analysis, and returns a PeerAnalysisMap,
// indicating how many peers see which analysis
func ReadPeerAnalysisMap() (peerAnalysisMap PeerAnalysisMap, err error) {
peerAnalysisMap = make(PeerAnalysisMap)
query := `
select
hostname,
port,
analysis
from
database_instance_peer_analysis
order by
peer, hostname, port
`
err = db.QueryOrchestratorRowsMap(query, func(m sqlutils.RowMap) error {
instanceKey := InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")}
analysis := m.GetString("analysis")
instanceAnalysis := NewInstanceAnalysis(&instanceKey, AnalysisCode(analysis))
mapKey := instanceAnalysis.String()
peerAnalysisMap[mapKey] = peerAnalysisMap[mapKey] + 1
return nil
})
return peerAnalysisMap, log.Errore(err)
}
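// Illustrative (sketch; the key format is whatever InstanceAnalysis.String() yields):
// two peers reporting DeadMaster for the same instance yield a map value of 2 for that
// key, which getConcensusReplicationAnalysis then measures against the raft quorum size.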


@@ -0,0 +1,50 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"testing"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
func init() {
config.Config.HostnameResolveMethod = "none"
config.MarkConfigurationLoaded()
log.SetLevel(log.ERROR)
}
func TestGetAnalysisInstanceType(t *testing.T) {
{
analysis := &ReplicationAnalysis{}
test.S(t).ExpectEquals(string(analysis.GetAnalysisInstanceType()), "intermediate-master")
}
{
analysis := &ReplicationAnalysis{IsMaster: true}
test.S(t).ExpectEquals(string(analysis.GetAnalysisInstanceType()), "master")
}
{
analysis := &ReplicationAnalysis{IsCoMaster: true}
test.S(t).ExpectEquals(string(analysis.GetAnalysisInstanceType()), "co-master")
}
{
analysis := &ReplicationAnalysis{IsMaster: true, IsCoMaster: true}
test.S(t).ExpectEquals(string(analysis.GetAnalysisInstanceType()), "co-master")
}
}


@@ -0,0 +1,26 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
// Audit presents a single audit entry (namely in the database)
type Audit struct {
AuditId int64
AuditTimestamp string
AuditType string
AuditInstanceKey InstanceKey
Message string
}


@@ -0,0 +1,160 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"log/syslog"
"os"
"time"
"github.com/rcrowley/go-metrics"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)
// syslogWriter is optional, and defaults to nil (disabled)
var syslogWriter *syslog.Writer
var auditOperationCounter = metrics.NewCounter()
func init() {
metrics.Register("audit.write", auditOperationCounter)
}
// EnableAuditSyslog enables, if possible, audit writes to syslog. These will execute _in addition_ to normal logging
func EnableAuditSyslog() (err error) {
syslogWriter, err = syslog.New(syslog.LOG_ERR, "orchestrator")
if err != nil {
syslogWriter = nil
}
return err
}
// AuditOperation creates and writes a new audit entry using the given params
func AuditOperation(auditType string, instanceKey *InstanceKey, message string) error {
if instanceKey == nil {
instanceKey = &InstanceKey{}
}
clusterName := ""
if instanceKey.Hostname != "" {
clusterName, _ = GetClusterName(instanceKey)
}
auditWrittenToFile := false
if config.Config.AuditLogFile != "" {
auditWrittenToFile = true
go func() error {
f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640)
if err != nil {
return log.Errore(err)
}
defer f.Close()
text := fmt.Sprintf("%s\t%s\t%s\t%d\t[%s]\t%s\t\n", time.Now().Format(log.TimeFormat), auditType, instanceKey.Hostname, instanceKey.Port, clusterName, message)
if _, err = f.WriteString(text); err != nil {
return log.Errore(err)
}
return nil
}()
}
if config.Config.AuditToBackendDB {
_, err := db.ExecOrchestrator(`
insert
into audit (
audit_timestamp, audit_type, hostname, port, cluster_name, message
) VALUES (
NOW(), ?, ?, ?, ?, ?
)
`,
auditType,
instanceKey.Hostname,
instanceKey.Port,
clusterName,
message,
)
if err != nil {
return log.Errore(err)
}
}
logMessage := fmt.Sprintf("auditType:%s instance:%s cluster:%s message:%s", auditType, instanceKey.DisplayString(), clusterName, message)
if syslogWriter != nil {
auditWrittenToFile = true
go func() {
syslogWriter.Info(logMessage)
}()
}
if !auditWrittenToFile {
log.Infof(logMessage)
}
auditOperationCounter.Inc(1)
return nil
}
// ReadRecentAudit returns a list of audit entries ordered chronologically descending, using the given page number.
func ReadRecentAudit(instanceKey *InstanceKey, page int) ([]Audit, error) {
res := []Audit{}
args := sqlutils.Args()
whereCondition := ``
if instanceKey != nil {
whereCondition = `where hostname=? and port=?`
args = append(args, instanceKey.Hostname, instanceKey.Port)
}
query := fmt.Sprintf(`
select
audit_id,
audit_timestamp,
audit_type,
hostname,
port,
message
from
audit
%s
order by
audit_timestamp desc
limit ?
offset ?
`, whereCondition)
args = append(args, config.AuditPageSize, page*config.AuditPageSize)
err := db.QueryOrchestrator(query, args, func(m sqlutils.RowMap) error {
audit := Audit{}
audit.AuditId = m.GetInt64("audit_id")
audit.AuditTimestamp = m.GetString("audit_timestamp")
audit.AuditType = m.GetString("audit_type")
audit.AuditInstanceKey.Hostname = m.GetString("hostname")
audit.AuditInstanceKey.Port = m.GetInt("port")
audit.Message = m.GetString("message")
res = append(res, audit)
return nil
})
if err != nil {
log.Errore(err)
}
return res, err
}
// ExpireAudit removes old rows from the audit table
func ExpireAudit() error {
return ExpireTableData("audit", "audit_timestamp")
}


@@ -0,0 +1,196 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
var detachPattern *regexp.Regexp
func init() {
detachPattern, _ = regexp.Compile(`//([^/:]+):([\d]+)`) // e.g. `//binlog.01234:567890`
}
type BinlogType int
const (
BinaryLog BinlogType = iota
RelayLog
)
// BinlogCoordinates describes binary log coordinates in the form of log file & log position.
type BinlogCoordinates struct {
LogFile string
LogPos int64
Type BinlogType
}
// rpad right-pads the binlog coordinates' string form to a given length. If the
// string exceeds that length, *length is updated so it can be reused later. This
// is to ensure consistent formatting in debug output.
func rpad(coordinates BinlogCoordinates, length *int) string {
s := fmt.Sprintf("%+v", coordinates)
if len(s) > *length {
*length = len(s)
}
if len(s) >= *length {
return s
}
return fmt.Sprintf("%s%s", s, strings.Repeat(" ", *length-len(s)))
}
// ParseBinlogCoordinates will parse BinlogCoordinates from a string representation such as mysql-bin.000010:108
func ParseBinlogCoordinates(logFileLogPos string) (*BinlogCoordinates, error) {
tokens := strings.SplitN(logFileLogPos, ":", 2)
if len(tokens) != 2 {
return nil, fmt.Errorf("ParseBinlogCoordinates: Cannot parse BinlogCoordinates from %s. Expected format is file:pos", logFileLogPos)
}
if logPos, err := strconv.ParseInt(tokens[1], 10, 0); err != nil {
return nil, fmt.Errorf("ParseBinlogCoordinates: invalid pos: %s", tokens[1])
} else {
return &BinlogCoordinates{LogFile: tokens[0], LogPos: logPos}, nil
}
}
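// Illustrative usage (sketch, matching the test coordinates used in the tests below):
//
//	coords, err := ParseBinlogCoordinates("mysql-bin.000010:108")
//	// err == nil; coords.LogFile == "mysql-bin.000010"; coords.LogPos == 108
//	_, err = ParseBinlogCoordinates("mysql-bin.000010") // error: expected format is file:pos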
// DisplayString returns a user-friendly string representation of these coordinates
func (this *BinlogCoordinates) DisplayString() string {
return fmt.Sprintf("%s:%d", this.LogFile, this.LogPos)
}
// String returns a user-friendly string representation of these coordinates
func (this BinlogCoordinates) String() string {
return this.DisplayString()
}
// Equals tests equality of this coordinate and another one.
func (this *BinlogCoordinates) Equals(other *BinlogCoordinates) bool {
if other == nil {
return false
}
return this.LogFile == other.LogFile && this.LogPos == other.LogPos && this.Type == other.Type
}
// IsEmpty returns true if the log file is empty (unnamed)
func (this *BinlogCoordinates) IsEmpty() bool {
return this.LogFile == ""
}
// SmallerThan returns true if this coordinate is strictly smaller than the other.
func (this *BinlogCoordinates) SmallerThan(other *BinlogCoordinates) bool {
if this.LogFile < other.LogFile {
return true
}
if this.LogFile == other.LogFile && this.LogPos < other.LogPos {
return true
}
return false
}
// SmallerThanOrEquals returns true if this coordinate is smaller than, or equal to, the other one.
// We do NOT compare the type, so we cannot use this.Equals()
func (this *BinlogCoordinates) SmallerThanOrEquals(other *BinlogCoordinates) bool {
if this.SmallerThan(other) {
return true
}
return this.LogFile == other.LogFile && this.LogPos == other.LogPos // No Type comparison
}
// FileSmallerThan returns true if this coordinate's file is strictly smaller than the other's.
func (this *BinlogCoordinates) FileSmallerThan(other *BinlogCoordinates) bool {
return this.LogFile < other.LogFile
}
// FileNumberDistance returns the numeric distance between this coordinate's file number and the other's.
// Effectively it means "how many rotations/FLUSHes would make this coordinate's file reach the other's"
func (this *BinlogCoordinates) FileNumberDistance(other *BinlogCoordinates) int {
thisNumber, _ := this.FileNumber()
otherNumber, _ := other.FileNumber()
return otherNumber - thisNumber
}
// FileNumber returns the numeric value of the file, and the length in characters representing the number in the filename.
// Example: FileNumber() of mysqld.log.000789 is (789, 6)
func (this *BinlogCoordinates) FileNumber() (int, int) {
tokens := strings.Split(this.LogFile, ".")
numPart := tokens[len(tokens)-1]
numLen := len(numPart)
fileNum, err := strconv.Atoi(numPart)
if err != nil {
return 0, 0
}
return fileNum, numLen
}
// PreviousFileCoordinatesBy guesses the filename of the previous binlog/relaylog, by a given offset (number of files back)
func (this *BinlogCoordinates) PreviousFileCoordinatesBy(offset int) (BinlogCoordinates, error) {
result := BinlogCoordinates{LogPos: 0, Type: this.Type}
fileNum, numLen := this.FileNumber()
if fileNum == 0 {
return result, errors.New("Log file number is zero, cannot detect previous file")
}
newNumStr := fmt.Sprintf("%d", (fileNum - offset))
newNumStr = strings.Repeat("0", numLen-len(newNumStr)) + newNumStr
tokens := strings.Split(this.LogFile, ".")
tokens[len(tokens)-1] = newNumStr
result.LogFile = strings.Join(tokens, ".")
return result, nil
}
// PreviousFileCoordinates guesses the filename of the previous binlog/relaylog
func (this *BinlogCoordinates) PreviousFileCoordinates() (BinlogCoordinates, error) {
return this.PreviousFileCoordinatesBy(1)
}
// NextFileCoordinates guesses the filename of the next binlog/relaylog
func (this *BinlogCoordinates) NextFileCoordinates() (BinlogCoordinates, error) {
result := BinlogCoordinates{LogPos: 0, Type: this.Type}
fileNum, numLen := this.FileNumber()
newNumStr := fmt.Sprintf("%d", (fileNum + 1))
newNumStr = strings.Repeat("0", numLen-len(newNumStr)) + newNumStr
tokens := strings.Split(this.LogFile, ".")
tokens[len(tokens)-1] = newNumStr
result.LogFile = strings.Join(tokens, ".")
return result, nil
}
// Detach returns a detached form of these coordinates
func (this *BinlogCoordinates) Detach() (detachedCoordinates BinlogCoordinates) {
detachedCoordinates = BinlogCoordinates{LogFile: fmt.Sprintf("//%s:%d", this.LogFile, this.LogPos), LogPos: this.LogPos}
return detachedCoordinates
}
// ExtractDetachedCoordinates reports whether this coordinate is in detached form, and returns the original coordinates it encodes
func (this *BinlogCoordinates) ExtractDetachedCoordinates() (isDetached bool, detachedCoordinates BinlogCoordinates) {
detachedCoordinatesSubmatch := detachPattern.FindStringSubmatch(this.LogFile)
if len(detachedCoordinatesSubmatch) == 0 {
return false, *this
}
detachedCoordinates.LogFile = detachedCoordinatesSubmatch[1]
detachedCoordinates.LogPos, _ = strconv.ParseInt(detachedCoordinatesSubmatch[2], 10, 0)
return true, detachedCoordinates
}


@@ -0,0 +1,138 @@
package inst
import (
"testing"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
var testCoordinates = BinlogCoordinates{LogFile: "mysql-bin.000010", LogPos: 108}
func init() {
config.Config.HostnameResolveMethod = "none"
config.Config.KVClusterMasterPrefix = "test/master/"
config.MarkConfigurationLoaded()
log.SetLevel(log.ERROR)
}
func TestDetach(t *testing.T) {
detachedCoordinates := testCoordinates.Detach()
test.S(t).ExpectEquals(detachedCoordinates.LogFile, "//mysql-bin.000010:108")
test.S(t).ExpectEquals(detachedCoordinates.LogPos, testCoordinates.LogPos)
}
func TestDetachedCoordinates(t *testing.T) {
isDetached, detachedCoordinates := testCoordinates.ExtractDetachedCoordinates()
test.S(t).ExpectFalse(isDetached)
test.S(t).ExpectEquals(detachedCoordinates.LogFile, testCoordinates.LogFile)
test.S(t).ExpectEquals(detachedCoordinates.LogPos, testCoordinates.LogPos)
}
func TestDetachedCoordinates2(t *testing.T) {
detached := testCoordinates.Detach()
isDetached, coordinates := detached.ExtractDetachedCoordinates()
test.S(t).ExpectTrue(isDetached)
test.S(t).ExpectEquals(coordinates.LogFile, testCoordinates.LogFile)
test.S(t).ExpectEquals(coordinates.LogPos, testCoordinates.LogPos)
}
func TestPreviousFileCoordinates(t *testing.T) {
previous, err := testCoordinates.PreviousFileCoordinates()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(previous.LogFile, "mysql-bin.000009")
test.S(t).ExpectEquals(previous.LogPos, int64(0))
}
func TestNextFileCoordinates(t *testing.T) {
next, err := testCoordinates.NextFileCoordinates()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(next.LogFile, "mysql-bin.000011")
test.S(t).ExpectEquals(next.LogPos, int64(0))
}
func TestBinlogCoordinates(t *testing.T) {
c1 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
c2 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
c3 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 5000}
c4 := BinlogCoordinates{LogFile: "mysql-bin.00112", LogPos: 104}
test.S(t).ExpectTrue(c1.Equals(&c2))
test.S(t).ExpectFalse(c1.Equals(&c3))
test.S(t).ExpectFalse(c1.Equals(&c4))
test.S(t).ExpectFalse(c1.SmallerThan(&c2))
test.S(t).ExpectTrue(c1.SmallerThan(&c3))
test.S(t).ExpectTrue(c1.SmallerThan(&c4))
test.S(t).ExpectTrue(c3.SmallerThan(&c4))
test.S(t).ExpectFalse(c3.SmallerThan(&c2))
test.S(t).ExpectFalse(c4.SmallerThan(&c2))
test.S(t).ExpectFalse(c4.SmallerThan(&c3))
test.S(t).ExpectTrue(c1.SmallerThanOrEquals(&c2))
test.S(t).ExpectTrue(c1.SmallerThanOrEquals(&c3))
}
func TestBinlogPrevious(t *testing.T) {
c1 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
cres, err := c1.PreviousFileCoordinates()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c1.Type, cres.Type)
test.S(t).ExpectEquals(cres.LogFile, "mysql-bin.00016")
c2 := BinlogCoordinates{LogFile: "mysql-bin.00100", LogPos: 104}
cres, err = c2.PreviousFileCoordinates()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c1.Type, cres.Type)
test.S(t).ExpectEquals(cres.LogFile, "mysql-bin.00099")
c3 := BinlogCoordinates{LogFile: "mysql.00.prod.com.00100", LogPos: 104}
cres, err = c3.PreviousFileCoordinates()
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(c1.Type, cres.Type)
test.S(t).ExpectEquals(cres.LogFile, "mysql.00.prod.com.00099")
c4 := BinlogCoordinates{LogFile: "mysql.00.prod.com.00000", LogPos: 104}
_, err = c4.PreviousFileCoordinates()
test.S(t).ExpectNotNil(err)
}
func TestBinlogCoordinatesAsKey(t *testing.T) {
m := make(map[BinlogCoordinates]bool)
c1 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
c2 := BinlogCoordinates{LogFile: "mysql-bin.00022", LogPos: 104}
c3 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
c4 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 222}
m[c1] = true
m[c2] = true
m[c3] = true
m[c4] = true
test.S(t).ExpectEquals(len(m), 3)
}
func TestFileNumberDistance(t *testing.T) {
c1 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
c2 := BinlogCoordinates{LogFile: "mysql-bin.00022", LogPos: 104}
test.S(t).ExpectEquals(c1.FileNumberDistance(&c1), 0)
test.S(t).ExpectEquals(c1.FileNumberDistance(&c2), 5)
test.S(t).ExpectEquals(c2.FileNumberDistance(&c1), -5)
}
func TestFileNumber(t *testing.T) {
c1 := BinlogCoordinates{LogFile: "mysql-bin.00017", LogPos: 104}
fileNum, numLen := c1.FileNumber()
test.S(t).ExpectEquals(fileNum, 17)
test.S(t).ExpectEquals(numLen, 5)
}


@@ -0,0 +1,55 @@
/*
Copyright 2016 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"vitess.io/vitess/go/vt/orchestrator/db"
)
// CandidateDatabaseInstance contains information about explicit promotion rules for an instance
type CandidateDatabaseInstance struct {
Hostname string
Port int
PromotionRule CandidatePromotionRule
LastSuggestedString string
PromotionRuleExpiry string // generated when retrieved from database for consistency reasons
}
func NewCandidateDatabaseInstance(instanceKey *InstanceKey, promotionRule CandidatePromotionRule) *CandidateDatabaseInstance {
return &CandidateDatabaseInstance{
Hostname: instanceKey.Hostname,
Port: instanceKey.Port,
PromotionRule: promotionRule,
}
}
func (cdi *CandidateDatabaseInstance) WithCurrentTime() *CandidateDatabaseInstance {
cdi.LastSuggestedString, _ = db.ReadTimeNow()
return cdi
}
// String returns a string representation of the CandidateDatabaseInstance struct
func (cdi *CandidateDatabaseInstance) String() string {
return fmt.Sprintf("%s:%d %s", cdi.Hostname, cdi.Port, cdi.PromotionRule)
}
// Key returns an instance key representing this candidate
func (cdi *CandidateDatabaseInstance) Key() *InstanceKey {
return &InstanceKey{Hostname: cdi.Hostname, Port: cdi.Port}
}


@@ -0,0 +1,109 @@
/*
Copyright 2016 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/db"
)
// RegisterCandidateInstance marks a given instance as a suggested successor to a master in the event of failover.
func RegisterCandidateInstance(candidate *CandidateDatabaseInstance) error {
if candidate.LastSuggestedString == "" {
candidate = candidate.WithCurrentTime()
}
args := sqlutils.Args(candidate.Hostname, candidate.Port, string(candidate.PromotionRule), candidate.LastSuggestedString)
query := fmt.Sprintf(`
insert into candidate_database_instance (
hostname,
port,
promotion_rule,
last_suggested
) values (
?, ?, ?, ?
) on duplicate key update
last_suggested=values(last_suggested),
promotion_rule=values(promotion_rule)
`)
writeFunc := func() error {
_, err := db.ExecOrchestrator(query, args...)
AuditOperation("register-candidate", candidate.Key(), string(candidate.PromotionRule))
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
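// Illustrative usage (sketch; hostname and rule values borrowed from the sample table below):
//
//	cdi := NewCandidateDatabaseInstance(
//		&InstanceKey{Hostname: "host1.example.com", Port: 3306},
//		CandidatePromotionRule("prefer"),
//	)
//	_ = RegisterCandidateInstance(cdi)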
// ExpireCandidateInstances removes stale master candidate suggestions.
func ExpireCandidateInstances() error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
delete from candidate_database_instance
where last_suggested < NOW() - INTERVAL ? MINUTE
`, config.Config.CandidateInstanceExpireMinutes,
)
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
// BulkReadCandidateDatabaseInstance returns a slice of
// CandidateDatabaseInstance entries read from the backend table.
/*
root@myorchestrator [orchestrator]> select * from candidate_database_instance;
+-------------------+------+---------------------+----------+----------------+
| hostname | port | last_suggested | priority | promotion_rule |
+-------------------+------+---------------------+----------+----------------+
| host1.example.com | 3306 | 2016-11-22 17:41:06 | 1 | prefer |
| host2.example.com | 3306 | 2016-11-22 17:40:24 | 1 | prefer |
+-------------------+------+---------------------+----------+----------------+
2 rows in set (0.00 sec)
*/
func BulkReadCandidateDatabaseInstance() ([]CandidateDatabaseInstance, error) {
var candidateDatabaseInstances []CandidateDatabaseInstance
// Read all promotion rules from the table
query := `
SELECT
hostname,
port,
promotion_rule,
last_suggested,
last_suggested + INTERVAL ? MINUTE AS promotion_rule_expiry
FROM
candidate_database_instance
`
err := db.QueryOrchestrator(query, sqlutils.Args(config.Config.CandidateInstanceExpireMinutes), func(m sqlutils.RowMap) error {
cdi := CandidateDatabaseInstance{
Hostname: m.GetString("hostname"),
Port: m.GetInt("port"),
PromotionRule: CandidatePromotionRule(m.GetString("promotion_rule")),
LastSuggestedString: m.GetString("last_suggested"),
PromotionRuleExpiry: m.GetString("promotion_rule_expiry"),
}
// add to end of candidateDatabaseInstances
candidateDatabaseInstances = append(candidateDatabaseInstances, cdi)
return nil
})
return candidateDatabaseInstances, err
}


@@ -0,0 +1,136 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"regexp"
"strings"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/kv"
)
func GetClusterMasterKVKey(clusterAlias string) string {
return fmt.Sprintf("%s%s", config.Config.KVClusterMasterPrefix, clusterAlias)
}
func getClusterMasterKVPair(clusterAlias string, masterKey *InstanceKey) *kv.KVPair {
if clusterAlias == "" {
return nil
}
if masterKey == nil {
return nil
}
return kv.NewKVPair(GetClusterMasterKVKey(clusterAlias), masterKey.StringCode())
}
// GetClusterMasterKVPairs returns all KV pairs associated with a master. This includes the
// full identity of the master as well as a breakdown by hostname, port, ipv4, ipv6
func GetClusterMasterKVPairs(clusterAlias string, masterKey *InstanceKey) (kvPairs [](*kv.KVPair)) {
masterKVPair := getClusterMasterKVPair(clusterAlias, masterKey)
if masterKVPair == nil {
return kvPairs
}
kvPairs = append(kvPairs, masterKVPair)
addPair := func(keySuffix, value string) {
key := fmt.Sprintf("%s/%s", masterKVPair.Key, keySuffix)
kvPairs = append(kvPairs, kv.NewKVPair(key, value))
}
addPair("hostname", masterKey.Hostname)
addPair("port", fmt.Sprintf("%d", masterKey.Port))
if ipv4, ipv6, err := readHostnameIPs(masterKey.Hostname); err == nil {
addPair("ipv4", ipv4)
addPair("ipv6", ipv6)
}
return kvPairs
}
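// Illustrative output (sketch; alias and host values are assumptions, the "test/master/"
// prefix is the one used by the test config):
//
//	GetClusterMasterKVPairs("mycluster", &InstanceKey{Hostname: "host1.example.com", Port: 3306})
//	// test/master/mycluster          -> master identity (masterKey.StringCode())
//	// test/master/mycluster/hostname -> "host1.example.com"
//	// test/master/mycluster/port     -> "3306"
//	// test/master/mycluster/ipv4 and /ipv6 -> added when the hostname's IPs resolve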
// mappedClusterNameToAlias attempts to match a cluster with an alias based on
// configured ClusterNameToAlias map
func mappedClusterNameToAlias(clusterName string) string {
for pattern, alias := range config.Config.ClusterNameToAlias {
if pattern == "" {
// sanity
continue
}
if matched, _ := regexp.MatchString(pattern, clusterName); matched {
return alias
}
}
return ""
}
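// Illustrative (sketch): with configuration such as
//
//	"ClusterNameToAlias": {"^host1\\.example\\.com": "mycluster"}
//
// a cluster named host1.example.com:3306 maps to the alias "mycluster".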
// ClusterInfo makes for a cluster status/info summary
type ClusterInfo struct {
ClusterName string
ClusterAlias string // Human friendly alias
ClusterDomain string // CNAME/VIP/A-record/whatever of the master of this cluster
CountInstances uint
HeuristicLag int64
HasAutomatedMasterRecovery bool
HasAutomatedIntermediateMasterRecovery bool
}
// ReadRecoveryInfo populates this cluster's automated recovery flags from the configured cluster filters
func (this *ClusterInfo) ReadRecoveryInfo() {
this.HasAutomatedMasterRecovery = this.filtersMatchCluster(config.Config.RecoverMasterClusterFilters)
this.HasAutomatedIntermediateMasterRecovery = this.filtersMatchCluster(config.Config.RecoverIntermediateMasterClusterFilters)
}
// filtersMatchCluster will see whether the given filters match the given cluster details
func (this *ClusterInfo) filtersMatchCluster(filters []string) bool {
for _, filter := range filters {
if filter == this.ClusterName {
return true
}
if filter == this.ClusterAlias {
return true
}
if strings.HasPrefix(filter, "alias=") {
// Match by exact cluster alias name
alias := strings.SplitN(filter, "=", 2)[1]
if alias == this.ClusterAlias {
return true
}
} else if strings.HasPrefix(filter, "alias~=") {
// Match by cluster alias regex
aliasPattern := strings.SplitN(filter, "~=", 2)[1]
if matched, _ := regexp.MatchString(aliasPattern, this.ClusterAlias); matched {
return true
}
} else if filter == "*" {
return true
} else if matched, _ := regexp.MatchString(filter, this.ClusterName); matched && filter != "" {
return true
}
}
return false
}
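// Illustrative filter forms accepted above (example values are assumptions):
//
//	"host1.example.com:3306"  exact cluster name or alias match
//	"alias=mycluster"         exact alias match
//	"alias~=^my"              alias regex match
//	"*"                       matches any cluster
//	"^host1\\."               regex match on cluster name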
// ApplyClusterAlias updates the given clusterInfo's ClusterAlias property
func (this *ClusterInfo) ApplyClusterAlias() {
if this.ClusterAlias != "" && this.ClusterAlias != this.ClusterName {
// Already has an alias; abort
return
}
if alias := mappedClusterNameToAlias(this.ClusterName); alias != "" {
this.ClusterAlias = alias
}
}


@@ -0,0 +1,35 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
// SetClusterAlias will write (and override) a single cluster name mapping
func SetClusterAlias(clusterName string, alias string) error {
return writeClusterAlias(clusterName, alias)
}
// SetClusterAliasManualOverride will write (and override) a single cluster name mapping
func SetClusterAliasManualOverride(clusterName string, alias string) error {
return writeClusterAliasManualOverride(clusterName, alias)
}
// GetClusterByAlias returns the cluster name associated with given alias.
// The function returns with error when:
// - No cluster is associated with the alias
// - More than one cluster is associated with the alias
func GetClusterByAlias(alias string) (string, error) {
return ReadClusterNameByAlias(alias)
}


@@ -0,0 +1,226 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
"vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils"
)
// ReadClusterNameByAlias returns the cluster name associated with the given alias (the alias may also be the cluster name itself)
func ReadClusterNameByAlias(alias string) (clusterName string, err error) {
query := `
select
cluster_name
from
cluster_alias
where
alias = ?
or cluster_name = ?
`
err = db.QueryOrchestrator(query, sqlutils.Args(alias, alias), func(m sqlutils.RowMap) error {
clusterName = m.GetString("cluster_name")
return nil
})
if err != nil {
return "", err
}
if clusterName == "" {
err = fmt.Errorf("No cluster found for alias %s", alias)
}
return clusterName, err
}
// DeduceClusterName attempts to resolve a cluster name given a name or alias.
// If it cannot match by alias, the function returns the given string as-is
func DeduceClusterName(nameOrAlias string) (clusterName string, err error) {
if nameOrAlias == "" {
return "", fmt.Errorf("empty cluster name")
}
if name, err := ReadClusterNameByAlias(nameOrAlias); err == nil {
return name, nil
}
return nameOrAlias, nil
}
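// Illustrative (sketch): DeduceClusterName("mycluster") resolves to the aliased cluster's
// name when the alias is registered; an unregistered input such as "host1.example.com:3306"
// is returned unchanged.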
// ReadAliasByClusterName returns the cluster alias for the given cluster name,
// or the cluster name itself if no explicit alias is found
func ReadAliasByClusterName(clusterName string) (alias string, err error) {
alias = clusterName // default return value
query := `
select
alias
from
cluster_alias
where
cluster_name = ?
`
err = db.QueryOrchestrator(query, sqlutils.Args(clusterName), func(m sqlutils.RowMap) error {
alias = m.GetString("alias")
return nil
})
return alias, err
}
// writeClusterAlias will write (and override) a single cluster name mapping
func writeClusterAlias(clusterName string, alias string) error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
replace into
cluster_alias (cluster_name, alias, last_registered)
values
(?, ?, now())
`,
clusterName, alias)
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
// writeClusterAliasManualOverride will write (and override) a single cluster name mapping
func writeClusterAliasManualOverride(clusterName string, alias string) error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
replace into
cluster_alias_override (cluster_name, alias)
values
(?, ?)
`,
clusterName, alias)
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
// UpdateClusterAliases rewrites the cluster_alias table based on information
// gained from database_instance
func UpdateClusterAliases() error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
replace into
cluster_alias (alias, cluster_name, last_registered)
select
suggested_cluster_alias,
cluster_name,
now()
from
database_instance
left join database_instance_downtime using (hostname, port)
where
suggested_cluster_alias!=''
/* exclude newly demoted, downtimed masters */
and ifnull(
database_instance_downtime.downtime_active = 1
and database_instance_downtime.end_timestamp > now()
and database_instance_downtime.reason = ?
, 0) = 0
order by
ifnull(last_checked <= last_seen, 0) asc,
read_only desc,
num_slave_hosts asc
`, DowntimeLostInRecoveryMessage)
return log.Errore(err)
}
if err := ExecDBWriteFunc(writeFunc); err != nil {
return err
}
writeFunc = func() error {
// Handling the case where no cluster alias exists: we write a dummy alias in the form of the real cluster name.
_, err := db.ExecOrchestrator(`
replace into
cluster_alias (alias, cluster_name, last_registered)
select
cluster_name as alias, cluster_name, now()
from
database_instance
group by
cluster_name
having
sum(suggested_cluster_alias = '') = count(*)
`)
return log.Errore(err)
}
if err := ExecDBWriteFunc(writeFunc); err != nil {
return err
}
return nil
}
// ReplaceAliasClusterName replaces the alias mapping of one cluster name with a new cluster name.
// Used in topology failover/recovery
func ReplaceAliasClusterName(oldClusterName string, newClusterName string) (err error) {
{
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
update cluster_alias
set cluster_name = ?
where cluster_name = ?
`,
newClusterName, oldClusterName)
return log.Errore(err)
}
err = ExecDBWriteFunc(writeFunc)
}
{
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
update cluster_alias_override
set cluster_name = ?
where cluster_name = ?
`,
newClusterName, oldClusterName)
return log.Errore(err)
}
if ferr := ExecDBWriteFunc(writeFunc); ferr != nil {
err = ferr
}
}
return err
}
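// Illustrative failover use (hypothetical hostnames): after promoting a
// replica, ReplaceAliasClusterName("oldmaster:3306", "newmaster:3306")
// repoints existing rows in both cluster_alias and cluster_alias_override at
// the new cluster name, so the alias survives the topology change.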
// ReadUnambiguousSuggestedClusterAliases reads potential master hostname:port entries that have
// suggested cluster aliases, where no other instance shares the same suggested cluster alias.
// Such hostname:port entries are likely the true owners of the alias.
func ReadUnambiguousSuggestedClusterAliases() (result map[string]InstanceKey, err error) {
result = map[string]InstanceKey{}
query := `
select
suggested_cluster_alias,
min(hostname) as hostname,
min(port) as port
from
database_instance
where
suggested_cluster_alias != ''
and replication_depth=0
group by
suggested_cluster_alias
having
count(*) = 1
`
err = db.QueryOrchestrator(query, sqlutils.Args(), func(m sqlutils.RowMap) error {
key := InstanceKey{Hostname: m.GetString("hostname"), Port: m.GetInt("port")}
suggestedAlias := m.GetString("suggested_cluster_alias")
result[suggestedAlias] = key
return nil
})
return result, err
}
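
A sketch of how UpdateClusterAliases might be driven periodically. The loop, its one-minute interval, and the name maintainClusterAliases are assumptions for illustration (and the time package would need importing); orchestrator's real scheduling lives elsewhere in its continuous-operation loop.

func maintainClusterAliases() {
	// Hypothetical driver: re-derive aliases from database_instance on a
	// fixed cadence. The interval here is arbitrary, not orchestrator's own.
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()
	for range ticker.C {
		if err := UpdateClusterAliases(); err != nil {
			log.Errore(err)
		}
	}
}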


@ -0,0 +1,54 @@
/*
Copyright 2015 Shlomi Noach, courtesy Booking.com
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/db"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
)
// WriteClusterDomainName will write (and override) the domain name of a cluster
func WriteClusterDomainName(clusterName string, domainName string) error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
insert into
cluster_domain_name (cluster_name, domain_name, last_registered)
values
(?, ?, NOW())
on duplicate key update
domain_name=values(domain_name),
last_registered=values(last_registered)
`,
clusterName, domainName)
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
// ExpireClusterDomainName expires cluster_domain_name entries that haven't been updated recently.
func ExpireClusterDomainName() error {
writeFunc := func() error {
_, err := db.ExecOrchestrator(`
delete from cluster_domain_name
where last_registered < NOW() - INTERVAL ? MINUTE
`, config.Config.ExpiryHostnameResolvesMinutes,
)
return log.Errore(err)
}
return ExecDBWriteFunc(writeFunc)
}
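
To make the pairing of the two calls concrete, a small sketch; refreshClusterDomain is an invented name and the flow is illustrative, not code from this package.

func refreshClusterDomain(clusterName, domainName string) error {
	// Re-register (insert-or-update) the domain, bumping last_registered.
	if err := WriteClusterDomainName(clusterName, domainName); err != nil {
		return err
	}
	// Prune entries not re-registered within ExpiryHostnameResolvesMinutes.
	return ExpireClusterDomainName()
}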


@ -0,0 +1,84 @@
/*
Copyright 2014 Outbrain Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package inst
import (
"fmt"
"testing"
"vitess.io/vitess/go/vt/orchestrator/config"
"vitess.io/vitess/go/vt/orchestrator/external/golib/log"
test "vitess.io/vitess/go/vt/orchestrator/external/golib/tests"
)
var masterKey = InstanceKey{Hostname: "host1", Port: 3306}
func init() {
config.Config.HostnameResolveMethod = "none"
config.Config.KVClusterMasterPrefix = "test/master/"
config.MarkConfigurationLoaded()
log.SetLevel(log.ERROR)
}
func TestGetClusterMasterKVKey(t *testing.T) {
kvKey := GetClusterMasterKVKey("foo")
test.S(t).ExpectEquals(kvKey, "test/master/foo")
}
func TestGetClusterMasterKVPair(t *testing.T) {
{
kvPair := getClusterMasterKVPair("myalias", &masterKey)
test.S(t).ExpectNotNil(kvPair)
test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias")
test.S(t).ExpectEquals(kvPair.Value, masterKey.StringCode())
}
{
kvPair := getClusterMasterKVPair("", &masterKey)
test.S(t).ExpectTrue(kvPair == nil)
}
{
kvPair := getClusterMasterKVPair("myalias", nil)
test.S(t).ExpectTrue(kvPair == nil)
}
}
func TestGetClusterMasterKVPairs(t *testing.T) {
kvPairs := GetClusterMasterKVPairs("myalias", &masterKey)
test.S(t).ExpectTrue(len(kvPairs) >= 2)
{
kvPair := kvPairs[0]
test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias")
test.S(t).ExpectEquals(kvPair.Value, masterKey.StringCode())
}
{
kvPair := kvPairs[1]
test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias/hostname")
test.S(t).ExpectEquals(kvPair.Value, masterKey.Hostname)
}
{
kvPair := kvPairs[2]
test.S(t).ExpectEquals(kvPair.Key, "test/master/myalias/port")
test.S(t).ExpectEquals(kvPair.Value, fmt.Sprintf("%d", masterKey.Port))
}
}
func TestGetClusterMasterKVPairs2(t *testing.T) {
kvPairs := GetClusterMasterKVPairs("", &masterKey)
test.S(t).ExpectEquals(len(kvPairs), 0)
}
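// Note (recap of the tests above): with KVClusterMasterPrefix set to
// "test/master/", GetClusterMasterKVPairs("myalias", &masterKey) yields the
// full instance code under test/master/myalias plus /hostname and /port
// sub-keys, while an empty alias yields no pairs at all.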

Some files were not shown because too many files changed in this diff.