Signed-off-by: Lee Yi Jie Joel <lee.yi.jie.joel@gmail.com>
Lee Yi Jie Joel 2020-01-02 14:10:30 -08:00
Parent 85fc9674ec
Commit 615dc52d77
12 changed files with 1051 additions and 0 deletions

examples/are-you-alive/Gopkg.lock (generated, 123 lines)

@@ -0,0 +1,123 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
digest = "1:d6afaeed1502aa28e80a4ed0981d570ad91b2579193404256ce672ed0a609e0d"
name = "github.com/beorn7/perks"
packages = ["quantile"]
pruneopts = "UT"
revision = "4b2b341e8d7715fae06375aa633dbb6e91b3fb46"
version = "v1.0.0"
[[projects]]
digest = "1:ec6f9bf5e274c833c911923c9193867f3f18788c461f76f05f62bb1510e0ae65"
name = "github.com/go-sql-driver/mysql"
packages = ["."]
pruneopts = "UT"
revision = "72cd26f257d44c1114970e19afddcd812016007e"
version = "v1.4.1"
[[projects]]
digest = "1:318f1c959a8a740366fce4b1e1eb2fd914036b4af58fbd0a003349b305f118ad"
name = "github.com/golang/protobuf"
packages = ["proto"]
pruneopts = "UT"
revision = "b5d812f8a3706043e23a9cd5babf2e5423744d30"
version = "v1.3.1"
[[projects]]
digest = "1:31e761d97c76151dde79e9d28964a812c46efc5baee4085b86f68f0c654450de"
name = "github.com/konsorten/go-windows-terminal-sequences"
packages = ["."]
pruneopts = "UT"
revision = "f55edac94c9bbba5d6182a4be46d86a2c9b5b50e"
version = "v1.0.2"
[[projects]]
digest = "1:ff5ebae34cfbf047d505ee150de27e60570e8c394b3b8fdbb720ff6ac71985fc"
name = "github.com/matttproud/golang_protobuf_extensions"
packages = ["pbutil"]
pruneopts = "UT"
revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c"
version = "v1.0.1"
[[projects]]
digest = "1:1db19cfa69213ff994b19451023750122862f39643a5f37728126e1b5180eaac"
name = "github.com/prometheus/client_golang"
packages = [
"prometheus",
"prometheus/internal",
"prometheus/promauto",
"prometheus/promhttp",
]
pruneopts = "UT"
revision = "50c4339db732beb2165735d2cde0bff78eb3c5a5"
version = "v0.9.3"
[[projects]]
branch = "master"
digest = "1:2d5cd61daa5565187e1d96bae64dbbc6080dacf741448e9629c64fd93203b0d4"
name = "github.com/prometheus/client_model"
packages = ["go"]
pruneopts = "UT"
revision = "fd36f4220a901265f90734c3183c5f0c91daa0b8"
[[projects]]
digest = "1:8dcedf2e8f06c7f94e48267dea0bc0be261fa97b377f3ae3e87843a92a549481"
name = "github.com/prometheus/common"
packages = [
"expfmt",
"internal/bitbucket.org/ww/goautoneg",
"model",
]
pruneopts = "UT"
revision = "17f5ca1748182ddf24fc33a5a7caaaf790a52fcc"
version = "v0.4.1"
[[projects]]
branch = "master"
digest = "1:77b841555ca2ce5a45d7ba8b3eea5bd6e2e50c139d979b9b10f57a30fbd1132c"
name = "github.com/prometheus/procfs"
packages = [
".",
"internal/fs",
]
pruneopts = "UT"
revision = "a7aeb8df3389edaf53efcc319ad0063cb27c3960"
[[projects]]
digest = "1:04457f9f6f3ffc5fea48e71d62f2ca256637dee0a04d710288e27e05c8b41976"
name = "github.com/sirupsen/logrus"
packages = ["."]
pruneopts = "UT"
revision = "839c75faf7f98a33d445d181f3018b5c3409a45e"
version = "v1.4.2"
[[projects]]
branch = "master"
digest = "1:9ca267f99487ef450fff0c2a49eaf0788d9ff3562bbaeff19f564d380f84bc6d"
name = "golang.org/x/sys"
packages = ["unix"]
pruneopts = "UT"
revision = "dbbf3f1254d491605cf4a0034ce25d0dc71b0c58"
[[projects]]
digest = "1:c25289f43ac4a68d88b02245742347c94f1e108c534dda442188015ff80669b3"
name = "google.golang.org/appengine"
packages = ["cloudsql"]
pruneopts = "UT"
revision = "4c25cacc810c02874000e4f7071286a8e96b2515"
version = "v1.6.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
input-imports = [
"github.com/go-sql-driver/mysql",
"github.com/prometheus/client_golang/prometheus",
"github.com/prometheus/client_golang/prometheus/promauto",
"github.com/prometheus/client_golang/prometheus/promhttp",
"github.com/sirupsen/logrus",
]
solver-name = "gps-cdcl"
solver-version = 1


@@ -0,0 +1,29 @@
# Gopkg.toml example
#
# Refer to https://golang.github.io/dep/docs/Gopkg.toml.html
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[prune]
go-tests = true
unused-packages = true


@@ -0,0 +1,15 @@
# We are using the "dev" project in our registry for this
NAME := "us.gcr.io/planetscale-dev/are-you-alive"
TAG := $$(git log -1 --pretty=%H)
IMG := ${NAME}:${TAG}
LATEST := ${NAME}:latest
.PHONY: build push
build:
@docker build -f build/release/Dockerfile -t ${IMG} .
@docker tag ${IMG} ${LATEST}
push:
@docker push ${IMG}
@docker push ${LATEST}


@@ -0,0 +1,84 @@
# Are You Alive?
What does it mean to be alive?
Well we don't know what it means for you, but we know what it means for our
Cloud Database!
This project contains a simulated client application that can be used to measure
the health of a Vitess cluster over time.
## Design
For now, there is a specific database schema and vschema that you must apply to
the database that you are using for this test.
This client application:
1. Hammers the database with random data (not a load test though).
1. Measures all the important things:
- Client connection errors
- Write latency
- Read latency from masters
- Read latency from replicas
- Write errors
- Read errors on masters
- Read errors on replicas
- Errors in other operations on masters and replicas (e.g. COUNT)
- Latency on other operations on masters and replicas (e.g. COUNT)
- Data loss (by writing predictable data and testing for it; see the sketch below)
1. Reports all these metrics to Prometheus.
That's it! Keep it as simple and generic as possible, and someday our customers
can use this to test their clusters too!
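To make the data-loss item above concrete, here is a minimal sketch, not the real implementation (that lives in the client's main loop); `readPage` is a hypothetical stand-in for the monitored read call. Every page number between the lowest undeleted page and the highest written page is expected to exist, so a read that comes back empty for one of those pages counts as data loss.
```
// Sketch only: pages in [minPage, maxPage) have been written and not yet
// deleted, so a row missing there (ignoring the race with the deleter at
// minPage itself) means data was lost. readPage is a hypothetical stand-in
// for the client's Read call.
package sketch

import (
	"database/sql"
	"math/rand"
)

func dataLost(minPage, maxPage int, readPage func(page int) error) bool {
	if minPage == maxPage {
		return false // nothing has been written yet
	}
	page := minPage + rand.Intn(maxPage-minPage) // random page that should exist
	err := readPage(page)
	return err == sql.ErrNoRows && page > minPage
}
```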
## Usage
First, [initialize your database with the correct schemas](schemas/README.md).
Run `are-you-alive --help` for usage. You can use the command line flags to
control the dataset size, whether to target reads at masters and replicas, your
mysql connection string, and the rate at which to send requests.
Example:
```
./are-you-alive --mysql_connection_string <mysql_connection_string>
```
Where `<mysql_connection_string>` points to the database you are trying to test,
and everything else will be set to defaults.
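For example (the flag names are the ones the binary defines; the values here are only illustrative), to point the client at a vtgate, also read from replica and rdonly tablets, keep 100 records, and wait half a second between operations (`--delay` is in nanoseconds):
```
./are-you-alive \
    --mysql_connection_string <mysql_connection_string> \
    --vtgate --replica --rdonly \
    --dataset_size 100 \
    --delay 500000000 \
    --environment_name <environment_name>
```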
## Building
```
dep ensure -v
go build github.com/planetscale/are-you-alive/cmd/are-you-alive
```
## Testing
First, [install docker compose](https://docs.docker.com/compose/install/) and
make sure it's working. Then run:
```
dep ensure -v
docker-compose build
docker-compose up
```
This will create a local mysqld and a local Prometheus to scrape the app. It
will also start the app with the `--initialize` flag which tells it to
automatically create the test database. You might have to run this twice to
give MySQL a chance to do its first initialization.
After you run `docker-compose up`, navigate to `http://localhost:9090` to see
Prometheus and `http://localhost:8080/metrics` to see the raw metrics being
exported.
## Push to Registry
```
make build
make push
```


@@ -0,0 +1,352 @@
package main
import (
"database/sql"
"flag"
"fmt"
"github.com/go-sql-driver/mysql"
"github.com/planetscale/are-you-alive/pkg/client"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"math/rand"
"net/http"
"os"
"os/signal"
"sync"
"time"
)
/*
* To measure data loss, we need predictable writes. We do this with "minPage"
* and "maxPage". Once the difference between them is our desired dataset size,
* we can start deleting old records, but we expect to find one record for every
* "page" number between "minPage" and "maxPage".
*
* We don't measure "update loss" with this client right now.
*/
var (
maxPage = 0
minPage = 0
dataLossEvents = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "are_you_alive_data_loss_events",
Help: "Data loss events",
},
[]string{"database_name"},
)
)
func writeNextRecord(environmentName string, connectionString string) error {
// 1. Call monitored client
err := client.Write(environmentName, connectionString, maxPage)
if err != nil {
// Check to see if this is a duplicate key error. We've seen this
// sometimes happen, and when it does this client app gets stuck in an
// infinite loop of failure to write a duplicate key. It's possible
// that happens because a write is successful but something goes wrong
// before the client receives a response, so the client thinks the write
// failed and does not increment the count.
//
// So when we specifically see a duplicate key error, assume that's what
// happened, bump the count, and move on.
//
// See https://github.com/planetscale/planetscale-operator/issues/1776
if me, ok := err.(*mysql.MySQLError); ok && me.Number == 1062 {
logrus.WithError(err).Warnf(
"Key '%d' already found, incrementing count", maxPage)
maxPage = maxPage + 1
return nil
}
logrus.WithError(err).Error("Error writing record")
return err
}
// 2. Increment "maxPage"
maxPage = maxPage + 1
return nil
}
func readRandomRecord(environmentName string, connectionString string) error {
// 1. Pick Random Number Between "minPage" and "maxPage"
if minPage == maxPage {
logrus.Warn("Nothing has been inserted yet!")
return nil
}
page := (rand.Int() % (maxPage - minPage)) + minPage
// 2. Read Record
readID, readMsg, err := client.Read(environmentName, connectionString, page)
if err != nil {
if err == sql.ErrNoRows {
// This races with deletion, but if our page is greater than minPage
// we know that it should be in there. If it's less than minPage
// assume we are just racing the deletion goroutine and ignore the
// error.
if page <= minPage {
return nil
}
// For replicas, there is a chance we are suffering from replication
// lag, so ignore the missing row if we are a replica.
// TODO: Should we attempt to roughly figure out replication lag in
// this client, at least to catch major failures? We could probably
// multiply the delay by the difference between maxPage and the page we
// are trying to read to figure out how long ago the row we were
// trying to write was written.
if client.ParseTabletType(connectionString) == "replica" ||
client.ParseTabletType(connectionString) == "rdonly" {
return nil
}
logrus.WithError(err).WithFields(logrus.Fields{
"page": page,
"minPage": minPage,
"maxPage": maxPage,
}).Error("Query succeeded but record not found, may mean data loss")
dataLossEvents.With(
prometheus.Labels{"database_name": client.ParseDBName(connectionString)}).Inc()
return err
}
logrus.WithError(err).Error("Error reading record")
return err
}
// Add zero here just so the metric exists for this database, even if it's
// zero.
dataLossEvents.With(
prometheus.Labels{"database_name": client.ParseDBName(connectionString)}).Add(0)
logrus.WithFields(logrus.Fields{
"readID": readID,
"readMsg": readMsg,
}).Debug("Read row!")
return nil
}
func runCount(environmentName string, connectionString string) error {
// 1. Run Count
count, err := client.Count(environmentName, connectionString)
if err != nil {
logrus.WithError(err).Error("Error counting records")
return err
}
logrus.WithFields(logrus.Fields{
"count": count,
}).Debug("Counted rows!")
// 2. Log if COUNT != "maxPage" - "minPage"
return nil
}
func deleteLastRecordIfNecessary(environmentName string, connectionString string) error {
// 1. Compare "maxPage" - "minPage" to Desired Dataset Size
if (maxPage - minPage) < *datasetSize {
return nil
}
logrus.WithFields(logrus.Fields{
"current": maxPage - minPage,
"desired": *datasetSize,
}).Debug("Deleting last record")
// 2. Delete Record If We Are Above Desired Size
err := client.Delete(environmentName, connectionString, minPage)
if err != nil {
logrus.WithError(err).Error("Error deleting record")
return err
}
// 3. Increment "minPage"
minPage = minPage + 1
return nil
}
var (
mysqlConnectionString = flag.String(
"mysql_connection_string", "", "Connection string for db to test")
prometheusMetricsAddress = flag.String(
"prometheus_metrics_address", ":8080", "Address on which to serve prometheus metrics")
debug = flag.Bool("debug", false, "Enable debug logging")
useVtgate = flag.Bool("vtgate", false, "Using vtgate (for @master and @replica)")
readFromReplica = flag.Bool("replica", false, "Read from replica")
readFromReadOnly = flag.Bool("rdonly", false, "Read from rdonly")
initialize = flag.Bool("initialize", false, "Initialize database (for testing)")
sleepTime = flag.Int("delay", 1*1000*1000*1000, "Delay in nanoseconds between ops")
datasetSize = flag.Int("dataset_size", 10, "Number of total records in database")
environmentName = flag.String("environment_name", "prod",
"Environment the database is deployed in that this client is pointing at")
)
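// runner bundles one monitoring operation: the connection string and
// environment name passed to fn, the message to log when fn fails, and how
// long to sleep between iterations.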
type runner struct {
connString string
envName string
fn func(string, string) error
errMessage string
sleepTime time.Duration
}
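// run loops forever, sleeping r.sleepTime between calls to r.fn and logging
// r.errMessage (with the underlying error) whenever a call fails.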
func (r *runner) run() {
for {
time.Sleep(r.sleepTime)
err := r.fn(r.envName, r.connString)
if err != nil {
logrus.WithError(err).Error(r.errMessage)
}
}
}
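// waitForCtrlC blocks until the process receives an interrupt signal.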
func waitForCtrlC() {
var endWaiter sync.WaitGroup
endWaiter.Add(1)
signalChannel := make(chan os.Signal, 1)
signal.Notify(signalChannel, os.Interrupt)
go func() {
<-signalChannel
endWaiter.Done()
}()
endWaiter.Wait()
}
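// runPrometheus serves the /metrics endpoint on the configured address.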
func runPrometheus() {
http.Handle("/metrics", promhttp.Handler())
logrus.Fatal(http.ListenAndServe(*prometheusMetricsAddress, nil))
}
func main() {
// 0. Handle Arguments
flag.Parse()
if *debug {
logrus.SetLevel(logrus.DebugLevel)
}
logrus.WithFields(logrus.Fields{
"mysqlConnectionString": *mysqlConnectionString,
"prometheusMetricsAddress": *prometheusMetricsAddress,
"debug": *debug,
}).Debug("Command line arguments")
connectionString := ""
if *mysqlConnectionString != "" {
connectionString = *mysqlConnectionString
} else if os.Getenv("MYSQL_CONN_STRING") != "" {
connectionString = os.Getenv("MYSQL_CONN_STRING")
}
masterConnectionString := connectionString
replicaConnectionString := connectionString
rdonlyConnectionString := connectionString
// When using vtgate, we want to append @master and @replica to the DSN, but
// this will fail against normal mysql which we're using for testing. See:
// https://vitess.io/docs/user-guides/faq/#how-do-i-choose-between-master-vs-replica-for-queries
if *useVtgate {
// We need to pass interpolateParams when using a vtgate because
// prepare is not supported.
//
// See:
// - https://github.com/go-sql-driver/mysql/blob/master/README.md#interpolateparams
// - https://github.com/src-d/go-mysql-server/issues/428
// - https://github.com/vitessio/vitess/pull/3862
masterConnectionString = fmt.Sprintf("%s@master?interpolateParams=true", connectionString)
replicaConnectionString = fmt.Sprintf("%s@replica?interpolateParams=true", connectionString)
rdonlyConnectionString = fmt.Sprintf("%s@rdonly?interpolateParams=true", connectionString)
}
fmt.Println("masterConnectionString:", masterConnectionString)
fmt.Println("replicaConnectionString:", replicaConnectionString)
fmt.Println("rdonlyConnectionString:", rdonlyConnectionString)
// 1. Set Up Prometheus Metrics
logrus.Info("Prometheus Go")
go runPrometheus()
// 2. Initialize Database
logrus.Info("Initializing database")
// For local testing, does not initialize vschema
if *initialize {
client.InitializeDatabase(*environmentName, masterConnectionString, "are_you_alive_messages")
}
client.WipeTestTable(*environmentName, masterConnectionString, "are_you_alive_messages")
// 3. Start goroutines to do various things
logrus.Info("Starting client goroutines")
deleter := runner{
connString: masterConnectionString,
envName: *environmentName,
fn: deleteLastRecordIfNecessary,
errMessage: "Received error deleting last record",
sleepTime: time.Duration(*sleepTime),
}
go deleter.run()
writer := runner{
connString: masterConnectionString,
envName: *environmentName,
fn: writeNextRecord,
errMessage: "Received error writing next record",
sleepTime: time.Duration(*sleepTime),
}
go writer.run()
reader := runner{
connString: masterConnectionString,
envName: *environmentName,
fn: readRandomRecord,
errMessage: "Received error reading record",
sleepTime: time.Duration(*sleepTime),
}
go reader.run()
counter := runner{
connString: masterConnectionString,
envName: *environmentName,
fn: runCount,
errMessage: "Received error running count",
sleepTime: time.Duration(*sleepTime),
}
go counter.run()
// Only bother starting a replica reader/counter if we are using a vtgate
// and actually are asking to do replica reads
if *useVtgate && *readFromReplica {
replicaReader := runner{
connString: replicaConnectionString,
envName: *environmentName,
fn: readRandomRecord,
errMessage: "Received error reading record from replica",
sleepTime: time.Duration(*sleepTime),
}
go replicaReader.run()
replicaRowCounter := runner{
connString: replicaConnectionString,
envName: *environmentName,
fn: runCount,
errMessage: "Received error running count on replica",
sleepTime: time.Duration(*sleepTime),
}
go replicaRowCounter.run()
}
// Only bother starting a rdonly reader/counter if we are using a vtgate and
// actually are asking to do rdonly reads
if *useVtgate && *readFromReadOnly {
replicaReader := runner{
connString: rdonlyConnectionString,
envName: *environmentName,
fn: readRandomRecord,
errMessage: "Received error reading record from rdonly",
sleepTime: time.Duration(*sleepTime),
}
go replicaReader.run()
replicaRowCounter := runner{
connString: rdonlyConnectionString,
envName: *environmentName,
fn: runCount,
errMessage: "Received error running count on rdonly",
sleepTime: time.Duration(*sleepTime),
}
go replicaRowCounter.run()
}
logrus.Info("Press Ctrl+C to end\n")
waitForCtrlC()
logrus.Info("\n")
}


@@ -0,0 +1,5 @@
# Kubernetes Deployment
Configuration for deploying `are-you-alive` on Kubernetes.
Also deploys Prometheus and Alertmanager.


@@ -0,0 +1,29 @@
# https://www.firehydrant.io/blog/developer-a-go-app-with-docker-compose/
version: '3'
services:
app:
build: build/dev
image: are-you-alive-dev
volumes:
- .:/go/src/github.com/planetscale/are-you-alive
working_dir: /go/src/github.com/planetscale/are-you-alive
environment:
MYSQL_CONN_STRING: root:mysql@tcp(mysql)/testfixture
depends_on:
- mysql
ports:
- 8080:8080
prom:
image: quay.io/prometheus/prometheus:v2.0.0
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus"
ports:
- 9090:9090
depends_on:
- app
mysql:
image: mysql:5.7
environment:
MYSQL_DATABASE: testfixture
MYSQL_ROOT_PASSWORD: mysql


@@ -0,0 +1,347 @@
package client
import (
"database/sql"
"fmt"
mysql "github.com/go-sql-driver/mysql"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sirupsen/logrus"
"strings"
)
/*
* This package is meant to provide a client that includes prometheus metrics
* for common database issues.
*/
var (
defaultBuckets = []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}
countErrorLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_count_error_latency_seconds",
Help: "Latency to receive a count error",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
readErrorLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_read_error_latency_seconds",
Help: "Latency to receive a read error",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
deleteErrorLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_delete_error_latency_seconds",
Help: "Latency to receive a delete error",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name"},
)
writeErrorLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_write_error_latency_seconds",
Help: "Latency to receive a write error",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name"},
)
connectErrorLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_connect_error_latency_seconds",
Help: "Latency to receive a connect error",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
countLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_count_latency_seconds",
Help: "Time it takes to run a count against the database",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
readLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_read_latency_seconds",
Help: "Time it takes to read from the database",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
deleteLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_delete_latency_seconds",
Help: "Time it takes to delete from the database",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name"},
)
writeLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_write_latency_seconds",
Help: "Time it takes to write to the database",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name"},
)
connectLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "are_you_alive_connect_latency_seconds",
Help: "Time it takes to connect to the database",
Buckets: defaultBuckets,
},
[]string{"environment_name", "database_name", "tablet_type"},
)
)
// ParseDBName extracts the database name from a mysql connection string.
func ParseDBName(connectionString string) string {
mysqlConfig, err := mysql.ParseDSN(connectionString)
if err != nil {
logrus.WithError(err).Fatal("Error parsing DSN!")
}
return mysqlConfig.DBName
}
// ParseTabletType extracts the tablet type from a vitess specific mysql
// connection string.
//
// See https://vitess.io/docs/faq/queries/ for where these come from.
func ParseTabletType(connectionString string) string {
databaseName := ParseDBName(connectionString)
if strings.HasSuffix(databaseName, "@master") {
return "master"
} else if strings.HasSuffix(databaseName, "@replica") {
return "replica"
} else if strings.HasSuffix(databaseName, "@rdonly") {
return "rdonly"
} else {
return "default"
}
}
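// openDatabase opens a *sql.DB handle for connectionString and times the call
// with the connect latency metrics, labeled by environment, database, and
// tablet type (see the note below about what this actually measures).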
func openDatabase(environmentName string, connectionString string) (*sql.DB, error) {
databaseName := ParseDBName(connectionString)
tabletType := ParseTabletType(connectionString)
// NOTE: This is probably not measuring open connections. I think the
// connections are created/fetched from the pool when an operation is
// actually performed. We could force this with a ping probably, but for
// now this is here just as a sanity check that this is actually all
// happening locally. We should just see everything complete within
// milliseconds.
labels := prometheus.Labels{
"environment_name": environmentName,
"database_name": databaseName,
"tablet_type": tabletType}
connectTimer := prometheus.NewTimer(connectLatency.With(labels))
connectErrorTimer := prometheus.NewTimer(connectErrorLatency.With(labels))
db, err := sql.Open("mysql", connectionString)
if err != nil {
logrus.WithError(err).Error("Error connecting to database")
connectErrorTimer.ObserveDuration()
return nil, err
}
connectTimer.ObserveDuration()
return db, nil
}
// InitializeDatabase will connect to the given connectionString, drop the
// given tableName, and recreate it with the schema that the rest of the client
// expects. This is not something any normal client would do but is convenient
// here because we are just using this client for monitoring.
func InitializeDatabase(environmentName string, connectionString string, tableName string) error {
// 0. Create logger
log := logrus.WithField("connection_string", connectionString)
// 1. Open client to database
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
return err
}
defer db.Close()
// 2. Delete test table, but continue if it's not there
if _, err := db.Exec(fmt.Sprintf("DROP TABLE %s", tableName)); err != nil {
log.WithError(err).Warn("Error dropping test table")
}
// 3. Create table
createSQL := fmt.Sprintf(
"CREATE TABLE IF NOT EXISTS %s(page INT, message VARCHAR(255) NOT NULL, PRIMARY KEY (page))", tableName)
if _, err := db.Exec(createSQL); err != nil {
log.WithError(err).Error("Error creating test table")
return err
}
return nil
}
// WipeTestTable connects to the database given by connectionString and deletes
// everything in the table given by tableName because this client expects the
// table to be empty. No client would normally do this, but it's convenient for
// testing.
func WipeTestTable(environmentName string, connectionString string, tableName string) error {
// 0. Create logger
log := logrus.WithField("connection_string", connectionString)
// 1. Open client to database
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
return err
}
defer db.Close()
// 2. Clear database
if _, err := db.Exec(fmt.Sprintf("DELETE FROM %s", tableName)); err != nil {
log.WithError(err).Warn("Error clearing table")
}
return nil
}
// Write will write the record given by page to the test table in the database
// referenced by connectionString.
func Write(environmentName string, connectionString string, page int) error {
// 0. Create logger
log := logrus.WithField("connection_string", connectionString)
// 1. Open client to database
databaseName := ParseDBName(connectionString)
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
return err
}
defer db.Close()
// 2. Write record
labels := prometheus.Labels{
"environment_name": environmentName,
"database_name": databaseName}
writeTimer := prometheus.NewTimer(writeLatency.With(labels))
writeErrorTimer := prometheus.NewTimer(writeErrorLatency.With(labels))
if _, err := db.Exec("INSERT INTO are_you_alive_messages (page, message) VALUES (?, ?)", page, "foo"); err != nil {
log.WithError(err).Error("Error inserting into database")
writeErrorTimer.ObserveDuration()
return err
}
writeTimer.ObserveDuration()
return nil
}
// Read will read the record given by page from the test table in the database
// referenced by connectionString.
func Read(environmentName string, connectionString string, page int) (int, string, error) {
// 0. Create logger
log := logrus.WithField("connection_string", connectionString)
// 1. Open client to database
databaseName := ParseDBName(connectionString)
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
return 0, "", err
}
defer db.Close()
// 2. Read record
tabletType := ParseTabletType(connectionString)
labels := prometheus.Labels{
"environment_name": environmentName,
"database_name": databaseName,
"tablet_type": tabletType}
readTimer := prometheus.NewTimer(readLatency.With(labels))
readErrorTimer := prometheus.NewTimer(readErrorLatency.With(labels))
row := db.QueryRow("SELECT * FROM are_you_alive_messages WHERE page=?", page)
var readID int
var readMsg string
if err := row.Scan(&readID, &readMsg); err != nil {
if err == sql.ErrNoRows {
// If our error is just that we didn't find anything, don't treat
// this as an error or a success. Just return and let the caller
// deal with it so we don't mess up our metrics.
return 0, "", err
}
log.WithError(err).Error("Error reading row from database")
readErrorTimer.ObserveDuration()
return 0, "", err
}
logrus.WithFields(logrus.Fields{
"readId": readID,
"readMsg": readMsg,
}).Debug("Successfully read row")
readTimer.ObserveDuration()
return readID, readMsg, nil
}
// Count will count all the documents in the test table in the database
// referenced by connectionString.
func Count(environmentName string, connectionString string) (int, error) {
// 0. Create logger
log := logrus.WithField("connection_string", connectionString)
// 1. Open client to database
databaseName := ParseDBName(connectionString)
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
return 0, err
}
defer db.Close()
// 2. Run Count
tabletType := ParseTabletType(connectionString)
labels := prometheus.Labels{
"environment_name": environmentName,
"database_name": databaseName,
"tablet_type": tabletType}
countTimer := prometheus.NewTimer(countLatency.With(labels))
countErrorTimer := prometheus.NewTimer(countErrorLatency.With(labels))
row := db.QueryRow("SELECT COUNT(*) FROM are_you_alive_messages")
var count int
if err := row.Scan(&count); err != nil {
log.WithError(err).Error("Error running count")
countErrorTimer.ObserveDuration()
return 0, err
}
logrus.WithFields(logrus.Fields{
"count": count,
}).Debug("Successfully ran count")
countTimer.ObserveDuration()
return count, nil
}
// Delete will delete the record given by page from the test table in the
// database referenced by connectionString.
func Delete(environmentName string, connectionString string, page int) error {
// 0. Create logger
log := logrus.WithFields(logrus.Fields{
"connection_string": connectionString,
"page": page,
})
// 1. Open client to database
databaseName := ParseDBName(connectionString)
labels := prometheus.Labels{
"environment_name": environmentName,
"database_name": databaseName}
deleteTimer := prometheus.NewTimer(deleteLatency.With(labels))
deleteErrorTimer := prometheus.NewTimer(deleteErrorLatency.With(labels))
db, err := openDatabase(environmentName, connectionString)
if err != nil {
log.WithError(err).Error("Error opening database")
deleteErrorTimer.ObserveDuration()
return err
}
defer db.Close()
// 2. Delete record
if _, err := db.Exec("DELETE FROM are_you_alive_messages WHERE page=?", page); err != nil {
log.WithError(err).Error("Error deleting record")
deleteErrorTimer.ObserveDuration()
return err
}
deleteTimer.ObserveDuration()
return nil
}


@@ -0,0 +1,34 @@
# my global config
global:
scrape_interval: 5s # By default, scrape targets every 5 seconds.
evaluation_interval: 5s # By default, evaluate rules every 5 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'local-integration-test'
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first.rules"
# - "second.rules"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ['localhost:9090']
- job_name: "app"
scrape_interval: "5s"
static_configs:
- targets: ['app:8080']


@@ -0,0 +1,9 @@
# Test Schemas
MySQL schemas and Vitess schemas that must be applied to your cluster to use
this test helper. They should be applied using `vtctlclient` like this:
```
vtctlclient -server "<vtctld_server>" ApplySchema -sql "$(cat create_test_table.sql)" <database_name>
vtctlclient -server "<vtctld_server>" ApplyVSchema -vschema "$(cat vschema.json)" <database_name>
```


@@ -0,0 +1,6 @@
CREATE TABLE IF NOT EXISTS are_you_alive_messages (
page INT,
message VARCHAR(255) NOT NULL,
PRIMARY KEY (page)
)


@@ -0,0 +1,18 @@
{
"sharded": true,
"vindexes": {
"hash": {
"type": "hash"
}
},
"tables": {
"are_you_alive_messages": {
"column_vindexes": [
{
"column": "page",
"name": "hash"
}
]
}
}
}