// vitess-gh/go/vt/mysqlctl/clone.go
//
// 473 lines
// 14 KiB
// Go

// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mysqlctl
import (
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"strings"
log "github.com/golang/glog"
"github.com/youtube/vitess/go/ioutil2"
"github.com/youtube/vitess/go/vt/hook"
"github.com/youtube/vitess/go/vt/mysqlctl/proto"
)
// These methods deal with cloning a running instance of mysql.
const (
// maxLagSeconds is the replication lag (in seconds) that the lag check in
// validateCloneSource would tolerate. That check is currently commented
// out, so nothing in this part of the file reads the constant.
maxLagSeconds = 5
)
const (
// SnapshotManifestFile is the name of the JSON manifest that
// CreateSnapshot writes into the snapshot directory.
SnapshotManifestFile = "snapshot_manifest.json"
)
// validateCloneSource checks that this instance is a reasonable source of
// data for a clone. It verifies the snapshot path with ValidateSnapshotPath
// and then runs the "preflight_snapshot" hook through ExecuteOptional.
//
// serverMode adds "--server-mode" to the hook parameters; hookExtraEnv is
// handed to the hook as its ExtraEnv. The first failure is returned as is.
func (mysqld *Mysqld) validateCloneSource(serverMode bool, hookExtraEnv map[string]string) error {
	// NOTE(msolomon) Removing this check for now - I don't see the value of validating this.
	// // needs to be master, or slave that's not too far behind
	// slaveStatus, err := mysqld.slaveStatus()
	// if err != nil {
	// 	if err != ErrNotSlave {
	// 		return fmt.Errorf("mysqlctl: validateCloneSource failed, %v", err)
	// 	}
	// } else {
	// 	lagSeconds, _ := strconv.Atoi(slaveStatus["seconds_behind_master"])
	// 	if lagSeconds > maxLagSeconds {
	// 		return fmt.Errorf("mysqlctl: validateCloneSource failed, lag_seconds exceed maximum tolerance (%v)", lagSeconds)
	// 	}
	// }

	// The snapshot is written locally, so the path has to be usable.
	if pathErr := mysqld.ValidateSnapshotPath(); pathErr != nil {
		return pathErr
	}

	// Let a site-specific hook check local conditions.
	// FIXME(alainjobart) What other parameters do we have to
	// provide? dbname, host, socket?
	hookArgs := make([]string, 0, 1)
	if serverMode {
		hookArgs = append(hookArgs, "--server-mode")
	}
	preflight := hook.NewHook("preflight_snapshot", hookArgs)
	preflight.ExtraEnv = hookExtraEnv
	if hookErr := preflight.ExecuteOptional(); hookErr != nil {
		return hookErr
	}

	// FIXME(msolomon) check free space based on an estimate of the current
	// size of the db files.
	// Also, check that we aren't already cloning/compressing or acting as a
	// source. Mysqld being down isn't enough, presumably that will be
	// restarted as soon as the snapshot is taken.
	return nil
}
// ValidateCloneTarget checks that this instance may be overwritten by a
// restore. It runs the "preflight_restore" hook (with hookExtraEnv as its
// ExtraEnv) and then inspects every database whose name starts with "vt_":
// such a database is acceptable only if it has no tables.
//
// It returns the hook error unchanged, a wrapped error when one of the SHOW
// queries fails, or an error naming the first non-empty "vt_" database.
func (mysqld *Mysqld) ValidateCloneTarget(hookExtraEnv map[string]string) error {
	// Let a site-specific hook check local conditions first.
	preflight := hook.NewSimpleHook("preflight_restore")
	preflight.ExtraEnv = hookExtraEnv
	if hookErr := preflight.ExecuteOptional(); hookErr != nil {
		return hookErr
	}

	databases, err := mysqld.fetchSuperQuery("SHOW DATABASES")
	if err != nil {
		return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err)
	}

	for _, row := range databases.Rows {
		dbName := row[0].String()
		if !strings.HasPrefix(dbName, "vt_") {
			continue
		}

		tables, err := mysqld.fetchSuperQuery("SHOW TABLES FROM " + dbName)
		if err != nil {
			return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err)
		}
		// A database without tables is empty, which is fine; anything else
		// would be clobbered by the restore.
		if len(tables.Rows) != 0 {
			return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, found active db %v", dbName)
		}
	}
	return nil
}
// findFilesToServe pairs every non-directory entry found directly in srcDir
// with the path it should have under dstDir.
//
// The first returned slice holds the source paths and the second holds the
// matching destination paths, index for index. When compress is true each
// destination name gets a ".gz" suffix. Sub-directories are skipped and not
// descended into. If srcDir cannot be read, both slices are nil and the read
// error is returned.
func findFilesToServe(srcDir, dstDir string, compress bool) ([]string, []string, error) {
	entries, err := ioutil.ReadDir(srcDir)
	if err != nil {
		return nil, nil, err
	}

	// Compressed copies are served under the original name plus ".gz".
	suffix := ""
	if compress {
		suffix = ".gz"
	}

	sources := make([]string, 0, len(entries))
	destinations := make([]string, 0, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		sources = append(sources, path.Join(srcDir, name))
		destinations = append(destinations, path.Join(dstDir, name+suffix))
	}
	return sources, destinations, nil
}
// FindVtDatabases returns the names of the entries in the configured data
// directory that start with "vt_".
//
// The entries are not inspected any further (files and directories are
// treated alike). If the data directory cannot be read, the read error is
// returned with a nil slice.
func (mysqld *Mysqld) FindVtDatabases() ([]string, error) {
	fiList, err := ioutil.ReadDir(mysqld.config.DataDir)
	if err != nil {
		return nil, err
	}

	dbNames := make([]string, 0, 16)
	for _, fi := range fiList {
		// "vt_" is a name prefix, matched the same way ValidateCloneTarget
		// does; testing it as a suffix would only match names that merely
		// end in "vt_".
		if strings.HasPrefix(fi.Name(), "vt_") {
			dbNames = append(dbNames, fi.Name())
		}
	}
	return dbNames, nil
}
// createSnapshot wipes mysqld.SnapshotDir, recreates the destination
// directories, and returns the result of newSnapshotFiles for every file that
// belongs in the snapshot.
//
// The snapshot covers the InnoDB data and log home directories plus each
// database directory under the data directory. An entry counts as a database
// directory when it is (or resolves through symlinks to) a directory that
// contains either a db.opt file or at least one ".frm" file. Files are
// compressed (".gz" destinations) unless serverMode is set. concurrency is
// passed through to newSnapshotFiles.
func (mysqld *Mysqld) createSnapshot(concurrency int, serverMode bool) ([]SnapshotFile, error) {
	// clean out and start fresh
	log.Infof("removing previous snapshots: %v", mysqld.SnapshotDir)
	if err := os.RemoveAll(mysqld.SnapshotDir); err != nil {
		return nil, err
	}

	// FIXME(msolomon) innodb paths must match patterns in mycnf -
	// probably belongs as a derived path.
	type snapPair struct{ srcDir, dstDir string }
	pairs := []snapPair{
		{mysqld.config.InnodbDataHomeDir, path.Join(mysqld.SnapshotDir, innodbDataSubdir)},
		{mysqld.config.InnodbLogGroupHomeDir, path.Join(mysqld.SnapshotDir, innodbLogSubdir)},
	}

	dataDirEntries, err := ioutil.ReadDir(mysqld.config.DataDir)
	if err != nil {
		return nil, err
	}

	for _, entry := range dataDirEntries {
		dbPath := path.Join(mysqld.config.DataDir, entry.Name())

		// Anything that is not a directory may be a symlink to one: resolve
		// it and look at the target instead.
		if !entry.IsDir() {
			resolved, err := filepath.EvalSymlinks(dbPath)
			if err != nil {
				return nil, err
			}
			target, err := os.Stat(resolved)
			if err != nil {
				return nil, err
			}
			// NOTE(review): from here on entry.Name() is the base name of
			// the resolved target, not of the original link - confirm that
			// this is the intended snapshot directory name.
			dbPath, entry = resolved, target
		}
		if !entry.IsDir() {
			continue
		}

		snapshotDbDir := path.Join(mysqld.SnapshotDir, dataDir, entry.Name())

		// Copy anything that defines a db.opt file - that includes empty databases.
		if _, statErr := os.Stat(path.Join(dbPath, "db.opt")); statErr == nil {
			pairs = append(pairs, snapPair{dbPath, snapshotDbDir})
			continue
		}

		// No db.opt: a single .frm file is enough to treat it as a database.
		children, err := ioutil.ReadDir(dbPath)
		if err != nil {
			return nil, err
		}
		for _, child := range children {
			if strings.HasSuffix(child.Name(), ".frm") {
				pairs = append(pairs, snapPair{dbPath, snapshotDbDir})
				break
			}
		}
	}

	compress := !serverMode
	sources := make([]string, 0, 128)
	destinations := make([]string, 0, 128)
	for _, pair := range pairs {
		if err := os.MkdirAll(pair.dstDir, 0775); err != nil {
			return nil, err
		}
		srcFiles, dstFiles, err := findFilesToServe(pair.srcDir, pair.dstDir, compress)
		if err != nil {
			return nil, err
		}
		sources = append(sources, srcFiles...)
		destinations = append(destinations, dstFiles...)
	}

	return newSnapshotFiles(sources, destinations, mysqld.SnapshotDir, concurrency, compress)
}
// CreateSnapshot runs on the machine acting as the source for the clone.
//
// Check master/slave status and determine restore needs.
// If this instance is a slave, stop replication, otherwise place in read-only mode.
// Record replication position.
// Shutdown mysql
// Check paths for storing data
//
// Depending on the serverMode flag, we do the following:
// serverMode = false:
//   Compress /vt/vt_[0-9a-f]+/data/vt_.+
//   Compute hash (of compressed files, as we serve .gz files here)
//   Place in /vt/clone_src where they will be served by http server (not rpc)
//   Restart mysql
// serverMode = true:
//   Make symlinks for /vt/vt_[0-9a-f]+/data/vt_.+ to innodb files
//   Compute hash (of uncompressed files, as we serve uncompressed files)
//   Place symlinks in /vt/clone_src where they will be served by http server
//   Leave mysql stopped, return slaveStartRequired, readOnly
//
// Parameters:
//   dbName is recorded in the manifest and must not be empty.
//   sourceAddr is recorded in the manifest.
//   allowHierarchicalReplication: when the source is a slave, use the
//     source's own address as the master address instead of its master's.
//   concurrency is passed to createSnapshot.
//   hookExtraEnv is passed to the hooks run along the way.
//
// Returns the URL path of the snapshot manifest together with the state that
// has to be restored later (whether the slave must be restarted, and the
// original read-only flag). On any failure the manifest path is empty and err
// is non-nil.
func (mysqld *Mysqld) CreateSnapshot(dbName, sourceAddr string, allowHierarchicalReplication bool, concurrency int, serverMode bool, hookExtraEnv map[string]string) (snapshotManifestUrlPath string, slaveStartRequired, readOnly bool, err error) {
	if dbName == "" {
		return "", false, false, errors.New("CreateSnapshot failed: no database name provided")
	}

	if err = mysqld.validateCloneSource(serverMode, hookExtraEnv); err != nil {
		return "", false, false, err
	}

	// save initial state so we can restore on Start()
	slaveStartRequired = false
	sourceIsMaster := false
	readOnly = true

	slaveStatus, slaveErr := mysqld.slaveStatus()
	if slaveErr == nil {
		slaveStartRequired = (slaveStatus["Slave_IO_Running"] == "Yes" && slaveStatus["Slave_SQL_Running"] == "Yes")
	} else if slaveErr == ErrNotSlave {
		sourceIsMaster = true
	} else {
		// If we can't get any data, just fail. The error has to be handed
		// back explicitly: err is still nil at this point, so a bare return
		// would report success with an empty manifest path.
		return "", slaveStartRequired, readOnly, slaveErr
	}

	readOnly, err = mysqld.IsReadOnly()
	if err != nil {
		return "", slaveStartRequired, readOnly, err
	}

	// Stop sources of writes so we can get a consistent replication position.
	// If the source is a slave use the master replication position
	// unless we are allowing hierarchical replicas.
	masterAddr := ""
	var replicationPosition *proto.ReplicationPosition
	if sourceIsMaster {
		if err = mysqld.SetReadOnly(true); err != nil {
			return "", slaveStartRequired, readOnly, err
		}
		replicationPosition, err = mysqld.MasterStatus()
		if err != nil {
			return "", slaveStartRequired, readOnly, err
		}
		masterAddr = mysqld.IpAddr()
	} else {
		if err = mysqld.StopSlave(hookExtraEnv); err != nil {
			return "", slaveStartRequired, readOnly, err
		}
		replicationPosition, err = mysqld.SlaveStatus()
		if err != nil {
			return "", slaveStartRequired, readOnly, err
		}
		// We are a slave, check our replication strategy before
		// choosing the master address.
		if allowHierarchicalReplication {
			masterAddr = mysqld.IpAddr()
		} else {
			masterAddr, err = mysqld.GetMasterAddr()
			if err != nil {
				return "", slaveStartRequired, readOnly, err
			}
		}
	}

	if err = mysqld.Shutdown(true, MysqlWaitTime); err != nil {
		return "", slaveStartRequired, readOnly, err
	}

	// From here on a failure is only logged and remembered in snapshotErr,
	// so that the state restoration below still runs before we report it.
	var smFile string
	dataFiles, snapshotErr := mysqld.createSnapshot(concurrency, serverMode)
	if snapshotErr != nil {
		log.Errorf("CreateSnapshot failed: %v", snapshotErr)
	} else {
		var sm *SnapshotManifest
		sm, snapshotErr = newSnapshotManifest(sourceAddr, mysqld.IpAddr(),
			masterAddr, dbName, dataFiles, replicationPosition, nil)
		if snapshotErr != nil {
			log.Errorf("CreateSnapshot failed: %v", snapshotErr)
		} else {
			smFile = path.Join(mysqld.SnapshotDir, SnapshotManifestFile)
			if snapshotErr = writeJson(smFile, sm); snapshotErr != nil {
				log.Errorf("CreateSnapshot failed: %v", snapshotErr)
			}
		}
	}

	// restore our state if required
	if serverMode && snapshotErr == nil {
		log.Infof("server mode snapshot worked, not restarting mysql")
	} else {
		if err = mysqld.SnapshotSourceEnd(slaveStartRequired, readOnly, false /*deleteSnapshot*/, hookExtraEnv); err != nil {
			return "", slaveStartRequired, readOnly, err
		}
	}

	if snapshotErr != nil {
		return "", slaveStartRequired, readOnly, snapshotErr
	}

	relative, err := filepath.Rel(mysqld.SnapshotDir, smFile)
	if err != nil {
		// Without a relative path there is no manifest URL to hand out;
		// report that instead of returning an empty path as a success.
		return "", slaveStartRequired, readOnly, err
	}
	return path.Join(SnapshotURLPath, relative), slaveStartRequired, readOnly, nil
}
// SnapshotSourceEnd brings a snapshot source back to its saved state.
//
// If deleteSnapshot is set, mysqld.SnapshotDir is removed first. mysqld is
// then started; if slaveStartRequired is set the slave is started and waited
// for (up to SlaveStartDeadline); finally the read-only flag is set to
// readOnly. hookExtraEnv is passed to StartSlave.
//
// The first step that fails aborts the sequence and its error is returned.
func (mysqld *Mysqld) SnapshotSourceEnd(slaveStartRequired, readOnly, deleteSnapshot bool, hookExtraEnv map[string]string) error {
	if deleteSnapshot {
		// clean out our files
		log.Infof("removing snapshot links: %v", mysqld.SnapshotDir)
		if rmErr := os.RemoveAll(mysqld.SnapshotDir); rmErr != nil {
			log.Warningf("failed to remove old snapshot: %v", rmErr)
			return rmErr
		}
	}

	// Try to restart mysqld
	if startErr := mysqld.Start(MysqlWaitTime); startErr != nil {
		return startErr
	}

	// Put replication back the way the caller found it.
	if slaveStartRequired {
		if slaveErr := mysqld.StartSlave(hookExtraEnv); slaveErr != nil {
			return slaveErr
		}

		// this should be quick, but we might as well just wait
		if waitErr := mysqld.WaitForSlaveStart(SlaveStartDeadline); waitErr != nil {
			return waitErr
		}
	}

	// And set read-only mode
	if roErr := mysqld.SetReadOnly(readOnly); roErr != nil {
		return roErr
	}
	return nil
}
// writeJson encodes x as indented JSON and writes it to filename through
// ioutil2.WriteFileAtomic with mode 0660. A marshalling error is returned
// before anything is written.
func writeJson(filename string, x interface{}) error {
	encoded, marshalErr := json.MarshalIndent(x, " ", " ")
	if marshalErr != nil {
		return marshalErr
	}
	return ioutil2.WriteFileAtomic(filename, encoded, 0660)
}
// ReadSnapshotManifest loads the file at filename and decodes it as a JSON
// SnapshotManifest.
//
// A read failure is returned unchanged; a decoding failure is returned as a
// new error that names the file.
func ReadSnapshotManifest(filename string) (*SnapshotManifest, error) {
	raw, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}

	manifest := &SnapshotManifest{}
	if decodeErr := json.Unmarshal(raw, manifest); decodeErr != nil {
		return nil, fmt.Errorf("ReadSnapshotManifest failed: %v %v", filename, decodeErr)
	}
	return manifest, nil
}
// RestoreFromSnapshot runs on the presumably empty machine acting as the
// target in the create replica action.
//
// Steps, in order; the first failure aborts and is returned:
//   1. validate the target (ValidateCloneTarget)
//   2. shut mysqld down
//   3. fetch the snapshot files (fetchSnapshot)
//   4. start mysqld again
//   5. run the commands from StartReplicationCommands, built from the
//      manifest's ReplicationState
//   6. wait for the slave to start, unless dontWaitForSlaveStart is set
//   7. run the "postflight_restore" hook
//
// Outline from the original design notes:
//   create temp data directory /vt/target/vt_<keyspace>
//   copy compressed data files via HTTP
//   verify hash of compressed files
//   uncompress into /vt/vt_<target-uid>/data/vt_<keyspace>
//   clean up compressed files
//
// snapshotManifest must not be nil. fetchConcurrency and fetchRetryCount are
// passed to fetchSnapshot; hookExtraEnv is passed to the hooks.
func (mysqld *Mysqld) RestoreFromSnapshot(snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error {
	if snapshotManifest == nil {
		return errors.New("RestoreFromSnapshot: nil snapshotManifest")
	}

	log.V(6).Infof("ValidateCloneTarget")
	if validateErr := mysqld.ValidateCloneTarget(hookExtraEnv); validateErr != nil {
		return validateErr
	}

	log.V(6).Infof("Shutdown mysqld")
	if stopErr := mysqld.Shutdown(true, MysqlWaitTime); stopErr != nil {
		return stopErr
	}

	log.V(6).Infof("Fetch snapshot")
	if fetchErr := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); fetchErr != nil {
		return fetchErr
	}

	log.V(6).Infof("Restart mysqld")
	if startErr := mysqld.Start(MysqlWaitTime); startErr != nil {
		return startErr
	}

	// Point replication at the position recorded in the manifest.
	replicationCmds, err := StartReplicationCommands(mysqld, snapshotManifest.ReplicationState)
	if err != nil {
		return err
	}
	if execErr := mysqld.executeSuperQueryList(replicationCmds); execErr != nil {
		return execErr
	}

	waitForSlave := !dontWaitForSlaveStart
	if waitForSlave {
		if waitErr := mysqld.WaitForSlaveStart(SlaveStartDeadline); waitErr != nil {
			return waitErr
		}
	}

	postflight := hook.NewSimpleHook("postflight_restore")
	postflight.ExtraEnv = hookExtraEnv
	if hookErr := postflight.ExecuteOptional(); hookErr != nil {
		return hookErr
	}
	return nil
}
// fetchSnapshot empties the local directories a restore writes into and then
// hands over to fetchFiles.
//
// The directories removed and recreated (mode 0775) are the snapshot
// directory, the data directory of the manifest's database, and the InnoDB
// data and log home directories. fetchConcurrency and fetchRetryCount are
// passed through to fetchFiles, whose result is returned.
func (mysqld *Mysqld) fetchSnapshot(snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int) error {
	dirsToReset := []string{
		mysqld.SnapshotDir,
		path.Join(mysqld.config.DataDir, snapshotManifest.DbName),
		mysqld.config.InnodbDataHomeDir,
		mysqld.config.InnodbLogGroupHomeDir,
	}

	// clean out and start fresh
	// FIXME(msolomon) this might be changed to allow partial recovery, but at that point
	// we are starting to reimplement rsync.
	for _, dir := range dirsToReset {
		err := os.RemoveAll(dir)
		if err == nil {
			err = os.MkdirAll(dir, 0775)
		}
		if err != nil {
			return err
		}
	}

	return fetchFiles(snapshotManifest, mysqld.TabletDir, fetchConcurrency, fetchRetryCount)
}