зеркало из https://github.com/mozilla/mig.git
[medium] add "idle" status to agents that stop sending heartbeats
This commit is contained in:
Родитель
ddbf9745ee
Коммит
bd6d224822
|
@ -362,7 +362,7 @@ GET <root>/search
|
|||
Status depends on the type. Below are the available statuses per type:
|
||||
|
||||
- `action`: init, preparing, invalid, inflight, completed
|
||||
- `agent`: online, upgraded, destroyed, offline
|
||||
- `agent`: online, upgraded, destroyed, offline, idle
|
||||
- `command`: prepared, sent, success, timeout, cancelled, expired, failed
|
||||
- `investigator`: active, disabled
|
||||
|
||||
|
|
|
@ -391,7 +391,7 @@ http://localhost:1664/api/v1/investigator/create/</code></pre>
|
|||
<blockquote>
|
||||
<ul>
|
||||
<li><cite>action</cite>: init, preparing, invalid, inflight, completed</li>
|
||||
<li><cite>agent</cite>: online, upgraded, destroyed, offline</li>
|
||||
<li><cite>agent</cite>: online, upgraded, destroyed, offline, idle</li>
|
||||
<li><cite>command</cite>: prepared, sent, success, timeout, cancelled, expired, failed</li>
|
||||
<li><cite>investigator</cite>: active, disabled</li>
|
||||
</ul>
|
||||
|
|
|
@ -12,6 +12,7 @@ const (
|
|||
AgtStatusUpgraded string = "upgraded"
|
||||
AgtStatusDestroyed string = "destroyed"
|
||||
AgtStatusOffline string = "offline"
|
||||
AgtStatusIdle string = "idle"
|
||||
)
|
||||
|
||||
// Agent stores the description of an agent and serves as a canvas
|
||||
|
|
|
@ -171,11 +171,11 @@ func (db *DB) ActiveAgentsByTarget(target string) (agents []mig.Agent, err error
|
|||
_ = txn.Rollback()
|
||||
return
|
||||
}
|
||||
rows, err := txn.Query(`SELECT DISTINCT ON (queueloc) id, name, queueloc, os, version, pid,
|
||||
rows, err := txn.Query(fmt.Sprintf(`SELECT DISTINCT ON (queueloc) id, name, queueloc, os, version, pid,
|
||||
starttime, destructiontime, heartbeattime, status
|
||||
FROM agents
|
||||
WHERE agents.status = 'online' AND (` + target + `)
|
||||
ORDER BY agents.queueloc, agents.heartbeattime DESC`)
|
||||
WHERE agents.status IN ('%s', '%s') AND (%s)
|
||||
ORDER BY agents.queueloc, agents.heartbeattime DESC`, mig.AgtStatusOnline, mig.AgtStatusIdle, target))
|
||||
if err != nil {
|
||||
_ = txn.Rollback()
|
||||
err = fmt.Errorf("Error while finding agents: '%v'", err)
|
||||
|
@ -312,13 +312,24 @@ func (db *DB) CountDisappearedAgents(seenSince, activeSince time.Time) (sum floa
|
|||
return
|
||||
}
|
||||
|
||||
// MarkOfflineAgents updates the status of agents that have not sent a heartbeat since pointInTime
|
||||
// MarkOfflineAgents sets the status of idle agents whose heartbeattime is older than
// pointInTime to offline. If the UPDATE statement fails, an error describing the failure
// is returned; otherwise err is nil.
|
||||
func (db *DB) MarkOfflineAgents(pointInTime time.Time) (err error) {
|
||||
_, err = db.c.Exec(`UPDATE agents SET status=$1
|
||||
WHERE heartbeattime<$2 AND status!=$3`,
|
||||
mig.AgtStatusOffline, pointInTime, mig.AgtStatusOffline)
|
||||
WHERE heartbeattime<$2 AND status=$3`,
|
||||
mig.AgtStatusOffline, pointInTime, mig.AgtStatusIdle)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to mark agents as offline in database: '%v'", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarkIdleAgents sets the status of online agents whose heartbeattime is older than
// pointInTime to idle. Agents in any other status are left untouched. If the UPDATE
// statement fails, an error describing the failure is returned; otherwise err is nil.
|
||||
func (db *DB) MarkIdleAgents(pointInTime time.Time) (err error) {
|
||||
_, err = db.c.Exec(`UPDATE agents SET status=$1
|
||||
WHERE heartbeattime<$2 AND status=$3`,
|
||||
mig.AgtStatusIdle, pointInTime, mig.AgtStatusOnline)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to mark agents as idle in database: '%v'", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -46,7 +46,11 @@ func spoolInspection(ctx Context) (err error) {
|
|||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = timeoutAgents(ctx)
|
||||
err = markOfflineAgents(ctx)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = markIdleAgents(ctx)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
@ -215,13 +219,13 @@ func cleanDir(ctx Context, targetDir string) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// timeoutAgents updates the status of agents that are no longer heartbeating to "offline"
|
||||
func timeoutAgents(ctx Context) (err error) {
|
||||
// markOfflineAgents updates the status of idle agents that passed the agent timeout to "offline"
|
||||
func markOfflineAgents(ctx Context) (err error) {
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("timeoutAgents() -> %v", e)
|
||||
err = fmt.Errorf("markOfflineAgents() -> %v", e)
|
||||
}
|
||||
ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: "leaving timeoutAgents()"}.Debug()
|
||||
ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: "leaving markOfflineAgents()"}.Debug()
|
||||
}()
|
||||
timeOutPeriod, err := time.ParseDuration(ctx.Agent.TimeOut)
|
||||
if err != nil {
|
||||
|
@ -234,3 +238,23 @@ func timeoutAgents(ctx Context) (err error) {
|
|||
}
|
||||
return
|
||||
}
|
||||
|
||||
// markIdleAgents asks the database to mark agents as idle when their last heartbeat is
// older than five times the configured heartbeat frequency (ctx.Agent.HeartbeatFreq).
// A heartbeat frequency that cannot be parsed as a duration, or a database failure,
// triggers a panic that the deferred function recovers and returns as err, prefixed
// with "markIdleAgents() -> ". A debug log entry is sent to ctx.Channels.Log on exit.
|
||||
func markIdleAgents(ctx Context) (err error) {
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("markIdleAgents() -> %v", e)
|
||||
}
|
||||
ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: "leaving markIdleAgents()"}.Debug()
|
||||
}()
|
||||
hbFreq, err := time.ParseDuration(ctx.Agent.HeartbeatFreq)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
pointInTime := time.Now().Add(-hbFreq * 5)
|
||||
err = ctx.DB.MarkIdleAgents(pointInTime)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче