file: honor matchlimit, uses channels for result processing

The matchlimit option to the file module was not working correctly, for
a couple of reasons.

First, it relied on the Totalhits value in the module statistics to
compare the number of hits to the match limit. This value was compiled
in buildResults, so it was 0 throughout module execution. Because of
this, matchlimit would never be exceeded.

Also, the comparison to Totalhits was only occurring on directory entry.
This means that if the match limit was hit while scanning a single
directory, the module would continue to scan files, exceeding the match
limit.

This modifies the way results are processed by Search types, so that
individual checks submit matched files via a channel to the parent
Search entry. The Search entry can then maintain a list of matches and
increment Totalhits as required while processing is occurring, instead
of in buildResults.

Closes #382
This commit is contained in:
Aaron Meihm 2017-08-21 15:37:13 -05:00
Родитель 543fe004fb
Коммит d0e77beaf8
2 изменённых файлов: 189 добавлений и 115 удалений

Просмотреть файл

@ -64,12 +64,12 @@ type run struct {
// parameters describes the parameters the file module uses as input upon // parameters describes the parameters the file module uses as input upon
// invocation // invocation
type parameters struct { type parameters struct {
Searches map[string]Search `json:"searches,omitempty"` Searches map[string]*Search `json:"searches,omitempty"`
} }
func newParameters() *parameters { func newParameters() *parameters {
var p parameters var p parameters
p.Searches = make(map[string]Search) p.Searches = make(map[string]*Search)
return &p return &p
} }
@ -92,6 +92,8 @@ type Search struct {
isactive bool isactive bool
iscurrent bool iscurrent bool
currentdepth uint64 currentdepth uint64
matchChan chan checkMatchNotify // Channel to notify search processor of a check hit
filesMatchingAll []string // If Options.MatchAll, stores files matching all checks
} }
type options struct { type options struct {
@ -128,7 +130,9 @@ const (
checkSHA3_512 checkSHA3_512
) )
// check represents an individual check that is part of a search.
type check struct { type check struct {
checkid int // Internal check ID, set by the search parent
code checkType code checkType
matched uint64 matched uint64
matchedfiles []string matchedfiles []string
@ -137,17 +141,107 @@ type check struct {
minsize, maxsize uint64 minsize, maxsize uint64
minmtime, maxmtime time.Time minmtime, maxmtime time.Time
inversematch, mismatch bool inversematch, mismatch bool
matchChan chan checkMatchNotify
waitNotify chan bool
}
// checkMatchNotify is sent from the check to the parent Search via the checks matchChan to
// notify the Search type's search processor that a match has been found for an individual check.
type checkMatchNotify struct {
checkid int
file string
} }
// pretty much infinity when it comes to file searches // pretty much infinity when it comes to file searches
const unlimited float64 = 1125899906842624 const unlimited float64 = 1125899906842624
// processMatch receives match notifications from the individual checks that
// belong to this search and records them. For every message read from
// s.matchChan it:
//
//   - locates the originating check by its checkid (an unknown ID is a
//     programming error and panics),
//   - stores the file on that check unless the check already recorded it,
//   - updates stats.Totalhits: once per newly stored file when MatchAll is
//     not set, or once per file at the moment it has matched every check of
//     the search when MatchAll is set (such files are also appended to
//     s.filesMatchingAll to make building the results simpler).
//
// Although this function is meant to run in its own goroutine, execution is
// serialized with the sender: after handling each message it writes to the
// originating check's waitNotify channel to signal it is ready for the next
// result.
//
// The function returns when s.matchChan is closed, which gives the owner of
// the search a way to stop the goroutine; a closed channel is never treated
// as a match for a bogus check ID.
func (s *Search) processMatch() {
	for match := range s.matchChan {
		// Resolve the check that reported this match. Check IDs are unique
		// within a search, so the first hit is the only one.
		var c *check
		for i := range s.checks {
			if s.checks[i].checkid == match.checkid {
				c = &s.checks[i]
				break
			}
		}
		if c == nil {
			// This is fatal, and means we received a result for a check which we
			// do not know about
			panic("processMatch received check result for invalid check id")
		}

		// A file is only stored (and counted) once per check; if this check
		// already knows the file there is nothing left to do.
		if c.hasMatch(match.file) {
			c.waitNotify <- true
			continue
		}
		c.matchedfiles = append(c.matchedfiles, match.file)
		c.matched++

		switch {
		case !s.Options.MatchAll:
			// MatchAll isn't set, so every newly stored hit counts as a match.
			stats.Totalhits++
		case !s.allChecksMatched(match.file):
			// MatchAll is set and the file has not been counted yet: it only
			// becomes a hit once every check in the search has matched it.
			allmatch := true
			for i := range s.checks {
				if !s.checks[i].hasMatch(match.file) {
					allmatch = false
					break
				}
			}
			if allmatch {
				s.filesMatchingAll = append(s.filesMatchingAll, match.file)
				stats.Totalhits++
			}
		}
		// Remaining case (MatchAll set, file already in filesMatchingAll): the
		// file has been counted before and must not be counted again.

		c.waitNotify <- true
	}
}
// allChecksMatched reports whether file has already been recorded in the
// search's filesMatchingAll list.
func (s *Search) allChecksMatched(file string) bool {
	known := s.filesMatchingAll
	for i := 0; i < len(known); i++ {
		if known[i] != file {
			continue
		}
		return true
	}
	return false
}
func (s *Search) makeChecks() (err error) { func (s *Search) makeChecks() (err error) {
var nextCheckID int
defer func() { defer func() {
if e := recover(); e != nil { if e := recover(); e != nil {
err = fmt.Errorf("makeChecks() -> %v", e) err = fmt.Errorf("makeChecks() -> %v", e)
} }
}() }()
nextCID := func() check {
ret := check{}
nextCheckID++
ret.checkid = nextCheckID
ret.matchChan = s.matchChan
ret.waitNotify = make(chan bool, 0)
return ret
}
if s.Options.Debug == "print" { if s.Options.Debug == "print" {
debug = true debug = true
} }
@ -161,7 +255,7 @@ func (s *Search) makeChecks() (err error) {
s.Options.MatchLimit = unlimited s.Options.MatchLimit = unlimited
} }
for _, v := range s.Contents { for _, v := range s.Contents {
var c check c := nextCID()
c.code = checkContent c.code = checkContent
c.value = v c.value = v
if len(v) > 1 && v[:1] == "!" { if len(v) > 1 && v[:1] == "!" {
@ -176,7 +270,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.Names { for _, v := range s.Names {
var c check c := nextCID()
c.code = checkName c.code = checkName
c.value = v c.value = v
if len(v) > 1 && v[:1] == "!" { if len(v) > 1 && v[:1] == "!" {
@ -191,7 +285,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.Sizes { for _, v := range s.Sizes {
var c check c := nextCID()
c.code = checkSize c.code = checkSize
c.value = v c.value = v
c.minsize, c.maxsize, err = parseSize(v) c.minsize, c.maxsize, err = parseSize(v)
@ -205,7 +299,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.Modes { for _, v := range s.Modes {
var c check c := nextCID()
c.code = checkMode c.code = checkMode
c.value = v c.value = v
if s.hasMismatch("mode") { if s.hasMismatch("mode") {
@ -216,7 +310,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.Mtimes { for _, v := range s.Mtimes {
var c check c := nextCID()
c.code = checkMtime c.code = checkMtime
c.value = v c.value = v
if s.hasMismatch("mtime") { if s.hasMismatch("mtime") {
@ -230,7 +324,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.MD5 { for _, v := range s.MD5 {
var c check c := nextCID()
c.code = checkMD5 c.code = checkMD5
c.value = strings.ToUpper(v) c.value = strings.ToUpper(v)
if s.hasMismatch("md5") { if s.hasMismatch("md5") {
@ -240,7 +334,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.SHA1 { for _, v := range s.SHA1 {
var c check c := nextCID()
c.code = checkSHA1 c.code = checkSHA1
c.value = strings.ToUpper(v) c.value = strings.ToUpper(v)
if s.hasMismatch("sha1") { if s.hasMismatch("sha1") {
@ -250,7 +344,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.SHA2 { for _, v := range s.SHA2 {
var c check c := nextCID()
c.value = strings.ToUpper(v) c.value = strings.ToUpper(v)
if s.hasMismatch("sha2") { if s.hasMismatch("sha2") {
c.mismatch = true c.mismatch = true
@ -267,7 +361,7 @@ func (s *Search) makeChecks() (err error) {
s.checkmask |= c.code s.checkmask |= c.code
} }
for _, v := range s.SHA3 { for _, v := range s.SHA3 {
var c check c := nextCID()
c.value = strings.ToUpper(v) c.value = strings.ToUpper(v)
if s.hasMismatch("sha3") { if s.hasMismatch("sha3") {
c.mismatch = true c.mismatch = true
@ -410,20 +504,24 @@ func (s *Search) unmarkcurrent() {
return return
} }
// storeMatch writes a matched file to the check's parent Search type results
// processor, where it can be processed and stored with the check.
func (c *check) storeMatch(file string) { func (c *check) storeMatch(file string) {
store := true c.matchChan <- checkMatchNotify{
debugprint("storing match %v\n", file) file: file,
for _, storedFile := range c.matchedfiles { checkid: c.checkid,
// only store files once per check }
if file == storedFile { _ = <-c.waitNotify
store = false }
// hasMatch returns true if a check has matched against a file
func (c *check) hasMatch(file string) bool {
for _, x := range c.matchedfiles {
if x == file {
return true
} }
} }
if store { return false
c.matched++
c.matchedfiles = append(c.matchedfiles, file)
}
return
} }
func (r *run) ValidateParameters() (err error) { func (r *run) ValidateParameters() (err error) {
@ -688,6 +786,12 @@ func (r *run) Run(in modules.ModuleReader) (resStr string) {
} }
for label, search := range r.Parameters.Searches { for label, search := range r.Parameters.Searches {
// Allocate our match input channel; checks will write the filename and their respective
// check ID to this channel when a match is identified
search.matchChan = make(chan checkMatchNotify, 0)
// Start the incoming match processor for the search entry
go search.processMatch()
// Create all the checks for the search
debugprint("making checks for label %s\n", label) debugprint("making checks for label %s\n", label)
err := search.makeChecks() err := search.makeChecks()
if err != nil { if err != nil {
@ -842,6 +946,15 @@ func (r *run) pathWalk(path string, roots []string) (traversed []string, err err
} }
// loop over the content of the directory // loop over the content of the directory
for _, dirEntry := range dirContent { for _, dirEntry := range dirContent {
// While we are iterating over the directory content, consult Totalhits to
// make sure we do not exceed our match limit. If we hit the match limit, we
// deactivate the search.
for _, search := range r.Parameters.Searches {
if stats.Totalhits >= search.Options.MatchLimit {
search.deactivate()
activesearches--
}
}
entryAbsPath := path entryAbsPath := path
// append path separator if missing // append path separator if missing
if entryAbsPath[len(entryAbsPath)-1] != os.PathSeparator { if entryAbsPath[len(entryAbsPath)-1] != os.PathSeparator {
@ -1128,10 +1241,11 @@ func (c check) wantThis(match bool) bool {
func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) {
matchedall = true matchedall = true
if (s.checkmask & checkName) != 0 { if (s.checkmask & checkName) != 0 {
for i, c := range s.checks { for i := range s.checks {
if (c.code & checkName) == 0 { if (s.checks[i].code & checkName) == 0 {
continue continue
} }
c := &s.checks[i]
match := c.regex.MatchString(path.Base(fi.Name())) match := c.regex.MatchString(path.Base(fi.Name()))
if match { if match {
debugprint("file name '%s' matches regex '%s'\n", fi.Name(), c.value) debugprint("file name '%s' matches regex '%s'\n", fi.Name(), c.value)
@ -1141,7 +1255,6 @@ func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) {
} else { } else {
matchedall = false matchedall = false
} }
s.checks[i] = c
} }
} }
return return
@ -1150,10 +1263,11 @@ func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) {
func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) {
matchedall = true matchedall = true
if (s.checkmask & checkMode) != 0 { if (s.checkmask & checkMode) != 0 {
for i, c := range s.checks { for i := range s.checks {
if (c.code & checkMode) == 0 { if (s.checks[i].code & checkMode) == 0 {
continue continue
} }
c := &s.checks[i]
match := c.regex.MatchString(fi.Mode().String()) match := c.regex.MatchString(fi.Mode().String())
if match { if match {
debugprint("file '%s' mode '%s' matches regex '%s'\n", debugprint("file '%s' mode '%s' matches regex '%s'\n",
@ -1164,7 +1278,6 @@ func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) {
} else { } else {
matchedall = false matchedall = false
} }
s.checks[i] = c
} }
} }
return return
@ -1173,10 +1286,11 @@ func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) {
func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) {
matchedall = true matchedall = true
if (s.checkmask & checkSize) != 0 { if (s.checkmask & checkSize) != 0 {
for i, c := range s.checks { for i := range s.checks {
if (c.code & checkSize) == 0 { if (s.checks[i].code & checkSize) == 0 {
continue continue
} }
c := &s.checks[i]
match := false match := false
if fi.Size() >= int64(c.minsize) && fi.Size() <= int64(c.maxsize) { if fi.Size() >= int64(c.minsize) && fi.Size() <= int64(c.maxsize) {
match = true match = true
@ -1188,7 +1302,6 @@ func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) {
} else { } else {
matchedall = false matchedall = false
} }
s.checks[i] = c
} }
} }
return return
@ -1197,10 +1310,11 @@ func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) {
func (s Search) checkMtime(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkMtime(file string, fi os.FileInfo) (matchedall bool) {
matchedall = true matchedall = true
if (s.checkmask & checkMtime) != 0 { if (s.checkmask & checkMtime) != 0 {
for i, c := range s.checks { for i := range s.checks {
if (c.code & checkMtime) == 0 { if (s.checks[i].code & checkMtime) == 0 {
continue continue
} }
c := &s.checks[i]
match := false match := false
if fi.ModTime().After(c.minmtime) && fi.ModTime().Before(c.maxmtime) { if fi.ModTime().After(c.minmtime) && fi.ModTime().Before(c.maxmtime) {
match = true match = true
@ -1213,7 +1327,6 @@ func (s Search) checkMtime(file string, fi os.FileInfo) (matchedall bool) {
} else { } else {
matchedall = false matchedall = false
} }
s.checks[i] = c
} }
} }
return return
@ -1239,8 +1352,8 @@ func (r *run) checkContent(f fileEntry) {
if !search.isactive { if !search.isactive {
continue continue
} }
for i, c := range search.checks { for i := range search.checks {
if c.code&checkContent == 0 { if search.checks[i].code&checkContent == 0 {
continue continue
} }
// init the map // init the map
@ -1273,11 +1386,12 @@ func (r *run) checkContent(f fileEntry) {
// the macroal flag is set to false // the macroal flag is set to false
macroalstatus[label] = true macroalstatus[label] = true
// apply the content checks regexes to the current scan // apply the content checks regexes to the current scan
for i, c := range search.checks { for i := range search.checks {
// skip this check if it's not a content check or if it has already matched // skip this check if it's not a content check or if it has already matched
if c.code&checkContent == 0 || (checksstatus[label][i] && !search.Options.Macroal) { if search.checks[i].code&checkContent == 0 || (checksstatus[label][i] && !search.Options.Macroal) {
continue continue
} }
c := &search.checks[i]
hasactivechecks = true hasactivechecks = true
/* Matching Logic /* Matching Logic
@ -1351,7 +1465,6 @@ func (r *run) checkContent(f fileEntry) {
} }
} }
} }
search.checks[i] = c
} }
if search.Options.Macroal && !macroalstatus[label] { if search.Options.Macroal && !macroalstatus[label] {
// we have failed to match all content regexes on this line, // we have failed to match all content regexes on this line,
@ -1376,23 +1489,24 @@ func (r *run) checkContent(f fileEntry) {
// we match all content regexes on all lines of the file, // we match all content regexes on all lines of the file,
// as requested via the Macroal flag // as requested via the Macroal flag
// now store the filename in all checks // now store the filename in all checks
for i, c := range search.checks { for i := range search.checks {
if c.code&checkContent == 0 { if search.checks[i].code&checkContent == 0 {
continue continue
} }
c := &search.checks[i]
if c.wantThis(checksstatus[label][i]) { if c.wantThis(checksstatus[label][i]) {
c.storeMatch(f.filename) c.storeMatch(f.filename)
} }
search.checks[i] = c
} }
// we're done with this search // we're done with this search
continue continue
} }
// 2. If any check with inversematch=true failed to match, record that as a success // 2. If any check with inversematch=true failed to match, record that as a success
for i, c := range search.checks { for i := range search.checks {
if c.code&checkContent == 0 { if search.checks[i].code&checkContent == 0 {
continue continue
} }
c := &search.checks[i]
if !checksstatus[label][i] && c.inversematch { if !checksstatus[label][i] && c.inversematch {
debugprint("in search '%s' on file '%s', check '%s' has not matched and is set to inversematch, record this as a positive result\n", debugprint("in search '%s' on file '%s', check '%s' has not matched and is set to inversematch, record this as a positive result\n",
label, f.filename, c.value) label, f.filename, c.value)
@ -1401,26 +1515,24 @@ func (r *run) checkContent(f fileEntry) {
} }
// adjust check status to true because the check did in fact match as an inverse // adjust check status to true because the check did in fact match as an inverse
checksstatus[label][i] = true checksstatus[label][i] = true
search.checks[i] = c
} }
} }
// 3. deactivate searches that have matchall=true, but did not match against // 3. deactivate searches that have matchall=true, but did not match against
if search.isactive && (search.checkmask&checkContent != 0) && search.Options.MatchAll { if search.isactive && (search.checkmask&checkContent != 0) && search.Options.MatchAll {
for i, c := range search.checks { for i := range search.checks {
if c.code&checkContent == 0 { if search.checks[i].code&checkContent == 0 {
continue continue
} }
c := &search.checks[i]
// check hasn't matched, or has matched and we didn't want it to, deactivate the search // check hasn't matched, or has matched and we didn't want it to, deactivate the search
if !checksstatus[label][i] || (checksstatus[label][i] && c.inversematch) { if !checksstatus[label][i] || (checksstatus[label][i] && c.inversematch) {
if c.wantThis(checksstatus[label][i]) { if c.wantThis(checksstatus[label][i]) {
c.storeMatch(f.filename) c.storeMatch(f.filename)
search.checks[i] = c
} }
search.deactivate() search.deactivate()
} }
} }
} }
r.Parameters.Searches[label] = search
} }
return return
} }
@ -1451,10 +1563,11 @@ func (r *run) checkHash(f fileEntry, hashtype checkType) {
} }
for label, search := range r.Parameters.Searches { for label, search := range r.Parameters.Searches {
if search.isactive && (search.checkmask&hashtype) != 0 { if search.isactive && (search.checkmask&hashtype) != 0 {
for i, c := range search.checks { for i := range search.checks {
if c.code&hashtype == 0 { if search.checks[i].code&hashtype == 0 {
continue continue
} }
c := &search.checks[i]
match := false match := false
if c.value == hash { if c.value == hash {
match = true match = true
@ -1465,7 +1578,6 @@ func (r *run) checkHash(f fileEntry, hashtype checkType) {
} else if search.Options.MatchAll { } else if search.Options.MatchAll {
search.deactivate() search.deactivate()
} }
search.checks[i] = c
} }
} }
r.Parameters.Searches[label] = search r.Parameters.Searches[label] = search
@ -1564,52 +1676,15 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) {
// first pass on the results: if matchall is set, verify that all // first pass on the results: if matchall is set, verify that all
// the checks matched on all the files // the checks matched on all the files
if search.Options.MatchAll { if search.Options.MatchAll {
// collect all the files that were found across all checks of this search // The results processor which is part of the search has already prepared a list
var allFiles, matchedFiles []string // of files that match all searches, so we leverage that to build our results.
for _, c := range search.checks { if len(search.filesMatchingAll) == 0 {
// populate allFiles as a slice of unique files search.filesMatchingAll = append(search.filesMatchingAll, "")
for _, matchedFile := range c.matchedfiles {
store := true
for _, afile := range allFiles {
if afile == matchedFile {
store = false
} }
} for _, matchedFile := range search.filesMatchingAll {
if store {
allFiles = append(allFiles, matchedFile)
}
}
}
// verify that each file has matched on all the checks
for _, foundFile := range allFiles {
debugprint("checking if file %s matched all checks\n", foundFile)
matchedallchecks := true
for _, c := range search.checks {
found := false
for _, matchedFile := range c.matchedfiles {
if foundFile == matchedFile {
found = true
}
}
if !found {
debugprint("check %d did not match\n", c.code)
matchedallchecks = false
break
}
}
if matchedallchecks {
matchedFiles = append(matchedFiles, foundFile)
}
}
if len(matchedFiles) == 0 {
matchedFiles = append(matchedFiles, "")
}
// now that we have a clean list of files that matched all checks, store it
for _, matchedFile := range matchedFiles {
var mf MatchedFile var mf MatchedFile
mf.File = matchedFile mf.File = matchedFile
if mf.File != "" { if mf.File != "" {
stats.Totalhits++
fi, err := os.Stat(mf.File) fi, err := os.Stat(mf.File)
if err != nil { if err != nil {
panic(err) panic(err)
@ -1625,7 +1700,7 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) {
} }
} }
} }
mf.Search = search mf.Search = *search
mf.Search.Options.MatchLimit = 0 mf.Search.Options.MatchLimit = 0
// store the value of maxerrors if greater than the one // store the value of maxerrors if greater than the one
// we already have, we'll need it further down to return // we already have, we'll need it further down to return
@ -1652,7 +1727,6 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) {
var mf MatchedFile var mf MatchedFile
mf.File = file mf.File = file
if mf.File != "" { if mf.File != "" {
stats.Totalhits++
fi, err := os.Stat(file) fi, err := os.Stat(file)
if err != nil { if err != nil {
panic(err) panic(err)

Просмотреть файл

@ -392,7 +392,7 @@ func (r *run) ParamsCreator() (interface{}, error) {
} }
fmt.Printf("Stored %s %s\nEnter a new parameter, or 'done' to exit.\n", checkType, checkValue) fmt.Printf("Stored %s %s\nEnter a new parameter, or 'done' to exit.\n", checkType, checkValue)
} }
p.Searches[label] = search p.Searches[label] = &search
fmt.Println("Stored search", label) fmt.Println("Stored search", label)
} }
exit: exit:
@ -467,7 +467,7 @@ func (r *run) ParamsParser(args []string) (interface{}, error) {
s.Options.Debug = "print" s.Options.Debug = "print"
} }
p := newParameters() p := newParameters()
p.Searches["s1"] = s p.Searches["s1"] = &s
r.Parameters = *p r.Parameters = *p
return r.Parameters, r.ValidateParameters() return r.Parameters, r.ValidateParameters()
} }