diff --git a/modules/file/file.go b/modules/file/file.go index 7c1dfd60..2b16a19c 100644 --- a/modules/file/file.go +++ b/modules/file/file.go @@ -64,34 +64,36 @@ type run struct { // parameters describes the parameters the file module uses as input upon // invocation type parameters struct { - Searches map[string]Search `json:"searches,omitempty"` + Searches map[string]*Search `json:"searches,omitempty"` } func newParameters() *parameters { var p parameters - p.Searches = make(map[string]Search) + p.Searches = make(map[string]*Search) return &p } // Search contains the fields used to execute an individual search type Search struct { - Description string `json:"description,omitempty"` - Paths []string `json:"paths"` - Contents []string `json:"contents,omitempty"` - Names []string `json:"names,omitempty"` - Sizes []string `json:"sizes,omitempty"` - Modes []string `json:"modes,omitempty"` - Mtimes []string `json:"mtimes,omitempty"` - MD5 []string `json:"md5,omitempty"` - SHA1 []string `json:"sha1,omitempty"` - SHA2 []string `json:"sha2,omitempty"` - SHA3 []string `json:"sha3,omitempty"` - Options options `json:"options,omitempty"` - checks []check - checkmask checkType - isactive bool - iscurrent bool - currentdepth uint64 + Description string `json:"description,omitempty"` + Paths []string `json:"paths"` + Contents []string `json:"contents,omitempty"` + Names []string `json:"names,omitempty"` + Sizes []string `json:"sizes,omitempty"` + Modes []string `json:"modes,omitempty"` + Mtimes []string `json:"mtimes,omitempty"` + MD5 []string `json:"md5,omitempty"` + SHA1 []string `json:"sha1,omitempty"` + SHA2 []string `json:"sha2,omitempty"` + SHA3 []string `json:"sha3,omitempty"` + Options options `json:"options,omitempty"` + checks []check + checkmask checkType + isactive bool + iscurrent bool + currentdepth uint64 + matchChan chan checkMatchNotify // Channel to notify search processor of a check hit + filesMatchingAll []string // If Options.MatchAll, stores files 
matching all checks } type options struct { @@ -128,7 +130,9 @@ const ( checkSHA3_512 ) +// check represents an individual check that is part of a search. type check struct { + checkid int // Internal check ID, set by the search parent code checkType matched uint64 matchedfiles []string @@ -137,17 +141,107 @@ type check struct { minsize, maxsize uint64 minmtime, maxmtime time.Time inversematch, mismatch bool + matchChan chan checkMatchNotify // Parent search's channel; storeMatch sends each match on it + waitNotify chan bool // Written by processMatch once a sent match has been handled +} + +// checkMatchNotify is sent from the check to the parent Search via the check's matchChan to +// notify the Search type's search processor that a match has been found for an individual check. +type checkMatchNotify struct { + checkid int // checkid of the check that produced the match + file string } // pretty much infinity when it comes to file searches const unlimited float64 = 1125899906842624 +// processMatch processes incoming matches from individual checks which are part of the search. It +// also manages the total hit statistics. The match processor does some preprocessing, such as identifying +// files that match all checks for a search if MatchAll is set, to make building the results simpler. +// +// Although this function runs in a goroutine, execution is serialized via a wait channel this function +// will write to when it's ready for the next result. 
+func (s *Search) processMatch() { + // Receive until matchChan is closed; ranging lets the owner of the search stop this + // goroutine by closing the channel instead of leaving it blocked forever. + for match := range s.matchChan { + var c *check + + for i := range s.checks { + if s.checks[i].checkid == match.checkid { + c = &s.checks[i] + } + } + if c == nil { + // This is fatal, and means we received a result for a check which we + // do not know about + panic("processMatch received check result for invalid check id") + } + // See if we need to add the file for this check, if it already exists we are done + found := false + for _, x := range c.matchedfiles { + if x == match.file { + found = true + break + } + } + if found { + c.waitNotify <- true + continue + } + c.matchedfiles = append(c.matchedfiles, match.file) + c.matched++ + + // If this search has MatchAll set, see if this file now matches all checks in + // the search. If so, add it to the filesMatchingAll list. + if s.Options.MatchAll && !s.allChecksMatched(match.file) { + allmatch := true + for i := range s.checks { + if !s.checks[i].hasMatch(match.file) { + allmatch = false + break + } + } + if allmatch { + s.filesMatchingAll = append(s.filesMatchingAll, match.file) + // Since this should be considered a match now, increment the hits + // counter + stats.Totalhits++ + } + } else if !s.Options.MatchAll { + // MatchAll isn't set, so we just count every hit here as a match + stats.Totalhits++ + } + + c.waitNotify <- true + } +} + +// allChecksMatched returns true if the file is in the filesMatchingAll list for a search +func (s *Search) allChecksMatched(file string) bool { + for _, f := range s.filesMatchingAll { + if f == file { + return true + } + } + return false +} + func (s *Search) makeChecks() (err error) { + var nextCheckID int defer func() { if e := recover(); e != nil { err = fmt.Errorf("makeChecks() -> %v", e) } }() + nextCID := func() check { + ret := check{} + nextCheckID++ + ret.checkid = nextCheckID + ret.matchChan = s.matchChan + ret.waitNotify = make(chan bool) + return ret + } if s.Options.Debug == "print" { debug = true } @@ -161,7 +255,7 @@ func (s *Search) makeChecks() 
(err error) { s.Options.MatchLimit = unlimited } for _, v := range s.Contents { - var c check + c := nextCID() c.code = checkContent c.value = v if len(v) > 1 && v[:1] == "!" { @@ -176,7 +270,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.Names { - var c check + c := nextCID() c.code = checkName c.value = v if len(v) > 1 && v[:1] == "!" { @@ -191,7 +285,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.Sizes { - var c check + c := nextCID() c.code = checkSize c.value = v c.minsize, c.maxsize, err = parseSize(v) @@ -205,7 +299,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.Modes { - var c check + c := nextCID() c.code = checkMode c.value = v if s.hasMismatch("mode") { @@ -216,7 +310,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.Mtimes { - var c check + c := nextCID() c.code = checkMtime c.value = v if s.hasMismatch("mtime") { @@ -230,7 +324,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.MD5 { - var c check + c := nextCID() c.code = checkMD5 c.value = strings.ToUpper(v) if s.hasMismatch("md5") { @@ -240,7 +334,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.SHA1 { - var c check + c := nextCID() c.code = checkSHA1 c.value = strings.ToUpper(v) if s.hasMismatch("sha1") { @@ -250,7 +344,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.SHA2 { - var c check + c := nextCID() c.value = strings.ToUpper(v) if s.hasMismatch("sha2") { c.mismatch = true @@ -267,7 +361,7 @@ func (s *Search) makeChecks() (err error) { s.checkmask |= c.code } for _, v := range s.SHA3 { - var c check + c := nextCID() c.value = strings.ToUpper(v) if s.hasMismatch("sha3") { c.mismatch = true @@ -410,20 +504,24 @@ func (s *Search) unmarkcurrent() { return } +// storeMatch writes a matched 
file to the check's parent Search type results +// processor, where it can be processed and stored with the check. func (c *check) storeMatch(file string) { - store := true - debugprint("storing match %v\n", file) - for _, storedFile := range c.matchedfiles { - // only store files once per check - if file == storedFile { - store = false + c.matchChan <- checkMatchNotify{ + file: file, + checkid: c.checkid, + } + <-c.waitNotify +} + +// hasMatch returns true if a check has matched against a file +func (c *check) hasMatch(file string) bool { + for _, x := range c.matchedfiles { + if x == file { + return true } } - if store { - c.matched++ - c.matchedfiles = append(c.matchedfiles, file) - } - return + return false } func (r *run) ValidateParameters() (err error) { @@ -688,6 +786,12 @@ func (r *run) Run(in modules.ModuleReader) (resStr string) { } for label, search := range r.Parameters.Searches { + // Allocate our match input channel; checks will write the filename and their respective + // check ID to this channel when a match is identified + search.matchChan = make(chan checkMatchNotify) + // Start the incoming match processor for the search entry + go search.processMatch() + // Create all the checks for the search debugprint("making checks for label %s\n", label) err := search.makeChecks() if err != nil { @@ -842,6 +946,15 @@ func (r *run) pathWalk(path string, roots []string) (traversed []string, err err } // loop over the content of the directory for _, dirEntry := range dirContent { + // While we are iterating over the directory content, consult Totalhits to + // make sure we do not exceed our match limit. If we hit the match limit, we + // deactivate the search. 
+ for _, search := range r.Parameters.Searches { + if search.isactive && stats.Totalhits >= search.Options.MatchLimit { + search.deactivate() + activesearches-- // NOTE(review): confirm deactivate() does not already decrement activesearches + } + } entryAbsPath := path // append path separator if missing if entryAbsPath[len(entryAbsPath)-1] != os.PathSeparator { @@ -1128,10 +1241,11 @@ func (c check) wantThis(match bool) bool { func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) { matchedall = true if (s.checkmask & checkName) != 0 { - for i, c := range s.checks { - if (c.code & checkName) == 0 { + for i := range s.checks { + if (s.checks[i].code & checkName) == 0 { continue } + c := &s.checks[i] match := c.regex.MatchString(path.Base(fi.Name())) if match { debugprint("file name '%s' matches regex '%s'\n", fi.Name(), c.value) @@ -1141,7 +1255,6 @@ func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) { } else { matchedall = false } - s.checks[i] = c } } return @@ -1150,10 +1263,11 @@ func (s Search) checkName(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) { matchedall = true if (s.checkmask & checkMode) != 0 { - for i, c := range s.checks { - if (c.code & checkMode) == 0 { + for i := range s.checks { + if (s.checks[i].code & checkMode) == 0 { continue } + c := &s.checks[i] match := c.regex.MatchString(fi.Mode().String()) if match { debugprint("file '%s' mode '%s' matches regex '%s'\n", @@ -1164,7 +1278,6 @@ func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) { } else { matchedall = false } - s.checks[i] = c } } return @@ -1173,10 +1286,11 @@ func (s Search) checkMode(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) { matchedall = true if (s.checkmask & checkSize) != 0 { - for i, c := range s.checks { - if (c.code & checkSize) == 0 { + for i := range s.checks { + if (s.checks[i].code & checkSize) == 0 { continue } + c := &s.checks[i] match := false if 
fi.Size() >= int64(c.minsize) && fi.Size() <= int64(c.maxsize) { match = true @@ -1188,7 +1302,6 @@ func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) { } else { matchedall = false } - s.checks[i] = c } } return @@ -1197,10 +1310,11 @@ func (s Search) checkSize(file string, fi os.FileInfo) (matchedall bool) { func (s Search) checkMtime(file string, fi os.FileInfo) (matchedall bool) { matchedall = true if (s.checkmask & checkMtime) != 0 { - for i, c := range s.checks { - if (c.code & checkMtime) == 0 { + for i := range s.checks { + if (s.checks[i].code & checkMtime) == 0 { continue } + c := &s.checks[i] match := false if fi.ModTime().After(c.minmtime) && fi.ModTime().Before(c.maxmtime) { match = true @@ -1213,7 +1327,6 @@ func (s Search) checkMtime(file string, fi os.FileInfo) (matchedall bool) { } else { matchedall = false } - s.checks[i] = c } } return @@ -1239,8 +1352,8 @@ func (r *run) checkContent(f fileEntry) { if !search.isactive { continue } - for i, c := range search.checks { - if c.code&checkContent == 0 { + for i := range search.checks { + if search.checks[i].code&checkContent == 0 { continue } // init the map @@ -1273,11 +1386,12 @@ func (r *run) checkContent(f fileEntry) { // the macroal flag is set to false macroalstatus[label] = true // apply the content checks regexes to the current scan - for i, c := range search.checks { + for i := range search.checks { // skip this check if it's not a content check or if it has already matched - if c.code&checkContent == 0 || (checksstatus[label][i] && !search.Options.Macroal) { + if search.checks[i].code&checkContent == 0 || (checksstatus[label][i] && !search.Options.Macroal) { continue } + c := &search.checks[i] hasactivechecks = true /* Matching Logic @@ -1351,7 +1465,6 @@ func (r *run) checkContent(f fileEntry) { } } } - search.checks[i] = c } if search.Options.Macroal && !macroalstatus[label] { // we have failed to match all content regexes on this line, @@ -1376,23 +1489,24 @@ func (r 
*run) checkContent(f fileEntry) { // we match all content regexes on all lines of the file, // as requested via the Macroal flag // now store the filename in all checks - for i, c := range search.checks { - if c.code&checkContent == 0 { + for i := range search.checks { + if search.checks[i].code&checkContent == 0 { continue } + c := &search.checks[i] if c.wantThis(checksstatus[label][i]) { c.storeMatch(f.filename) } - search.checks[i] = c } // we're done with this search continue } // 2. If any check with inversematch=true failed to match, record that as a success - for i, c := range search.checks { - if c.code&checkContent == 0 { + for i := range search.checks { + if search.checks[i].code&checkContent == 0 { continue } + c := &search.checks[i] if !checksstatus[label][i] && c.inversematch { debugprint("in search '%s' on file '%s', check '%s' has not matched and is set to inversematch, record this as a positive result\n", label, f.filename, c.value) @@ -1401,26 +1515,24 @@ func (r *run) checkContent(f fileEntry) { } // adjust check status to true because the check did in fact match as an inverse checksstatus[label][i] = true - search.checks[i] = c } } // 3. 
deactivate searches that have matchall=true, but did not match against if search.isactive && (search.checkmask&checkContent != 0) && search.Options.MatchAll { - for i, c := range search.checks { - if c.code&checkContent == 0 { + for i := range search.checks { + if search.checks[i].code&checkContent == 0 { continue } + c := &search.checks[i] // check hasn't matched, or has matched and we didn't want it to, deactivate the search if !checksstatus[label][i] || (checksstatus[label][i] && c.inversematch) { if c.wantThis(checksstatus[label][i]) { c.storeMatch(f.filename) - search.checks[i] = c } search.deactivate() } } } - r.Parameters.Searches[label] = search } return } @@ -1451,10 +1563,11 @@ func (r *run) checkHash(f fileEntry, hashtype checkType) { } for label, search := range r.Parameters.Searches { if search.isactive && (search.checkmask&hashtype) != 0 { - for i, c := range search.checks { - if c.code&hashtype == 0 { + for i := range search.checks { + if search.checks[i].code&hashtype == 0 { continue } + c := &search.checks[i] match := false if c.value == hash { match = true @@ -1465,7 +1578,6 @@ func (r *run) checkHash(f fileEntry, hashtype checkType) { } else if search.Options.MatchAll { search.deactivate() } - search.checks[i] = c } } r.Parameters.Searches[label] = search @@ -1564,52 +1676,15 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) { // first pass on the results: if matchall is set, verify that all // the checks matched on all the files if search.Options.MatchAll { - // collect all the files that were found across all checks of this search - var allFiles, matchedFiles []string - for _, c := range search.checks { - // populate allFiles as a slice of unique files - for _, matchedFile := range c.matchedfiles { - store := true - for _, afile := range allFiles { - if afile == matchedFile { - store = false - } - } - if store { - allFiles = append(allFiles, matchedFile) - } - } + // The results processor which is part of the search has 
already prepared a list + // of files that match all searches, so we leverage that to build our results. + if len(search.filesMatchingAll) == 0 { + search.filesMatchingAll = append(search.filesMatchingAll, "") } - // verify that each file has matched on all the checks - for _, foundFile := range allFiles { - debugprint("checking if file %s matched all checks\n", foundFile) - matchedallchecks := true - for _, c := range search.checks { - found := false - for _, matchedFile := range c.matchedfiles { - if foundFile == matchedFile { - found = true - } - } - if !found { - debugprint("check %d did not match\n", c.code) - matchedallchecks = false - break - } - } - if matchedallchecks { - matchedFiles = append(matchedFiles, foundFile) - } - } - if len(matchedFiles) == 0 { - matchedFiles = append(matchedFiles, "") - } - // now that we have a clean list of files that matched all checks, store it - for _, matchedFile := range matchedFiles { + for _, matchedFile := range search.filesMatchingAll { var mf MatchedFile mf.File = matchedFile if mf.File != "" { - stats.Totalhits++ fi, err := os.Stat(mf.File) if err != nil { panic(err) @@ -1625,7 +1700,7 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) { } } } - mf.Search = search + mf.Search = *search mf.Search.Options.MatchLimit = 0 // store the value of maxerrors if greater than the one // we already have, we'll need it further down to return @@ -1652,7 +1727,6 @@ func (r *run) buildResults(t0 time.Time) (resStr string, err error) { var mf MatchedFile mf.File = file if mf.File != "" { - stats.Totalhits++ fi, err := os.Stat(file) if err != nil { panic(err) diff --git a/modules/file/paramscreator.go b/modules/file/paramscreator.go index 6b44d6c2..0b75948b 100644 --- a/modules/file/paramscreator.go +++ b/modules/file/paramscreator.go @@ -392,7 +392,7 @@ func (r *run) ParamsCreator() (interface{}, error) { } fmt.Printf("Stored %s %s\nEnter a new parameter, or 'done' to exit.\n", checkType, checkValue) } - 
p.Searches[label] = search + p.Searches[label] = &search fmt.Println("Stored search", label) } exit: @@ -467,7 +467,7 @@ func (r *run) ParamsParser(args []string) (interface{}, error) { s.Options.Debug = "print" } p := newParameters() - p.Searches["s1"] = s + p.Searches["s1"] = &s r.Parameters = *p return r.Parameters, r.ValidateParameters() }