2017-05-18 20:23:05 +03:00
|
|
|
//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
|
|
|
|
//
|
2019-04-18 11:28:29 +03:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-05-18 20:23:05 +03:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Merging Corpora.
|
|
|
|
//
|
|
|
|
// The task:
|
|
|
|
// Take the existing corpus (possibly empty) and merge new inputs into
|
|
|
|
// it so that only inputs with new coverage ('features') are added.
|
|
|
|
// The process should tolerate the crashes, OOMs, leaks, etc.
|
|
|
|
//
|
|
|
|
// Algorithm:
|
|
|
|
// The outer process collects the set of files and writes their names
|
|
|
|
// into a temporary "control" file, then repeatedly launches the inner
|
|
|
|
// process until all inputs are processed.
|
|
|
|
// The outer process does not actually execute the target code.
|
|
|
|
//
|
|
|
|
// The inner process reads the control file and sees a) list of all the inputs
|
|
|
|
// and b) the last processed input. Then it starts processing the inputs one
|
|
|
|
// by one. Before processing every input it writes one line to control file:
|
|
|
|
// STARTED INPUT_ID INPUT_SIZE
|
|
|
|
// After processing an input it writes another line:
|
|
|
|
// DONE INPUT_ID Feature1 Feature2 Feature3 ...
|
|
|
|
// If a crash happens while processing an input the last line in the control
|
|
|
|
// file will be "STARTED INPUT_ID" and so the next process will know
|
|
|
|
// where to resume.
|
|
|
|
//
|
|
|
|
// Once all inputs are processed by the inner process(es) the outer process
|
|
|
|
// reads the control file and does the merge based entirely on the contents
|
|
|
|
// of control file.
|
|
|
|
// It uses a single pass greedy algorithm choosing first the smallest inputs
|
|
|
|
// and, within the same size, the inputs that have more new features.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef LLVM_FUZZER_MERGE_H
|
|
|
|
#define LLVM_FUZZER_MERGE_H
|
|
|
|
|
|
|
|
#include "FuzzerDefs.h"
|
|
|
|
|
|
|
|
#include <istream>
|
|
|
|
#include <ostream>
|
|
|
|
#include <set>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace fuzzer {
|
|
|
|
|
|
|
|
struct MergeFileInfo {
|
|
|
|
std::string Name;
|
|
|
|
size_t Size = 0;
|
2019-04-18 11:28:29 +03:00
|
|
|
Vector<uint32_t> Features, Cov;
|
2017-05-18 20:23:05 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Merger {
|
2018-03-29 21:18:36 +03:00
|
|
|
Vector<MergeFileInfo> Files;
|
2017-05-18 20:23:05 +03:00
|
|
|
size_t NumFilesInFirstCorpus = 0;
|
|
|
|
size_t FirstNotProcessedFile = 0;
|
|
|
|
std::string LastFailure;
|
|
|
|
|
|
|
|
bool Parse(std::istream &IS, bool ParseCoverage);
|
|
|
|
bool Parse(const std::string &Str, bool ParseCoverage);
|
|
|
|
void ParseOrExit(std::istream &IS, bool ParseCoverage);
|
2019-04-18 11:28:29 +03:00
|
|
|
size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
|
|
|
|
const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
|
2018-03-29 21:18:36 +03:00
|
|
|
Vector<std::string> *NewFiles);
|
2017-05-18 20:23:05 +03:00
|
|
|
size_t ApproximateMemoryConsumption() const;
|
2018-03-29 21:18:36 +03:00
|
|
|
Set<uint32_t> AllFeatures() const;
|
2017-05-18 20:23:05 +03:00
|
|
|
};
|
|
|
|
|
2019-04-18 11:28:29 +03:00
|
|
|
// Entry point for the crash-tolerant corpus merge outlined in the header
// comment of this file.
// NOTE(review): parameter roles below are inferred from names and types only;
// the definition is not in this header, so confirm against the implementation.
//   Args            - presumably the command line used to launch the inner
//                     process(es).
//   OldCorpus       - files of the existing corpus.
//   NewCorpus       - candidate files to merge in.
//   NewFiles        - output: names of the files selected by the merge.
//   InitialFeatures - features treated as already known.
//   NewFeatures     - output: features added by the merge.
//   InitialCov      - coverage treated as already known.
//   NewCov          - output: coverage added by the merge.
//   CFPath          - presumably the path of the control file.
//   Verbose         - presumably enables extra diagnostic output.
void CrashResistantMerge(const Vector<std::string> &Args,
                         const Vector<SizedFile> &OldCorpus,
                         const Vector<SizedFile> &NewCorpus,
                         Vector<std::string> *NewFiles,
                         const Set<uint32_t> &InitialFeatures,
                         Set<uint32_t> *NewFeatures,
                         const Set<uint32_t> &InitialCov,
                         Set<uint32_t> *NewCov,
                         const std::string &CFPath,
                         bool Verbose);
|
|
|
|
|
2017-05-18 20:23:05 +03:00
|
|
|
} // namespace fuzzer
|
|
|
|
|
|
|
|
#endif // LLVM_FUZZER_MERGE_H
|