diff --git a/.gitignore b/.gitignore index e37825f..0fe2c40 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,2 @@ *.pyc -dashboard.zip -histogram_tools.py -histogram_specs.py -specs.py -histogram_specs.json -validation/ -html/data -out.txt -Histograms.json +build/ diff --git a/cmake/externals.cmake b/cmake/externals.cmake new file mode 100644 index 0000000..2585c93 --- /dev/null +++ b/cmake/externals.cmake @@ -0,0 +1,18 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +include(ExternalProject) +set_property(DIRECTORY PROPERTY EP_BASE "${CMAKE_BINARY_DIR}/externals") + +externalproject_add( + rapidjson + SVN_REPOSITORY http://rapidjson.googlecode.com/svn/trunk/ + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" +) + +set(RAPIDJSON_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/externals/Source/rapidjson/include") +include_directories(${RAPIDJSON_INCLUDE_DIRS}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem ${RAPIDJSON_INCLUDE_DIRS}") diff --git a/dashboard/auxiliary.py b/dashboard/auxiliary.py index 65b427a..e5b0d64 100644 --- a/dashboard/auxiliary.py +++ b/dashboard/auxiliary.py @@ -27,8 +27,8 @@ class HistogramAggregator: aggregator1.merge(**aggregator2.dump()) """ def __init__(self, values = [], buildId = "", revision = None): - replace_nan_inf(values) - self.values = values + self.values = list(values) + replace_nan_inf(self.values) self.buildId = buildId self.revision = revision @@ -37,13 +37,13 @@ class HistogramAggregator: if len(self.values) != len(values): # Choose the histogram with highest buildId if self.buildId < buildId: - self.values = values + self.values = list(values) self.buildId = buildId self.revision = revision else: if self.buildId < buildId: - self.values = values self.buildId = buildId + self.revision = revision for i in xrange(0, len(values) - 6): self.values[i] += values[i] # Entries [-6:-1] may have -1 indicating missing entry @@ -76,4 +76,4 @@ def replace_nan_inf(values): elif math.isnan(val): # this isn't good... but we can't handle all possible corner cases # NaN shouldn't be possible... besides it's not known to happen - values[i] = null + values[i] = None diff --git a/src/Aggregator.cpp b/src/Aggregator.cpp new file mode 100644 index 0000000..ebe5c99 --- /dev/null +++ b/src/Aggregator.cpp @@ -0,0 +1,59 @@ +#include "cache/ResultSet.h" +#include "cache/MeasureFile.h" +#include "cache/Aggregate.h" + +#include + +#include + +/** Print usage */ +void usage() { + printf("Usage: aggregate -o [FILE]\n"); +} + +using namespace std; + +/** Main file */ +int main(int argc, char *argv[]) { + FILE* output = stdout; + + // Parse arguments + int c; + while ((c = getopt(argc, argv, "ho:")) != -1) { + switch (c) { + case 'o': + output = fopen(optarg, "w"); + break; + case 'h': + usage(); + exit(0); + break; + case '?': + usage(); + return 1; + break; + default: + abort(); + } + } + + // Aggregated result set + ResultSet set; + +/* + // Read input file names from stdin + cin.sync_with_stdio(false); + string filename; + while(getline(cin, filename)) { + set.aggregate(filename.data()); + } + set.output(output); + +*/ + + // Close output file + fclose(output); + + return 0; +} + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..a2d66bc --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,24 @@ +set(MERGERESULTS_SRC + cache/InternedString.cpp + cache/Aggregate.cpp + cache/MeasureFile.cpp + cache/ResultSet.cpp + MergeResult.cpp +) + +add_executable(mergeresults ${MERGERESULTS_SRC}) +add_dependencies(mergeresults rapidjson) +target_link_libraries(mergeresults) + +set(AGGREGATOR_SRC + cache/InternedString.cpp + cache/Aggregate.cpp + cache/MeasureFile.cpp + cache/ResultSet.cpp + Aggregator.cpp +) + +include_directories(${LIBLZMA_INCLUDE_DIRS}) +add_executable(aggregator ${AGGREGATOR_SRC}) +add_dependencies(aggregator rapidjson) +target_link_libraries(aggregator ${LIBLZMA_LIBRARIES}) diff --git a/src/CompressedFileReader.cpp b/src/CompressedFileReader.cpp new file mode 100644 index 0000000..cc7c72d --- /dev/null +++ b/src/CompressedFileReader.cpp @@ -0,0 +1,184 @@ +#include "CompressedFileReader.h" + +#include +#include +#include +#include + +#define INBUF_SIZE (4 * 4096) + +CompressedFileReader::CompressedFileReader(FILE* input) + : _input(input), _stream(nullptr), _inbuf(nullptr), _outbuf(nullptr), + _size(4096), _nextLine(nullptr) { + // Allocate buffers + _inbuf = new uint8_t[INBUF_SIZE]; + _outbuf = (uint8_t*)malloc(_size); + _nextLine = _outbuf; + + // Allocated an initialize stream + _stream = new lzma_stream; + *_stream = LZMA_STREAM_INIT; + + // Initialized decoding stream + lzma_ret ret = lzma_auto_decoder(_stream, UINT64_MAX, LZMA_CONCATENATED); + if (ret != LZMA_OK) { + const char* msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "unable to allocate memory"; + break; + case LZMA_OPTIONS_ERROR: + msg = "invalid options"; + break; + case LZMA_PROG_ERROR: + msg = "unknown error"; + break; + default: + assert(false); + msg = "invalid error code"; + break; + } + + // Print error message + fprintf( + stderr, + "CompressedFileReader: lzma_auto_decoder() failed, %s\n", + msg + ); + + // Delete stream + delete _stream; + _stream = nullptr; + + return; + } + + // Setup stream + _stream->next_in = NULL; + _stream->avail_in = 0; + _stream->next_out = _outbuf; + _stream->avail_out = _size; +} + +char* CompressedFileReader::nextLine() { + // If there is no decoder stream, then we're either done, or there was an + // error somewhere in the process + if (!_stream) { + return nullptr; + } + + lzma_action action = LZMA_RUN; + + // Bring us to a state where: _outbuf == _nextLine + // do this by moving memory between _nextLine and _stream->next_out + // to begin at _outbuf (update _nextLine = _outbuf, _stream->next_out =...) + assert(_stream->next_out - _nextLine >= 0); + memmove(_outbuf, _nextLine, _stream->next_out - _nextLine); + _stream->next_out -= _nextLine - _outbuf; + _stream->avail_out += _nextLine - _outbuf; + _nextLine = _outbuf; + assert(_stream->next_out >= _outbuf); + + fprintf(stderr, "Getting line:\n"); + + // Optional optimization: + // Search for line breaks in interval from _outbuf, to _stream->next_out, + // if found, update _nextLine, flip '\n' to '\0' and return... This should + // give slightly better locality + assert(_nextLine == _outbuf); + while (_nextLine < _stream->next_out) { + if (*_nextLine == '\n') { + *_nextLine = '\0'; + _nextLine += 1; + return (char*)_outbuf; + } + _nextLine++; + } + // Okay, so there is no _nextLine, yet... we'll keep moving it forward though + + // Read until we reach a line break + while (true) { + // If there no available input, read from file + if (_stream->avail_in == 0 && !feof(_input)) { + _stream->next_in = _inbuf; + _stream->avail_in = fread(_inbuf, 1, INBUF_SIZE, _input); + + // if there is an error + if (ferror(_input)) { + fprintf( + stderr, + "CompressedFileReader: fread() failed, %s\n", + strerror(errno) + ); + // Free streaming context, we can't continue after an error + lzma_end(_stream); + delete _stream; + _stream = nullptr; + + return nullptr; + } + + // if at end of file, finish decoding, flushing buffers + if (feof(_input)) { + action = LZMA_FINISH; + } + } + + // Decode LZMA stream + lzma_ret ret = lzma_code(_stream, action); + + // If there is no more output buffer space, or we're at the end of the + // stream, search for line breaks and return, if any is found + if (_stream->avail_out == 0 || ret == LZMA_STREAM_END) { + // Find next line and return it, if there is any... + while (_nextLine < _stream->next_out) { + if (*_nextLine == '\n') { + *_nextLine = '\0'; + _nextLine += 1; + return (char*)_outbuf; + } + _nextLine++; + } + + // Realloc _outbuf, note that we should have filled _outbuf first + assert(_stream->next_out == _nextLine); + fprintf(stderr, "Size: %i != %i \n", _size, _nextLine - _outbuf ); + fprintf(stderr, "avail_out: %i, '%i'\n", _stream->avail_out, *(_stream->next_out -1)); + assert((_nextLine - _outbuf) == _size); + + // Double size, to get a nice amortized complexity, no we don't bother + // scaling down the allocation + _outbuf = (uint8_t*)realloc(_outbuf, _size * 2); + + // Update _nextLine, _stream->next_out and _stream->avail_out + _nextLine = _outbuf + _size; + _stream->next_out = _outbuf + _size; + _stream->avail_out = _size; + + // Store the updated size + _size *= 2; + } + + // End stream and set _stream null if we're done + if (ret == LZMA_STREAM_END) { + lzma_end(_stream); + delete _stream; + _stream = nullptr; + } else if (ret != LZMA_OK) { + assert(false); + } + } +} + +CompressedFileReader::~CompressedFileReader() { + // Free stream if not at end + if (_stream) { + lzma_end(_stream); + delete _stream; + _stream = nullptr; + } + + // Release other buffers + free(_outbuf); + delete[] _inbuf; +} \ No newline at end of file diff --git a/src/CompressedFileReader.h b/src/CompressedFileReader.h new file mode 100644 index 0000000..ded5984 --- /dev/null +++ b/src/CompressedFileReader.h @@ -0,0 +1,42 @@ +#ifndef COMPRESSEDFILEREADER_H +#define COMPRESSEDFILEREADER_H + +#include +#include + +/** Read compressed files line by line */ +class CompressedFileReader { + /** Input file object */ + FILE* _input; + + /** lzma decoder stream */ + lzma_stream* _stream; + + /** Input buffer, buffering data from input file to decoder */ + uint8_t* _inbuf; + + /** Output buffer, returned when a line is read */ + uint8_t* _outbuf; + + /** Size of bytes allocated for _outbuf, doubled when more space is needed */ + size_t _size; + + /** Position where next line starts, _outbuf, if no line available */ + uint8_t* _nextLine; +public: + /** Create a compressed file reader */ + CompressedFileReader(FILE* input); + + /** + * Get next line, null, if at end of stream or error, errors are also printed + * to stderr. The returned pointer is valid until next invocation or + * destruction of the CompressedFileReader. + */ + char* nextLine(); + + /** Destroy compressed file reader, freeing all allocated memory */ + ~CompressedFileReader(); +}; + + +#endif // COMPRESSEDFILEREADER_H \ No newline at end of file diff --git a/src/MergeResult.cpp b/src/MergeResult.cpp new file mode 100644 index 0000000..0f03410 --- /dev/null +++ b/src/MergeResult.cpp @@ -0,0 +1,128 @@ +#include "cache/ResultSet.h" +#include "cache/MeasureFile.h" +#include "cache/Aggregate.h" + +#include "rapidjson/document.h" + +#include + +#include +#include +#include +#include + +/** Print usage */ +void usage() { + printf("Usage: mergeresults -i [FILE] -o [FILE]\n"); +} + +using namespace std; +using namespace rapidjson; + + +/** Main file */ +int main(int argc, char *argv[]) { + vector inputs; + FILE* output = stdout; + + // Parse arguments + int c; + while ((c = getopt(argc, argv, "hi:o:")) != -1) { + switch (c) { + case 'i': + inputs.push_back(optarg); + break; + case 'o': + output = fopen(optarg, "w"); + break; + case 'h': + usage(); + exit(0); + break; + case '?': + usage(); + return 1; + break; + default: + abort(); + } + } + + // If input files are given read them + if (!inputs.empty()) { + // Input one by one + ResultSet set; + for (auto file : inputs) { + ifstream stream(file); + set.mergeStream(stream); + } + set.output(output); + + } else { + // if no input files are given, we read from cin and output whenever the + // the filePath changes. This will mergeresult of sorted input, hence, + // perfect when piping in from GNU sort, which can efficiently merge sorted + // files + cin.sync_with_stdio(false); + + // filePath and measureFile currently aggregated + string filePath; + MeasureFile* measureFile = nullptr; + + string line; + int nb_line = 0; + while (getline(cin, line)) { + nb_line++; + + // Find delimiter + size_t del = line.find('\t'); + if (del == string::npos) { + fprintf(stderr, "No tab on line %i\n", nb_line); + continue; + } + + // Find current file path + string currentFilePath = line.substr(0, del); + + // If we're reached a new filePath, output the old one + if (filePath != currentFilePath) { + if (measureFile) { + measureFile->output(output, filePath); + delete measureFile; + measureFile = nullptr; + } + filePath = currentFilePath; + } + + // Parse JSON document + Document d; + d.Parse<0>(line.data() + del + 1); + + // Check that we have an object + if (!d.IsObject()) { + fprintf(stderr, "JSON root is not an object on line %i\n", nb_line); + continue; + } + + // Allocate MeasureFile if not already aggregated + if (!measureFile) { + measureFile = new MeasureFile(); + } + + // Merge in JSON + measureFile->mergeJSON(d); + } + + // Output last MeasureFile, if there was ever one + if (measureFile) { + measureFile->output(output, filePath); + delete measureFile; + measureFile = nullptr; + } + } + + // Close output file + fclose(output); + + return 0; +} diff --git a/src/cache/Aggregate.cpp b/src/cache/Aggregate.cpp new file mode 100644 index 0000000..1407d9f --- /dev/null +++ b/src/cache/Aggregate.cpp @@ -0,0 +1,113 @@ +#include "Aggregate.h" + +#include + +using namespace std; +using namespace rapidjson; + +InternedStringContext Aggregate::_buildIdStringCtx; + +InternedStringContext Aggregate::_revisionStringCtx; + + +void Aggregate::mergeJSON(const Value& dump) { + const Value::Member* jvalues = dump.FindMember("values"); + const Value::Member* jrevision = dump.FindMember("revision"); + const Value::Member* jbuildId = dump.FindMember("buildId"); + if (!jvalues || !jvalues->value.IsArray()) { + fprintf(stderr, "'values' in dump isn't an array\n"); + return; + } + if (!jrevision || !jrevision->value.IsString()) { + fprintf(stderr, "'revision' in dump isn't a string\n"); + return; + } + if (!jbuildId || !jbuildId->value.IsString()) { + fprintf(stderr, "'buildId' in dump isn't a string\n"); + return; + } + const char* buildId = jbuildId->value.GetString(); + const char* revision = jrevision->value.GetString(); + const Value& values = jvalues->value; + size_t length = values.Size(); + + // Check length of values + if(length == 0) { + fprintf(stderr, "Empty 'values' array in dump!\n"); + } + + // Check that we have doubles + for (size_t i = 0; i < length; i++) { + if(!values[i].IsNumber()) { + fprintf(stderr, "Array contains non-double value!\n"); + return; + } + } + + // Check if length matches + if (_length != length) { + // Replace if we have newer buildId or current length is zero + if (_length == 0 || _buildId < buildId) { + // Set buildId and revision + _buildId = _buildIdStringCtx.createString(buildId); + _revision = _revisionStringCtx.createString(revision); + _length = length; + + // Free old values + if (_values) { + delete[] _values; + } + _values = new double[length]; + + for (size_t i = 0; i < length; i++) { + _values[i] = values[i].GetDouble(); + } + } + } else { + // Update revision and buildId if we have a newer one + if (_buildId < buildId) { + _buildId = _buildIdStringCtx.createString(buildId); + _revision = _revisionStringCtx.createString(revision); + } + + size_t i; + for (i = 0; i < length - 6; i++) { + _values[i] += values[i].GetDouble(); + } + for (; i < length - 1; i++) { + double val = values[i].GetDouble(); + // Do not accumulate -1 (this indicates missing entry) + if (val == -1 && _values[i] == -1) { + continue; + } + _values[i] += val; + } + _values[i] += values[i].GetDouble(); + } +} + +void Aggregate::output(OutputContext& ctx, PathNode* owner) { + if (ctx.comma) { + fputc(',', ctx.file); + } + ctx.comma = true; + fputc('\"', ctx.file); + owner->output(ctx.file); + fputs("\":{\"values\": [", ctx.file); + if(_length > 0) { + fprintf(ctx.file, "%.20g", _values[0]); + for(size_t i = 1; i < _length; i++) { + fprintf(ctx.file, ",%.20g", _values[i]); + } + } + fprintf(ctx.file, "],\"buildId\":\"%s\",\"revision\":\"%s\"}", + _buildId.data(), _revision.data()); +} + + + + + + + + diff --git a/src/cache/Aggregate.h b/src/cache/Aggregate.h new file mode 100644 index 0000000..531c0c8 --- /dev/null +++ b/src/cache/Aggregate.h @@ -0,0 +1,48 @@ +#ifndef AGGREGATE_H +#define AGGREGATE_H + +#include "PathNode.h" +#include "InternedString.h" + +#include "rapidjson/document.h" + +/** Representation of histogram aggregate */ +class Aggregate { + /** Latests revision of aggregated histograms */ + InternedString _revision; + + /** + * Highest BuildId of aggregated histograms, used to find newest BuildId when + * merging data into Aggregate. + */ + InternedString _buildId; + + /** Aggregated values, null, if _length is zero */ + double* _values; + + /** Length of _values, zero implies _values == null */ + size_t _length; + + /** String context for interning BuildIds */ + static InternedStringContext _buildIdStringCtx; + + /** String context for interning revision string */ + static InternedStringContext _revisionStringCtx; +public: + Aggregate() + : _values(nullptr), _length(0) {} + + /** Merge aggregated values from JSON dump */ + void mergeJSON(const rapidjson::Value& dump); + + /** Output context */ + struct OutputContext { + FILE* file; + bool comma; + }; + + /** Output to file */ + void output(OutputContext& ctx, PathNode* owner); +}; + +#endif // AGGREGATE_H diff --git a/src/cache/InternedString.cpp b/src/cache/InternedString.cpp new file mode 100644 index 0000000..e1de304 --- /dev/null +++ b/src/cache/InternedString.cpp @@ -0,0 +1,72 @@ +#include +#include +#include + +#include "InternedString.h" + +#ifdef LOG_INTERNEDSTRING +#define log(...) fprintf(stderr, __VA_ARGS__); +#else +#define log(...) +#endif + +void InternedString::releaseBuffer() { + // If we have a buffer + if (_buffer) { + // Decrement reference count + _buffer->refCount--; + assert(_buffer->refCount >= 0); + log("DEC: to %i of '%s'\n", _buffer->refCount, _buffer->payload.data()); + + // If there are no more references + if (_buffer->refCount == 0) { + // Erase from owners cache, if owner is still alive + if (_buffer->owner) { + size_t count = _buffer->owner->_cache.erase(_buffer->payload.data()); + assert(count == 1); + } + log("DEL: '%s'\n", _buffer->payload.data()); + // Free buffer + delete _buffer; + } + + // Remove pointer to buffer + _buffer = nullptr; + } +} + +const char* InternedString::_emptyString = ""; + +InternedString InternedStringContext::createString(const char* s) { + // Empty InternedStrings are a special case + if(strlen(s) == 0) { + return InternedString(); + } + + // Find buffer + InternedString::Buffer* buf = nullptr; + auto it = _cache.find(s); + + // If a buffer doesn't exist create a new buffer + if (it == _cache.end()) { + buf = new InternedString::Buffer(s, this); + _cache.insert(std::make_pair( + buf->payload.data(), (InternedString::Buffer*)buf)); + assert(buf->refCount == 1); + } else { + buf = it->second; + buf->refCount++; + } + assert(buf); + assert(buf->payload == s); + return InternedString(buf); +} + + +InternedStringContext::~InternedStringContext() { + for(auto item : _cache) { + assert(item.second->owner == this); + item.second->owner = nullptr; + } + _cache.clear(); +} \ No newline at end of file diff --git a/src/cache/InternedString.h b/src/cache/InternedString.h new file mode 100644 index 0000000..2128093 --- /dev/null +++ b/src/cache/InternedString.h @@ -0,0 +1,167 @@ +#ifndef INTERNED_STRING_H +#define INTERNED_STRING_H + +#include + +#include +#include + +class InternedStringContext; + +/** + * Interned immutable string, used to reduce memory allocations when dealing + * with a lot of instances of the same string. + */ +class InternedString { + /** Buffer storing the contents of an interned string */ + struct Buffer { + Buffer(const char* s, InternedStringContext* owner) + : refCount(1), payload(s), owner(owner) {} + size_t refCount; + std::string payload; + InternedStringContext* owner; + }; + + /** Internal Buffer, nullptr, for empty strings */ + Buffer* _buffer; + + /** + * Initialized InternedString from buffer + * Note, buffers are always allocated by instances of InternedStringContext. + */ + explicit InternedString(Buffer* buffer) + : _buffer(buffer) {} + + /** Release current buffer, decrementing refCount and freeing it if needed */ + void releaseBuffer(); + + /** Empty string to return when _buffer is null */ + static const char* _emptyString; +public: + /** Initialized empty InternedString */ + InternedString() + : _buffer(nullptr) {} + + /** Copy-construct InternedString */ + InternedString(const InternedString& s) { + _buffer = s._buffer; + if(_buffer) { + _buffer->refCount++; + } + } + + /** Assignment operator */ + InternedString& operator= (const InternedString& s) { + releaseBuffer(); + _buffer = s._buffer; + if(_buffer) { + _buffer->refCount++; + } + } + + /** Compare two InternedStrings */ + bool operator==(const InternedString& s) const { + if(_buffer && s._buffer) { + if(_buffer->owner != s._buffer->owner || !_buffer->owner) { + return _buffer->payload == s._buffer->payload; + } + } + return _buffer == s._buffer; + } + + /** Compare to C string */ + bool operator==(const char* s) const { + if(!_buffer) { + return *s == '\0'; + } + return _buffer->payload == s; + } + + /** Compare to std::string */ + bool operator==(const std::string& s) const { + if(!_buffer) { + return s.empty(); + } + return _buffer->payload == s; + } + + /** Compare strings */ + bool operator<(const char* s) const { + if(!_buffer) { + return *s != '\0'; + } + return _buffer->payload < s; + } + + /** Get string as const char* */ + const char* data() const { + if (_buffer) + return _buffer->payload.data(); + return _emptyString; + } + + /** Write to FILE */ + void output(FILE* f) { + if(_buffer) { + fputs(_buffer->payload.data(), f); + } + } + + /** Check if this is an empty string */ + bool empty() { + return _buffer; + } + + /** Destroy and deref InternedScript */ + ~InternedString() { + releaseBuffer(); + } + + friend class InternedStringContext; +}; + +/** Context that owns a collection of interned strings */ +class InternedStringContext { + /** Hash for C strings */ + struct StrHash { + size_t operator()(const char* s) const { + size_t hash = 0; + while(*s != '\0') { + hash = (hash << 6) ^ *(s++); + } + return hash; + } + }; + + /** Comparison operator for C strings */ + struct StrCmp { + bool operator()(const char* s1, const char* s2) const { + return strcmp(s1, s2) == 0; + } + }; + + /** Buffer Cache type */ + typedef std::unordered_map + BufferCache; + + /** Buffer Cache */ + BufferCache _cache; +public: + /** Create new interned string from C string */ + InternedString createString(const char* s); + + /** Create new interned string from const char* and string length */ + InternedString createString(const char* s, size_t n); + + /** Create new interned string from std::string */ + InternedString createString(const std::string& s) { + return createString(s.data()); + } + + /** Free InternedStringContext and freeing buffers when they are deleted */ + ~InternedStringContext(); + + friend class InternedString; +}; + +#endif // INTERNED_STRING_H diff --git a/src/cache/MeasureFile.cpp b/src/cache/MeasureFile.cpp new file mode 100644 index 0000000..5755b81 --- /dev/null +++ b/src/cache/MeasureFile.cpp @@ -0,0 +1,60 @@ +#include "MeasureFile.h" + +#include "Aggregate.h" + +#include + +using namespace std; +using namespace rapidjson; + +InternedStringContext MeasureFile::_filterStringCtx; + +void MeasureFile::mergeJSON(Value& blob) { + // For each member + for (auto it = blob.MemberBegin(); it != blob.MemberEnd(); ++it) { + // First find filter path + const char* filterPath = it->name.GetString(); + + // Check that we have an object + if (!it->value.IsObject()) { + printf("Value of filterPath: %s is not an object\n", filterPath); + continue; + } + + // Find PathNode + PathNode* n = _filterRoot.find(filterPath, _filterStringCtx); + if (!n->target()) { + n->setTarget(new Aggregate()); + } + n->target()->mergeJSON(it->value); + + #if FIRST_DUMP_ONLY + break; + #endif + } +} + +/** Output to file */ +void MeasureFile::output(FILE* f, PathNode* owner) { + owner->output(f); + fputs("\t{", f); + + Aggregate::OutputContext ctx; + ctx.file = f; + ctx.comma = false; + _filterRoot.outputTargetTree(ctx); + + fputs("}\n", f); +} + +void MeasureFile::output(FILE* f, const std::string& filePath) { + fputs(filePath.data(), f); + fputs("\t{", f); + + Aggregate::OutputContext ctx; + ctx.file = f; + ctx.comma = false; + _filterRoot.outputTargetTree(ctx); + + fputs("}\n", f); +} \ No newline at end of file diff --git a/src/cache/MeasureFile.h b/src/cache/MeasureFile.h new file mode 100644 index 0000000..094e4c6 --- /dev/null +++ b/src/cache/MeasureFile.h @@ -0,0 +1,32 @@ +#ifndef MEASUREFILE_H +#define MEASUREFILE_H + +#include "PathNode.h" +#include "InternedString.h" + +#include "rapidjson/document.h" + +#include + +class Aggregate; + +/** + * In-memory representation of the aggregated data stored in a single JSON file. + * This is called an MeasureFile as there may be multiple of these files for a + * given measure under different channel, product, version and by-date. + */ +class MeasureFile { + PathNode _filterRoot; + static InternedStringContext _filterStringCtx; +public: + /** Merge with JSON from file */ + void mergeJSON(rapidjson::Value& blob); + + /** Output to file */ + void output(FILE* f, PathNode* owner); + + /** Output to file */ + void output(FILE* f, const std::string& filePath); +}; + +#endif // MEASUREFILE_H diff --git a/src/cache/PathNode.h b/src/cache/PathNode.h new file mode 100644 index 0000000..13155a5 --- /dev/null +++ b/src/cache/PathNode.h @@ -0,0 +1,126 @@ +#ifndef PATHNODE_H +#define PATHNODE_H + +#include +#include +#include + +#include "InternedString.h" + +/** Node on a path separated by slashes */ +template +class PathNode { + InternedString _value; + PathNode* _parent; + std::vector*> _children; + Target* _target; + + /** Create internal PathNode */ + PathNode(const InternedString& value, PathNode* parent) + : _value(value), _parent(parent), _target(nullptr) {} + +public: + /** Create new root PathNode */ + PathNode() + : _value(), _parent(nullptr), _target(nullptr) {} + + /** Delete node, children, targets and remove from parent if any */ + ~PathNode() { + // Delete target + if (_target) { + delete _target; + _target = nullptr; + } + + // Delete children + for (auto child : _children) { + child->_parent = nullptr; + delete child; + } + + // Remove from parent if we still have one + if (_parent) { + std::vector*>& siblings = _parent->_children; + for(auto it = siblings.begin(); it != siblings.end(); it++) { + if(*it == this) { + siblings.erase(it); + break; + } + } + } + } + + /** Set a target object, this will be deleted with this node */ + void setTarget(Target* target) { + _target = target; + } + + /** Get current target, nullptr if none is set */ + Target* target() const { + return _target; + } + + /** Write Path to file */ + void output(FILE* f) { + if(_parent && _parent->_parent) { + _parent->output(f); + fputc('/', f); + } + _value.output(f); + } + + /** Invoke output(ctx, parent) on the entire target tree */ + template + void outputTargetTree(Context& ctx) { + // If we have a target, output it + if(_target) { + _target->output(ctx, this); + } + + // Call recursively on children + for(auto child : _children) { + child->outputTargetTree(ctx); + } + } + + /** Find/create PathNode under current node */ + PathNode* find(const char* path, InternedStringContext& ctx) { + const char* reminder = path; + while(*reminder != '/' && *reminder != '\0'){ + reminder++; + } + size_t n = reminder - path; + + // Find child node + PathNode* next = nullptr; + for(auto child : _children) { + if(strncmp(child->_value.data(), path, n) == 0) { + next = child; + break; + } + } + + // Create new child node if we need a new one + if(!next) { + // Create interned string with const cast hack to avoid copying the string + char tmp = *reminder; + *(const_cast(reminder)) = '\0'; + InternedString name(ctx.createString(path)); + *(const_cast(reminder)) = tmp; + + // Create next node + next = new PathNode(name, this); + _children.push_back(next); + } + + // If there is more path to lookup we that from next + if(*reminder != '\0') { + return next->find(reminder + 1, ctx); + } + + // Otherwise return next + return next; + } +}; + +#endif // PATHNODE_H diff --git a/src/cache/ResultSet.cpp b/src/cache/ResultSet.cpp new file mode 100644 index 0000000..3a37c32 --- /dev/null +++ b/src/cache/ResultSet.cpp @@ -0,0 +1,56 @@ +#include "ResultSet.h" +#include "MeasureFile.h" +#include "Aggregate.h" + +#include "rapidjson/document.h" + +#include +#include + +using namespace std; +using namespace rapidjson; + + +void ResultSet::mergeStream(istream& stream) { + // Read input line by line + string line; + int nb_line = 0; + while (getline(stream, line)) { + nb_line++; + + // Find delimiter + size_t del = line.find('\t'); + if (del == string::npos) { + fprintf(stderr, "No tab on line %i\n", nb_line); + continue; + } + + // Find filePath + string filePath = line.substr(0, del); + + // Parse JSON document + Document d; + d.Parse<0>(line.data() + del + 1); + + // Check that we have an object + if (!d.IsObject()) { + fprintf(stderr, "JSON root is not an object on line %i\n", nb_line); + continue; + } + + // Find blob and merge with it + PathNode* n = _fileRoot.find(filePath.data(), _pathStringCtx); + if(!n->target()) { + n->setTarget(new MeasureFile()); + } + n->target()->mergeJSON(d); + } +} + +void ResultSet::aggregate(const char* filename) { + +} + +void ResultSet::output(FILE* f) { + _fileRoot.outputTargetTree(f); +} diff --git a/src/cache/ResultSet.h b/src/cache/ResultSet.h new file mode 100644 index 0000000..faed558 --- /dev/null +++ b/src/cache/ResultSet.h @@ -0,0 +1,26 @@ +#ifndef RESULTSET_H +#define RESULTSET_H + +#include "PathNode.h" +#include "InternedString.h" + +#include + +class MeasureFile; + +/** A collection of results for various measures files */ +class ResultSet { + PathNode _fileRoot; + InternedStringContext _pathStringCtx; +public: + /** Merge a result-set file into this ResultSet */ + void mergeStream(std::istream& stream); + + /** Output result-set to file */ + void output(FILE* f); + + /** Decompress and aggregated file */ + void aggregate(const char* filename); +}; + +#endif // RESULTSET_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..e7b4bbe --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,3 @@ + +add_subdirectory(merge-test) +#add_subdirectory(decompress) \ No newline at end of file diff --git a/tests/decompress/CMakeLists.txt b/tests/decompress/CMakeLists.txt new file mode 100644 index 0000000..2e4f628 --- /dev/null +++ b/tests/decompress/CMakeLists.txt @@ -0,0 +1,15 @@ +file(COPY a-b-c-def-g.txt.xz a-b-c-def-g.txt.lzma DESTINATION .) + +set(COMPRESSEDFILEREADERTEST_SRC + CompressedFileReaderTest.cpp + ../../src/CompressedFileReader.cpp +) + +include_directories(${LIBLZMA_INCLUDE_DIRS}) +add_executable(compressedfilereadertest ${COMPRESSEDFILEREADERTEST_SRC}) +target_link_libraries(compressedfilereadertest ${LIBLZMA_LIBRARIES}) + +add_test( + NAME CompressedFileReaderTest + COMMAND ./compressedfilereadertest +) diff --git a/tests/decompress/CompressedFileReaderTest.cpp b/tests/decompress/CompressedFileReaderTest.cpp new file mode 100644 index 0000000..d4600ce --- /dev/null +++ b/tests/decompress/CompressedFileReaderTest.cpp @@ -0,0 +1,89 @@ + +#include "../../src/CompressedFileReader.h" + +#include +#include +#include + +int main(int argc, char* argv[]) { + // Test XZ reading + { + FILE* input = fopen("a-b-c-def-g.txt.xz", "r"); + + CompressedFileReader reader(input); + char* line; + + // 1. line: "a" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "a") == 0); + + // 2. line: "b" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "b") == 0); + + // 3. line: "c" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "c") == 0); + + // 4. line: "def" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "def") == 0); + + // 5. line: "g" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "g") == 0); + + // End of input + line = reader.nextLine(); + assert(!line); + + fclose(input); + } + + // Test lzma reading (legacy only) + { + FILE* input = fopen("a-b-c-def-g.txt.lzma", "r"); + + CompressedFileReader reader(input); + char* line; + + // 1. line: "a" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "a") == 0); + + // 2. line: "b" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "b") == 0); + + // 3. line: "c" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "c") == 0); + + // 4. line: "def" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "def") == 0); + + // 5. line: "g" + line = reader.nextLine(); + assert(line); + assert(strcmp(line, "g") == 0); + + // End of input + line = reader.nextLine(); + assert(!line); + + fclose(input); + } + + return 0; +} + diff --git a/tests/decompress/a-b-c-def-g.txt b/tests/decompress/a-b-c-def-g.txt new file mode 100644 index 0000000..2d18577 --- /dev/null +++ b/tests/decompress/a-b-c-def-g.txt @@ -0,0 +1,5 @@ +a +b +c +def +g \ No newline at end of file diff --git a/tests/decompress/a-b-c-def-g.txt.lzma b/tests/decompress/a-b-c-def-g.txt.lzma new file mode 100644 index 0000000..0a55224 Binary files /dev/null and b/tests/decompress/a-b-c-def-g.txt.lzma differ diff --git a/tests/decompress/a-b-c-def-g.txt.xz b/tests/decompress/a-b-c-def-g.txt.xz new file mode 100644 index 0000000..ed9bcc6 Binary files /dev/null and b/tests/decompress/a-b-c-def-g.txt.xz differ diff --git a/tests/merge-test/CMakeLists.txt b/tests/merge-test/CMakeLists.txt new file mode 100644 index 0000000..e3733d5 --- /dev/null +++ b/tests/merge-test/CMakeLists.txt @@ -0,0 +1,11 @@ +file(COPY result-1.txt result-2.txt DESTINATION .) + +add_test( + NAME merge-test-with-options + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/with-options.sh" +) + +add_test( + NAME merge-test-with-pipes + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/with-pipes.sh" +) \ No newline at end of file diff --git a/tests/merge-test/result-1.txt b/tests/merge-test/result-1.txt new file mode 100644 index 0000000..ca6a280 --- /dev/null +++ b/tests/merge-test/result-1.txt @@ -0,0 +1 @@ +aurora/24/CYCLE_COLLECTOR/by-submission-date {"20130805/idle_daily/Fennec/Android/16/arm":{"values": [5, 10, 5, -1, -1, -1, -1, -1, 3],"buildId": "20130805004006","revision": "http://hg.mozilla.org/releases/mozilla-aurora/rev/4ea223de889c"}} diff --git a/tests/merge-test/result-2.txt b/tests/merge-test/result-2.txt new file mode 100644 index 0000000..9fbf569 --- /dev/null +++ b/tests/merge-test/result-2.txt @@ -0,0 +1 @@ +aurora/24/CYCLE_COLLECTOR/by-submission-date {"20130805/idle_daily/Fennec/Android/16/arm":{"values": [10, 5, 10, -1, -1, -1, -1, -1, 1],"buildId": "20130805004006","revision": "http://hg.mozilla.org/releases/mozilla-aurora/rev/4ea223de889c"}} diff --git a/tests/merge-test/with-options.sh b/tests/merge-test/with-options.sh new file mode 100755 index 0000000..98680ad --- /dev/null +++ b/tests/merge-test/with-options.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e + +echo "got here" + +# Merge test files +../../mergeresults -i result-1.txt -i result-2.txt -o output.txt; + +# Check number of lines +test `cat output.txt | wc -l` -eq 1; + +# Check that we have 15 in there +test `cat output.txt | grep 15 | wc -l` -eq 1; diff --git a/tests/merge-test/with-pipes.sh b/tests/merge-test/with-pipes.sh new file mode 100755 index 0000000..763b6fa --- /dev/null +++ b/tests/merge-test/with-pipes.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +# Merge test files +cat result-1.txt result-2.txt | ../../mergeresults > output.txt; + +# Check number of lines +test `cat output.txt | wc -l` -eq 1; + +# Check that we have 15 in there +test `cat output.txt | grep 15 | wc -l` -eq 1;