Fixed bugs; added C++ speedups.

This commit is contained in:
Jonas Finnemann Jensen 2013-11-25 15:12:50 -08:00
Родитель e238dcbbfa
Коммит 76bfc18e40
28 изменённых файлов: 1308 добавлений и 14 удалений

10
.gitignore поставляемый
Просмотреть файл

@ -1,10 +1,2 @@
*.pyc
dashboard.zip
histogram_tools.py
histogram_specs.py
specs.py
histogram_specs.json
validation/
html/data
out.txt
Histograms.json
build/

18
cmake/externals.cmake Normal file
Просмотреть файл

@ -0,0 +1,18 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isystem ${RAPIDJSON_INCLUDE_DIRS}")

Просмотреть файл

@ -27,8 +27,8 @@ class HistogramAggregator:
aggregator1.merge(**aggregator2.dump())
"""
def __init__(self, values = [], buildId = "", revision = None):
replace_nan_inf(values)
self.values = values
self.values = list(values)
replace_nan_inf(self.values)
self.buildId = buildId
self.revision = revision
@ -37,13 +37,13 @@ class HistogramAggregator:
if len(self.values) != len(values):
# Choose the histogram with highest buildId
if self.buildId < buildId:
self.values = values
self.values = list(values)
self.buildId = buildId
self.revision = revision
else:
if self.buildId < buildId:
self.values = values
self.buildId = buildId
self.revision = revision
for i in xrange(0, len(values) - 6):
self.values[i] += values[i]
# Entries [-6:-1] may have -1 indicating missing entry
@ -76,4 +76,4 @@ def replace_nan_inf(values):
elif math.isnan(val):
# this isn't good... but we can't handle all possible corner cases
# NaN shouldn't be possible... besides it's not known to happen
values[i] = null
values[i] = None

59
src/Aggregator.cpp Normal file
Просмотреть файл

@ -0,0 +1,59 @@
#include "cache/ResultSet.h"
#include "cache/MeasureFile.h"
#include "cache/Aggregate.h"
#include <unistd.h>
#include <iostream>
/** Print command line usage for the aggregate tool */
void usage() {
  fputs("Usage: aggregate -o [FILE]\n", stdout);
}
using namespace std;
/**
 * Entry point for the aggregate tool.
 *
 * Parses command line options (-o output file, -h help). The aggregation loop
 * that reads input file names from stdin is still commented out, so currently
 * only an empty ResultSet is constructed.
 */
int main(int argc, char *argv[]) {
  FILE* output = stdout;
  // Parse arguments
  int c;
  while ((c = getopt(argc, argv, "ho:")) != -1) {
    switch (c) {
      case 'o':
        output = fopen(optarg, "w");
        // Bug fix: fopen() was unchecked; a bad path caused fclose(NULL)
        if (!output) {
          fprintf(stderr, "aggregate: cannot open '%s' for writing\n", optarg);
          return 1;
        }
        break;
      case 'h':
        usage();
        exit(0);
        break;
      case '?':
        usage();
        return 1;
        break;
      default:
        abort();
    }
  }
  // Aggregated result set
  ResultSet set;
  /*
  // Read input file names from stdin
  cin.sync_with_stdio(false);
  string filename;
  while(getline(cin, filename)) {
    set.aggregate(filename.data());
  }
  set.output(output);
  */
  // Close output file
  fclose(output);
  return 0;
}

24
src/CMakeLists.txt Normal file
Просмотреть файл

@ -0,0 +1,24 @@
# mergeresults: merges result-set lines (see MergeResult.cpp)
set(MERGERESULTS_SRC
  cache/InternedString.cpp
  cache/Aggregate.cpp
  cache/MeasureFile.cpp
  cache/ResultSet.cpp
  MergeResult.cpp
)
add_executable(mergeresults ${MERGERESULTS_SRC})
# rapidjson is header-only; the dependency only orders the SVN checkout first
add_dependencies(mergeresults rapidjson)
target_link_libraries(mergeresults)
# aggregator: aggregates compressed input files (work in progress)
set(AGGREGATOR_SRC
  cache/InternedString.cpp
  cache/Aggregate.cpp
  cache/MeasureFile.cpp
  cache/ResultSet.cpp
  Aggregator.cpp
)
include_directories(${LIBLZMA_INCLUDE_DIRS})
add_executable(aggregator ${AGGREGATOR_SRC})
add_dependencies(aggregator rapidjson)
target_link_libraries(aggregator ${LIBLZMA_LIBRARIES})

Просмотреть файл

@ -0,0 +1,184 @@
#include "CompressedFileReader.h"
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#define INBUF_SIZE (4 * 4096)
/**
 * Create a reader decoding `input` via liblzma. lzma_auto_decoder() handles
 * both .xz and legacy .lzma input; LZMA_CONCATENATED keeps decoding
 * concatenated streams until end of input. On initialization failure an error
 * is printed to stderr and _stream stays null, so nextLine() returns null
 * immediately. The FILE* is borrowed; the caller closes it.
 */
CompressedFileReader::CompressedFileReader(FILE* input)
  : _input(input), _stream(nullptr), _inbuf(nullptr), _outbuf(nullptr),
    _size(4096), _nextLine(nullptr) {
  // Allocate buffers
  // NOTE(review): malloc() result is unchecked - confirm crashing on OOM is
  // acceptable here
  _inbuf = new uint8_t[INBUF_SIZE];
  _outbuf = (uint8_t*)malloc(_size);
  _nextLine = _outbuf;
  // Allocate and initialize the decoder stream
  _stream = new lzma_stream;
  *_stream = LZMA_STREAM_INIT;
  // Initialize the decoding stream
  lzma_ret ret = lzma_auto_decoder(_stream, UINT64_MAX, LZMA_CONCATENATED);
  if (ret != LZMA_OK) {
    // Translate the error code to a human readable message
    const char* msg;
    switch (ret) {
      case LZMA_MEM_ERROR:
        msg = "unable to allocate memory";
        break;
      case LZMA_OPTIONS_ERROR:
        msg = "invalid options";
        break;
      case LZMA_PROG_ERROR:
        msg = "unknown error";
        break;
      default:
        assert(false);
        msg = "invalid error code";
        break;
    }
    // Print error message
    fprintf(
      stderr,
      "CompressedFileReader: lzma_auto_decoder() failed, %s\n",
      msg
    );
    // Delete stream; nextLine() treats a null stream as end-of-stream
    delete _stream;
    _stream = nullptr;
    return;
  }
  // Setup stream: no input yet, decode into the whole output buffer
  _stream->next_in = NULL;
  _stream->avail_in = 0;
  _stream->next_out = _outbuf;
  _stream->avail_out = _size;
}
/**
 * Get next line, null, if at end of stream or error (errors are printed to
 * stderr). The returned pointer is valid until the next invocation or
 * destruction of the reader.
 *
 * Fixes over the previous version:
 *  - removed leftover debug fprintf()s (which also used wrong %i specifiers)
 *  - at LZMA_STREAM_END with a non-full buffer, an assert fired (and release
 *    builds then dereferenced the freed/null _stream on the next iteration)
 *  - a final line without a trailing '\n' was lost; it is now returned
 *  - action is set to LZMA_FINISH whenever the file is exhausted, so a call
 *    entered after EOF cannot spin forever with LZMA_RUN and no input
 *  - realloc() and non-OK lzma_code() results are now handled
 */
char* CompressedFileReader::nextLine() {
  // No stream means end-of-stream was reached or an error occurred earlier
  if (!_stream) {
    return nullptr;
  }
  lzma_action action = LZMA_RUN;
  // Compact the buffer: move the unconsumed tail [_nextLine, next_out) to the
  // front of _outbuf so the line we return always starts at _outbuf
  size_t pending = (size_t)(_stream->next_out - _nextLine);
  memmove(_outbuf, _nextLine, pending);
  _stream->next_out = _outbuf + pending;
  _stream->avail_out = _size - pending;
  _nextLine = _outbuf;
  // Scan data decoded on a previous call first
  while (_nextLine < _stream->next_out) {
    if (*_nextLine == '\n') {
      *_nextLine = '\0';
      _nextLine += 1;
      return (char*)_outbuf;
    }
    _nextLine++;
  }
  // Decode until a line break is found or the stream ends
  while (true) {
    // If there is no available input, read from file
    if (_stream->avail_in == 0 && !feof(_input)) {
      _stream->next_in = _inbuf;
      _stream->avail_in = fread(_inbuf, 1, INBUF_SIZE, _input);
      if (ferror(_input)) {
        fprintf(
          stderr,
          "CompressedFileReader: fread() failed, %s\n",
          strerror(errno)
        );
        // Free streaming context, we can't continue after an error
        lzma_end(_stream);
        delete _stream;
        _stream = nullptr;
        return nullptr;
      }
    }
    // Once the file is exhausted, ask liblzma to finish, flushing buffers
    if (feof(_input)) {
      action = LZMA_FINISH;
    }
    // Decode LZMA stream
    lzma_ret ret = lzma_code(_stream, action);
    if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
      fprintf(stderr, "CompressedFileReader: lzma_code() failed, code %d\n",
              (int)ret);
      lzma_end(_stream);
      delete _stream;
      _stream = nullptr;
      return nullptr;
    }
    // Scan newly decoded bytes for a line break
    while (_nextLine < _stream->next_out) {
      if (*_nextLine == '\n') {
        *_nextLine = '\0';
        _nextLine += 1;
        return (char*)_outbuf;
      }
      _nextLine++;
    }
    // Grow the output buffer when it is full; doubling gives amortized O(1)
    // and also guarantees at least one spare byte for the terminator below
    if (_stream->avail_out == 0) {
      uint8_t* grown = (uint8_t*)realloc(_outbuf, _size * 2);
      if (!grown) {
        fprintf(stderr, "CompressedFileReader: realloc() failed\n");
        lzma_end(_stream);
        delete _stream;
        _stream = nullptr;
        return nullptr;
      }
      _outbuf = grown;
      _nextLine = _outbuf + _size;
      _stream->next_out = _outbuf + _size;
      _stream->avail_out = _size;
      _size *= 2;
    }
    // Stream is done: free the decoder and hand out any trailing partial line
    if (ret == LZMA_STREAM_END) {
      lzma_end(_stream);
      delete _stream;
      _stream = nullptr;
      if (_nextLine > _outbuf) {
        // Data without a final '\n' is returned as the last line
        *_nextLine = '\0';
        return (char*)_outbuf;
      }
      return nullptr;
    }
  }
}
/**
 * Destroy the reader, releasing the decoder stream (if the stream did not
 * already end or fail) and both buffers. The input FILE* is not closed.
 */
CompressedFileReader::~CompressedFileReader() {
  // Free stream if not at end
  if (_stream) {
    lzma_end(_stream);
    delete _stream;
    _stream = nullptr;
  }
  // Release other buffers
  free(_outbuf);
  delete[] _inbuf;
}

Просмотреть файл

@ -0,0 +1,42 @@
#ifndef COMPRESSEDFILEREADER_H
#define COMPRESSEDFILEREADER_H
#include <stdio.h>
#include <lzma.h>
/**
 * Read compressed files line by line.
 *
 * Wraps a FILE* with a liblzma decoder and hands out NUL-terminated lines one
 * at a time. The FILE* is borrowed: the caller opens and closes it.
 */
class CompressedFileReader {
  /** Input file object (borrowed, never closed by this class) */
  FILE* _input;
  /** lzma decoder stream; nullptr after end of stream or an error */
  lzma_stream* _stream;
  /** Input buffer, buffering data from input file to decoder */
  uint8_t* _inbuf;
  /** Output buffer, returned when a line is read */
  uint8_t* _outbuf;
  /** Size of bytes allocated for _outbuf, doubled when more space is needed */
  size_t _size;
  /** Position where next line starts, _outbuf, if no line available */
  uint8_t* _nextLine;
public:
  /** Create a compressed file reader */
  CompressedFileReader(FILE* input);
  /**
   * Get next line, null, if at end of stream or error, errors are also printed
   * to stderr. The returned pointer is valid until next invocation or
   * destruction of the CompressedFileReader.
   */
  char* nextLine();
  /** Destroy compressed file reader, freeing all allocated memory */
  ~CompressedFileReader();
};
#endif // COMPRESSEDFILEREADER_H

128
src/MergeResult.cpp Normal file
Просмотреть файл

@ -0,0 +1,128 @@
#include "cache/ResultSet.h"
#include "cache/MeasureFile.h"
#include "cache/Aggregate.h"
#include "rapidjson/document.h"
#include <unistd.h>
#include <string>
#include <fstream>
#include <iostream>
#include <vector>
/** Print command line usage for the mergeresults tool */
void usage() {
  fputs("Usage: mergeresults -i [FILE] -o [FILE]\n", stdout);
}
using namespace std;
using namespace rapidjson;
/**
 * Entry point for the mergeresults tool.
 *
 * With -i options, each given result file is merged into a ResultSet and the
 * whole set is written to the output. Without -i, lines are read from stdin
 * and a MeasureFile is flushed whenever the file path (text before the tab)
 * changes; this merges sorted input correctly, e.g. piped from GNU sort.
 */
int main(int argc, char *argv[]) {
  vector<char*> inputs;
  FILE* output = stdout;
  // Parse arguments
  int c;
  while ((c = getopt(argc, argv, "hi:o:")) != -1) {
    switch (c) {
      case 'i':
        inputs.push_back(optarg);
        break;
      case 'o':
        output = fopen(optarg, "w");
        // Bug fix: fopen() was unchecked; a bad path caused fclose(NULL)
        if (!output) {
          fprintf(stderr, "mergeresults: cannot open '%s' for writing\n",
                  optarg);
          return 1;
        }
        break;
      case 'h':
        usage();
        exit(0);
        break;
      case '?':
        usage();
        return 1;
        break;
      default:
        abort();
    }
  }
  // If input files are given read them
  if (!inputs.empty()) {
    // Input one by one
    ResultSet set;
    for (auto file : inputs) {
      ifstream stream(file);
      // Bug fix: an unreadable input silently merged nothing
      if (!stream) {
        fprintf(stderr, "mergeresults: cannot open '%s' for reading\n", file);
        return 1;
      }
      set.mergeStream(stream);
    }
    set.output(output);
  } else {
    // if no input files are given, we read from cin and output whenever the
    // the filePath changes. This will mergeresult of sorted input, hence,
    // perfect when piping in from GNU sort, which can efficiently merge sorted
    // files
    cin.sync_with_stdio(false);
    // filePath and measureFile currently aggregated
    string filePath;
    MeasureFile* measureFile = nullptr;
    string line;
    int nb_line = 0;
    while (getline(cin, line)) {
      nb_line++;
      // Find delimiter
      size_t del = line.find('\t');
      if (del == string::npos) {
        fprintf(stderr, "No tab on line %i\n", nb_line);
        continue;
      }
      // Find current file path
      string currentFilePath = line.substr(0, del);
      // If we've reached a new filePath, output the old one
      if (filePath != currentFilePath) {
        if (measureFile) {
          measureFile->output(output, filePath);
          delete measureFile;
          measureFile = nullptr;
        }
        filePath = currentFilePath;
      }
      // Parse JSON document
      Document d;
      d.Parse<0>(line.data() + del + 1);
      // Check that we have an object
      if (!d.IsObject()) {
        fprintf(stderr, "JSON root is not an object on line %i\n", nb_line);
        continue;
      }
      // Allocate MeasureFile if not already aggregated
      if (!measureFile) {
        measureFile = new MeasureFile();
      }
      // Merge in JSON
      measureFile->mergeJSON(d);
    }
    // Output last MeasureFile, if there was ever one
    if (measureFile) {
      measureFile->output(output, filePath);
      delete measureFile;
      measureFile = nullptr;
    }
  }
  // Close output file
  fclose(output);
  return 0;
}

113
src/cache/Aggregate.cpp поставляемый Normal file
Просмотреть файл

@ -0,0 +1,113 @@
#include "Aggregate.h"
#include <stdio.h>
using namespace std;
using namespace rapidjson;
InternedStringContext Aggregate::_buildIdStringCtx;
InternedStringContext Aggregate::_revisionStringCtx;
/**
 * Merge a histogram dump {"values": [...], "revision": ..., "buildId": ...}
 * into this aggregate.
 *
 * If the incoming array length differs from the current one, the entry with
 * the highest buildId wins and replaces the other. Otherwise entries are
 * summed element-wise; the 5 entries before the last may be -1, indicating a
 * missing entry (mirrors the Python HistogramAggregator), and -1 + -1 is kept
 * as -1 rather than accumulated.
 */
void Aggregate::mergeJSON(const Value& dump) {
  const Value::Member* jvalues = dump.FindMember("values");
  const Value::Member* jrevision = dump.FindMember("revision");
  const Value::Member* jbuildId = dump.FindMember("buildId");
  if (!jvalues || !jvalues->value.IsArray()) {
    fprintf(stderr, "'values' in dump isn't an array\n");
    return;
  }
  if (!jrevision || !jrevision->value.IsString()) {
    fprintf(stderr, "'revision' in dump isn't a string\n");
    return;
  }
  if (!jbuildId || !jbuildId->value.IsString()) {
    fprintf(stderr, "'buildId' in dump isn't a string\n");
    return;
  }
  const char* buildId = jbuildId->value.GetString();
  const char* revision = jrevision->value.GetString();
  const Value& values = jvalues->value;
  size_t length = values.Size();
  // Check length of values. Bug fix: this used to only warn and fall through,
  // after which the summing code below indexed out of bounds (and `length - 6`
  // underflowed as size_t)
  if (length == 0) {
    fprintf(stderr, "Empty 'values' array in dump!\n");
    return;
  }
  // Check that we have doubles
  for (size_t i = 0; i < length; i++) {
    if (!values[i].IsNumber()) {
      fprintf(stderr, "Array contains non-double value!\n");
      return;
    }
  }
  // Check if length matches
  if (_length != length) {
    // Replace if we have newer buildId or current length is zero
    if (_length == 0 || _buildId < buildId) {
      // Set buildId and revision
      _buildId = _buildIdStringCtx.createString(buildId);
      _revision = _revisionStringCtx.createString(revision);
      _length = length;
      // Free old values
      if (_values) {
        delete[] _values;
      }
      _values = new double[length];
      for (size_t i = 0; i < length; i++) {
        _values[i] = values[i].GetDouble();
      }
    }
  } else {
    // Update revision and buildId if we have a newer one
    if (_buildId < buildId) {
      _buildId = _buildIdStringCtx.createString(buildId);
      _revision = _revisionStringCtx.createString(revision);
    }
    // Plain sum for all entries before the trailing 6. Bug fix: guard the
    // subtraction; for length < 7 the old `length - 6` wrapped around as
    // size_t and the loop ran off the end of the array
    size_t i = 0;
    size_t plain = (length > 6) ? (length - 6) : 0;
    for (; i < plain; i++) {
      _values[i] += values[i].GetDouble();
    }
    // Entries between `plain` and the last may be -1 (missing entry)
    for (; i + 1 < length; i++) {
      double val = values[i].GetDouble();
      // Do not accumulate -1 (this indicates missing entry)
      if (val == -1 && _values[i] == -1) {
        continue;
      }
      _values[i] += val;
    }
    // The last entry is always accumulated
    _values[i] += values[i].GetDouble();
  }
}
/**
 * Write this aggregate as a JSON fragment:
 *   "<path-of-owner>":{"values": [...],"buildId":"...","revision":"..."}
 * ctx.comma tracks whether a separating comma is needed between fragments.
 */
void Aggregate::output(OutputContext& ctx, PathNode<Aggregate>* owner) {
  if (ctx.comma) {
    fputc(',', ctx.file);
  }
  ctx.comma = true;
  fputc('\"', ctx.file);
  owner->output(ctx.file);
  fputs("\":{\"values\": [", ctx.file);
  if (_length > 0) {
    // %.20g preserves full double precision in the serialized output
    fprintf(ctx.file, "%.20g", _values[0]);
    for (size_t i = 1; i < _length; i++) {
      fprintf(ctx.file, ",%.20g", _values[i]);
    }
  }
  fprintf(ctx.file, "],\"buildId\":\"%s\",\"revision\":\"%s\"}",
          _buildId.data(), _revision.data());
}

48
src/cache/Aggregate.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,48 @@
#ifndef AGGREGATE_H
#define AGGREGATE_H
#include "PathNode.h"
#include "InternedString.h"
#include "rapidjson/document.h"
/** Representation of histogram aggregate */
class Aggregate {
  /** Latest revision of aggregated histograms */
  InternedString _revision;
  /**
   * Highest BuildId of aggregated histograms, used to find newest BuildId when
   * merging data into Aggregate.
   */
  InternedString _buildId;
  /** Aggregated values, null, if _length is zero */
  double* _values;
  /** Length of _values, zero implies _values == null */
  size_t _length;
  /** String context for interning BuildIds */
  static InternedStringContext _buildIdStringCtx;
  /** String context for interning revision string */
  static InternedStringContext _revisionStringCtx;
public:
  Aggregate()
    : _values(nullptr), _length(0) {}
  // Aggregates own _values and are only handled by pointer; forbid copies,
  // which would double-free _values
  Aggregate(const Aggregate&) = delete;
  Aggregate& operator=(const Aggregate&) = delete;
  /** Free aggregated values (bug fix: _values previously leaked) */
  ~Aggregate() {
    delete[] _values;
  }
  /** Merge aggregated values from JSON dump */
  void mergeJSON(const rapidjson::Value& dump);
  /** Output context */
  struct OutputContext {
    FILE* file;
    bool comma;
  };
  /** Output to file */
  void output(OutputContext& ctx, PathNode<Aggregate>* owner);
};
#endif // AGGREGATE_H

72
src/cache/InternedString.cpp поставляемый Normal file
Просмотреть файл

@ -0,0 +1,72 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "InternedString.h"
#ifdef LOG_INTERNEDSTRING
#define log(...) fprintf(stderr, __VA_ARGS__);
#else
#define log(...)
#endif
/**
 * Release the buffer backing this string: decrement its refCount and, when it
 * drops to zero, remove it from the owning context's cache (if the context is
 * still alive) and free it.
 */
void InternedString::releaseBuffer() {
  // If we have a buffer
  if (_buffer) {
    // Decrement reference count
    _buffer->refCount--;
    // NOTE(review): refCount is size_t, so this assert can never fire; an
    // underflow wraps to a huge value instead - consider checking > 0 before
    // the decrement
    assert(_buffer->refCount >= 0);
    log("DEC: to %i of '%s'\n", _buffer->refCount, _buffer->payload.data());
    // If there are no more references
    if (_buffer->refCount == 0) {
      // Erase from owners cache, if owner is still alive
      if (_buffer->owner) {
        size_t count = _buffer->owner->_cache.erase(_buffer->payload.data());
        assert(count == 1);
      }
      log("DEL: '%s'\n", _buffer->payload.data());
      // Free buffer
      delete _buffer;
    }
    // Remove pointer to buffer
    _buffer = nullptr;
  }
}
// Backing storage for the empty-string case (no Buffer is allocated)
const char* InternedString::_emptyString = "";
/**
 * Intern `s`: return an InternedString sharing the cached buffer for this
 * content, creating a new buffer (refCount == 1) or referencing an existing
 * one (refCount++) as needed.
 */
InternedString InternedStringContext::createString(const char* s) {
  // Empty InternedStrings are a special case
  if(strlen(s) == 0) {
    return InternedString();
  }
  // Find buffer
  InternedString::Buffer* buf = nullptr;
  auto it = _cache.find(s);
  // If a buffer doesn't exist create a new buffer
  if (it == _cache.end()) {
    buf = new InternedString::Buffer(s, this);
    // The cache key points into the buffer's own payload, so key and buffer
    // share lifetime
    _cache.insert(std::make_pair<const char*, InternedString::Buffer*>(
      buf->payload.data(), (InternedString::Buffer*)buf));
    assert(buf->refCount == 1);
  } else {
    buf = it->second;
    buf->refCount++;
  }
  assert(buf);
  assert(buf->payload == s);
  return InternedString(buf);
}
/**
 * Destroy the context. Surviving InternedStrings keep their buffers alive,
 * but each buffer's owner pointer is cleared so releaseBuffer() won't touch
 * this (destroyed) cache later.
 */
InternedStringContext::~InternedStringContext() {
  for(auto item : _cache) {
    assert(item.second->owner == this);
    item.second->owner = nullptr;
  }
  _cache.clear();
}

167
src/cache/InternedString.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,167 @@
#ifndef INTERNED_STRING_H
#define INTERNED_STRING_H
#include <string.h>
#include <string>
#include <unordered_map>
class InternedStringContext;
/**
 * Interned immutable string, used to reduce memory allocations when dealing
 * with a lot of instances of the same string.
 */
class InternedString {
  /** Buffer storing the contents of an interned string */
  struct Buffer {
    Buffer(const char* s, InternedStringContext* owner)
      : refCount(1), payload(s), owner(owner) {}
    size_t refCount;
    std::string payload;
    InternedStringContext* owner;
  };
  /** Internal Buffer, nullptr, for empty strings */
  Buffer* _buffer;
  /**
   * Initialized InternedString from buffer
   * Note, buffers are always allocated by instances of InternedStringContext.
   */
  explicit InternedString(Buffer* buffer)
    : _buffer(buffer) {}
  /** Release current buffer, decrementing refCount and freeing it if needed */
  void releaseBuffer();
  /** Empty string to return when _buffer is null */
  static const char* _emptyString;
public:
  /** Initialized empty InternedString */
  InternedString()
    : _buffer(nullptr) {}
  /** Copy-construct InternedString */
  InternedString(const InternedString& s) {
    _buffer = s._buffer;
    if(_buffer) {
      _buffer->refCount++;
    }
  }
  /**
   * Assignment operator.
   * Bug fixes: the source refCount is taken before releasing our own buffer,
   * making self-assignment safe (the old order could free a shared buffer
   * before copying it), and `return *this` was missing entirely, which is
   * undefined behavior for a value-returning function.
   */
  InternedString& operator= (const InternedString& s) {
    Buffer* buffer = s._buffer;
    if(buffer) {
      buffer->refCount++;
    }
    releaseBuffer();
    _buffer = buffer;
    return *this;
  }
  /** Compare two InternedStrings */
  bool operator==(const InternedString& s) const {
    if(_buffer && s._buffer) {
      if(_buffer->owner != s._buffer->owner || !_buffer->owner) {
        return _buffer->payload == s._buffer->payload;
      }
    }
    return _buffer == s._buffer;
  }
  /** Compare to C string */
  bool operator==(const char* s) const {
    if(!_buffer) {
      return *s == '\0';
    }
    return _buffer->payload == s;
  }
  /** Compare to std::string */
  bool operator==(const std::string& s) const {
    if(!_buffer) {
      return s.empty();
    }
    return _buffer->payload == s;
  }
  /** Compare strings */
  bool operator<(const char* s) const {
    if(!_buffer) {
      return *s != '\0';
    }
    return _buffer->payload < s;
  }
  /** Get string as const char* */
  const char* data() const {
    if (_buffer)
      return _buffer->payload.data();
    return _emptyString;
  }
  /** Write to FILE */
  void output(FILE* f) {
    if(_buffer) {
      fputs(_buffer->payload.data(), f);
    }
  }
  /**
   * Check if this is an empty string.
   * Bug fix: this returned `_buffer`, i.e. true for NON-empty strings.
   */
  bool empty() const {
    return _buffer == nullptr;
  }
  /** Destroy and deref InternedString */
  ~InternedString() {
    releaseBuffer();
  }
  friend class InternedStringContext;
};
/** Context that owns a collection of interned strings */
class InternedStringContext {
  /** Hash for C strings (simple shift-xor over the bytes) */
  struct StrHash {
    size_t operator()(const char* s) const {
      size_t hash = 0;
      while(*s != '\0') {
        hash = (hash << 6) ^ *(s++);
      }
      return hash;
    }
  };
  /** Comparison operator for C strings */
  struct StrCmp {
    bool operator()(const char* s1, const char* s2) const {
      return strcmp(s1, s2) == 0;
    }
  };
  /** Buffer Cache type; keys point into each cached Buffer's own payload */
  typedef std::unordered_map<const char*, InternedString::Buffer*, StrHash, StrCmp>
    BufferCache;
  /** Buffer Cache */
  BufferCache _cache;
public:
  /** Create new interned string from C string */
  InternedString createString(const char* s);
  /**
   * Create new interned string from const char* and string length
   * NOTE(review): declared but no definition is visible in this commit -
   * confirm it is implemented before calling it (linker error otherwise)
   */
  InternedString createString(const char* s, size_t n);
  /** Create new interned string from std::string */
  InternedString createString(const std::string& s) {
    return createString(s.data());
  }
  /** Free InternedStringContext and freeing buffers when they are deleted */
  ~InternedStringContext();
  friend class InternedString;
};
#endif // INTERNED_STRING_H

60
src/cache/MeasureFile.cpp поставляемый Normal file
Просмотреть файл

@ -0,0 +1,60 @@
#include "MeasureFile.h"
#include "Aggregate.h"
#include <stdio.h>
using namespace std;
using namespace rapidjson;
InternedStringContext MeasureFile::_filterStringCtx;
/**
 * Merge a parsed JSON object {"<filterPath>": {...}, ...} into this
 * MeasureFile, routing each member to the Aggregate at its filter path.
 */
void MeasureFile::mergeJSON(Value& blob) {
  // For each member
  for (auto it = blob.MemberBegin(); it != blob.MemberEnd(); ++it) {
    // First find filter path
    const char* filterPath = it->name.GetString();
    // Check that we have an object
    if (!it->value.IsObject()) {
      // Bug fix: this used printf(), sending the error to stdout, which would
      // corrupt the merged result when output is written to stdout
      fprintf(stderr, "Value of filterPath: %s is not an object\n", filterPath);
      continue;
    }
    // Find PathNode, creating its Aggregate on first sight
    PathNode<Aggregate>* n = _filterRoot.find(filterPath, _filterStringCtx);
    if (!n->target()) {
      n->setTarget(new Aggregate());
    }
    n->target()->mergeJSON(it->value);
#if FIRST_DUMP_ONLY
    break;
#endif
  }
}
/** Output to file as "<path-of-owner>\t{<filter aggregates>}\n" */
void MeasureFile::output(FILE* f, PathNode<MeasureFile>* owner) {
  owner->output(f);
  fputs("\t{", f);
  Aggregate::OutputContext ctx;
  ctx.file = f;
  // First aggregate gets no leading comma
  ctx.comma = false;
  _filterRoot.outputTargetTree(ctx);
  fputs("}\n", f);
}
/** Output to file as "<filePath>\t{<filter aggregates>}\n" */
void MeasureFile::output(FILE* f, const std::string& filePath) {
  fputs(filePath.data(), f);
  fputs("\t{", f);
  Aggregate::OutputContext ctx;
  ctx.file = f;
  // First aggregate gets no leading comma
  ctx.comma = false;
  _filterRoot.outputTargetTree(ctx);
  fputs("}\n", f);
}

32
src/cache/MeasureFile.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,32 @@
#ifndef MEASUREFILE_H
#define MEASUREFILE_H
#include "PathNode.h"
#include "InternedString.h"
#include "rapidjson/document.h"
#include <stdio.h>
class Aggregate;
/**
 * In-memory representation of the aggregated data stored in a single JSON file.
 * This is called a MeasureFile as there may be multiple of these files for a
 * given measure under different channel, product, version and by-date.
 */
class MeasureFile {
  /** Tree of filter-path fragments; each node may hold an Aggregate */
  PathNode<Aggregate> _filterRoot;
  /** Shared string context interning filter-path fragments */
  static InternedStringContext _filterStringCtx;
public:
  /** Merge with JSON from file */
  void mergeJSON(rapidjson::Value& blob);
  /** Output to file, prefixed with the path of `owner` */
  void output(FILE* f, PathNode<MeasureFile>* owner);
  /** Output to file, prefixed with the given file path */
  void output(FILE* f, const std::string& filePath);
};
#endif // MEASUREFILE_H

126
src/cache/PathNode.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,126 @@
#ifndef PATHNODE_H
#define PATHNODE_H
#include <stdio.h>
#include <string>
#include <vector>
#include "InternedString.h"
/** Node on a path separated by slashes */
template<typename Target>
class PathNode {
  /** Path fragment this node represents (empty for the root node) */
  InternedString _value;
  /** Parent node, nullptr for the root node */
  PathNode<Target>* _parent;
  /** Child nodes, one per distinct path fragment below this node */
  std::vector<PathNode<Target>*> _children;
  /** Optional payload attached to this node, owned by this node */
  Target* _target;
  /** Create internal PathNode */
  PathNode(const InternedString& value, PathNode* parent)
    : _value(value), _parent(parent), _target(nullptr) {}
public:
  /** Create new root PathNode */
  PathNode()
    : _value(), _parent(nullptr), _target(nullptr) {}
  /** Delete node, children, targets and remove from parent if any */
  ~PathNode() {
    // Delete target
    if (_target) {
      delete _target;
      _target = nullptr;
    }
    // Delete children; clear their parent pointer first so they don't try to
    // erase themselves from _children while we iterate over it
    for (auto child : _children) {
      child->_parent = nullptr;
      delete child;
    }
    // Remove from parent if we still have one
    if (_parent) {
      std::vector<PathNode<Target>*>& siblings = _parent->_children;
      for (auto it = siblings.begin(); it != siblings.end(); it++) {
        if (*it == this) {
          siblings.erase(it);
          break;
        }
      }
    }
  }
  /** Set a target object, this will be deleted with this node */
  void setTarget(Target* target) {
    _target = target;
  }
  /** Get current target, nullptr if none is set */
  Target* target() const {
    return _target;
  }
  /** Write path (fragments joined by '/', root omitted) to file */
  void output(FILE* f) {
    if (_parent && _parent->_parent) {
      _parent->output(f);
      fputc('/', f);
    }
    _value.output(f);
  }
  /** Invoke output(ctx, parent) on the entire target tree */
  template<typename Context>
  void outputTargetTree(Context& ctx) {
    // If we have a target, output it
    if (_target) {
      _target->output(ctx, this);
    }
    // Call recursively on children
    for (auto child : _children) {
      child->outputTargetTree(ctx);
    }
  }
  /** Find/create PathNode under current node */
  PathNode* find(const char* path, InternedStringContext& ctx) {
    // Locate the end of the first path fragment
    const char* remainder = path;
    while (*remainder != '/' && *remainder != '\0') {
      remainder++;
    }
    size_t n = remainder - path;
    // Find child node whose value matches the fragment exactly.
    // Bug fix: strncmp() alone matched any child whose value merely STARTS
    // with the fragment (e.g. fragment "ab" matched a child "abc"); we also
    // require the child's value to end after n characters.
    PathNode<Target>* next = nullptr;
    for (auto child : _children) {
      if (strncmp(child->_value.data(), path, n) == 0 &&
          child->_value.data()[n] == '\0') {
        next = child;
        break;
      }
    }
    // Create new child node if we need a new one
    if (!next) {
      // Create interned string with const cast hack to avoid copying the
      // string: temporarily terminate the fragment in place
      char tmp = *remainder;
      *(const_cast<char*>(remainder)) = '\0';
      InternedString name(ctx.createString(path));
      *(const_cast<char*>(remainder)) = tmp;
      // Create next node
      next = new PathNode(name, this);
      _children.push_back(next);
    }
    // If there is more path to look up, continue from next
    if (*remainder != '\0') {
      return next->find(remainder + 1, ctx);
    }
    // Otherwise return next
    return next;
  }
};
#endif // PATHNODE_H

56
src/cache/ResultSet.cpp поставляемый Normal file
Просмотреть файл

@ -0,0 +1,56 @@
#include "ResultSet.h"
#include "MeasureFile.h"
#include "Aggregate.h"
#include "rapidjson/document.h"
#include <stdio.h>
#include <string>
using namespace std;
using namespace rapidjson;
/**
 * Merge lines of the form "<filePath>\t<JSON object>" from stream into this
 * result-set; malformed lines are reported to stderr and skipped.
 */
void ResultSet::mergeStream(istream& stream) {
  // Read input line by line
  string line;
  int nb_line = 0;
  while (getline(stream, line)) {
    nb_line++;
    // Find delimiter
    size_t del = line.find('\t');
    if (del == string::npos) {
      fprintf(stderr, "No tab on line %i\n", nb_line);
      continue;
    }
    // Find filePath
    string filePath = line.substr(0, del);
    // Parse JSON document (everything after the tab)
    Document d;
    d.Parse<0>(line.data() + del + 1);
    // Check that we have an object
    if (!d.IsObject()) {
      fprintf(stderr, "JSON root is not an object on line %i\n", nb_line);
      continue;
    }
    // Find blob and merge with it, creating the MeasureFile on first sight
    PathNode<MeasureFile>* n = _fileRoot.find(filePath.data(), _pathStringCtx);
    if(!n->target()) {
      n->setTarget(new MeasureFile());
    }
    n->target()->mergeJSON(d);
  }
}
/** Decompress and aggregate a file - not implemented yet */
void ResultSet::aggregate(const char* filename) {
  // TODO: decompress `filename` (see CompressedFileReader) and merge its lines
}
/** Output the result-set, one "<filePath>\t{...}" line per MeasureFile */
void ResultSet::output(FILE* f) {
  // The FILE* is passed as the Context; each MeasureFile target receives it
  // via MeasureFile::output(FILE*, PathNode<MeasureFile>*)
  _fileRoot.outputTargetTree(f);
}

26
src/cache/ResultSet.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,26 @@
#ifndef RESULTSET_H
#define RESULTSET_H
#include "PathNode.h"
#include "InternedString.h"
#include <iostream>
class MeasureFile;
/** A collection of results for various measures files */
class ResultSet {
  /** Tree of file-path fragments; each node may hold a MeasureFile */
  PathNode<MeasureFile> _fileRoot;
  /** String context interning file-path fragments */
  InternedStringContext _pathStringCtx;
public:
  /** Merge a result-set file into this ResultSet */
  void mergeStream(std::istream& stream);
  /** Output result-set to file */
  void output(FILE* f);
  /** Decompress and aggregate a file (not implemented yet) */
  void aggregate(const char* filename);
};
#endif // RESULTSET_H

3
tests/CMakeLists.txt Normal file
Просмотреть файл

@ -0,0 +1,3 @@
# Test suites
add_subdirectory(merge-test)
# NOTE(review): decompress tests are disabled - confirm this is intentional
#add_subdirectory(decompress)

Просмотреть файл

@ -0,0 +1,15 @@
# Copy compressed fixtures next to the test binary
file(COPY a-b-c-def-g.txt.xz a-b-c-def-g.txt.lzma DESTINATION .)
set(COMPRESSEDFILEREADERTEST_SRC
  CompressedFileReaderTest.cpp
  ../../src/CompressedFileReader.cpp
)
include_directories(${LIBLZMA_INCLUDE_DIRS})
add_executable(compressedfilereadertest ${COMPRESSEDFILEREADERTEST_SRC})
target_link_libraries(compressedfilereadertest ${LIBLZMA_LIBRARIES})
add_test(
  NAME CompressedFileReaderTest
  COMMAND ./compressedfilereadertest
)

Просмотреть файл

@ -0,0 +1,89 @@
#include "../../src/CompressedFileReader.h"
#include <string.h>
#include <stdio.h>
#include <assert.h>
/**
 * Verify CompressedFileReader against two fixtures containing the lines
 * a, b, c, def, g - one .xz stream and one legacy .lzma stream.
 */
int main(int argc, char* argv[]) {
  // Expected lines, in order, in both fixture files
  const char* expected[] = {"a", "b", "c", "def", "g"};
  const size_t numLines = sizeof(expected) / sizeof(expected[0]);
  // Fixture files exercising the .xz and legacy .lzma code paths
  const char* fixtures[] = {"a-b-c-def-g.txt.xz", "a-b-c-def-g.txt.lzma"};
  const size_t numFixtures = sizeof(fixtures) / sizeof(fixtures[0]);
  for (size_t f = 0; f < numFixtures; f++) {
    FILE* input = fopen(fixtures[f], "r");
    CompressedFileReader reader(input);
    // Each expected line must come back in order
    for (size_t i = 0; i < numLines; i++) {
      char* line = reader.nextLine();
      assert(line);
      assert(strcmp(line, expected[i]) == 0);
    }
    // End of input yields null
    char* line = reader.nextLine();
    assert(!line);
    fclose(input);
  }
  return 0;
}

Просмотреть файл

@ -0,0 +1,5 @@
a
b
c
def
g

Двоичные данные
tests/decompress/a-b-c-def-g.txt.lzma Normal file

Двоичный файл не отображается.

Двоичные данные
tests/decompress/a-b-c-def-g.txt.xz Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1,11 @@
# Copy input fixtures next to the test scripts
file(COPY result-1.txt result-2.txt DESTINATION .)
# Merge via -i/-o command line options
add_test(
  NAME merge-test-with-options
  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/with-options.sh"
)
# Merge via stdin/stdout pipes
add_test(
  NAME merge-test-with-pipes
  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/with-pipes.sh"
)

Просмотреть файл

@ -0,0 +1 @@
aurora/24/CYCLE_COLLECTOR/by-submission-date {"20130805/idle_daily/Fennec/Android/16/arm":{"values": [5, 10, 5, -1, -1, -1, -1, -1, 3],"buildId": "20130805004006","revision": "http://hg.mozilla.org/releases/mozilla-aurora/rev/4ea223de889c"}}

Просмотреть файл

@ -0,0 +1 @@
aurora/24/CYCLE_COLLECTOR/by-submission-date {"20130805/idle_daily/Fennec/Android/16/arm":{"values": [10, 5, 10, -1, -1, -1, -1, -1, 1],"buildId": "20130805004006","revision": "http://hg.mozilla.org/releases/mozilla-aurora/rev/4ea223de889c"}}

Просмотреть файл

@ -0,0 +1,12 @@
#!/bin/bash -e
# Merge test files (bug fix: removed leftover debug `echo "got here"`)
../../mergeresults -i result-1.txt -i result-2.txt -o output.txt;
# Check number of lines
test `cat output.txt | wc -l` -eq 1;
# Check that we have 15 in there
test `cat output.txt | grep 15 | wc -l` -eq 1;

10
tests/merge-test/with-pipes.sh Executable file
Просмотреть файл

@ -0,0 +1,10 @@
#!/bin/bash -e
# Merge test files through stdin/stdout
cat result-1.txt result-2.txt | ../../mergeresults > output.txt;
# Check number of lines
test "$(wc -l < output.txt)" -eq 1;
# Check that we have 15 in there
test "$(grep -c 15 output.txt)" -eq 1;