added support for transforming marginal features via json

This commit is contained in:
Luong Hoang 2016-08-20 15:38:12 -04:00
Родитель c1ef154019
Коммит 9b0d00c13a
4 изменённых файлов: 51 добавлений и 9 удалений

Просмотреть файл

@ -123,6 +123,7 @@
<Compile Include="AzureBlobDownloader.cs" />
<Compile Include="FileTransformBlock.cs" />
<Compile Include="MetricResult.cs" />
<Compile Include="TupleList.cs" />
<Compile Include="Util.cs" />
<Compile Include="VowpalWabbitJsonToString.cs" />
<Compile Include="JsonTransform.cs" />

Просмотреть файл

@ -1,4 +1,5 @@
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
@ -27,6 +28,33 @@ namespace Experimentation
});
}
/// <summary>
/// Filters the JSON events in <paramref name="fileOut"/> and adds marginal-feature objects to every action.
/// The result is written to <c>fileOut + ".fixed"</c> through <c>JsonTransform.Transform</c>.
/// </summary>
/// <remarks>
/// An event is written only when its <c>_multi</c> array holds exactly
/// <paramref name="numExpectedActions"/> entries. Events without a <c>_multi</c> array are treated as
/// broken and are not written either.
/// </remarks>
/// <param name="fileOut">Path of the file to read; the output path is this path with ".fixed" appended.</param>
/// <param name="numExpectedActions">Required number of entries in the <c>_multi</c> array.</param>
/// <param name="startingNamespace">
/// Character prefixed to the name of the first marginal property; the property at index <c>i</c>
/// uses the character <c>i</c> positions after it.
/// </param>
/// <param name="marginalProperties">
/// Pairs of (parent object name, child property path) identifying, inside each action, the value
/// that is copied into the added marginal-feature object.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="fileOut"/> or <paramref name="marginalProperties"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// An action of a kept event lacks one of the configured parent objects or child properties.
/// </exception>
public static void TransformFixMarginal(string fileOut, int numExpectedActions, char startingNamespace, TupleList<string, string> marginalProperties)
{
    if (fileOut == null)
    {
        throw new ArgumentNullException(nameof(fileOut));
    }

    if (marginalProperties == null)
    {
        throw new ArgumentNullException(nameof(marginalProperties));
    }

    // The names of the added properties depend only on the configuration, so build them once
    // instead of once per action of every event.
    var marginalNames = new string[marginalProperties.Count];
    for (int i = 0; i < marginalNames.Length; i++)
    {
        marginalNames[i] = $"{(char)(startingNamespace + i)}{marginalProperties[i].Item2}";
    }

    var serializer = JsonSerializer.CreateDefault();

    JsonTransform.Transform(fileOut, fileOut + ".fixed", (reader, writer) =>
    {
        var obj = serializer.Deserialize(reader) as JObject;
        var multi = obj?.SelectToken("$._multi") as JArray;

        // A record that is not an object, or has no "_multi" array, is a broken event: drop it
        // like an event with an unexpected action count rather than failing with a null dereference.
        if (multi != null && multi.Count == numExpectedActions)
        {
            foreach (var item in multi)
            {
                for (int i = 0; i < marginalProperties.Count; i++)
                {
                    var parentNodeName = marginalProperties[i].Item1;
                    var childNodeName = marginalProperties[i].Item2;

                    var parentNode = item[parentNodeName] as JObject;
                    var propertyToken = parentNode?.SelectToken(childNodeName);
                    if (propertyToken == null)
                    {
                        // Fail loudly, naming the missing path, instead of an anonymous null dereference.
                        throw new InvalidOperationException(
                            $"Action is missing '{parentNodeName}.{childNodeName}', which is required to build marginal feature '{marginalNames[i]}'.");
                    }

                    var propertyValue = propertyToken.Value<string>();
                    parentNode.Add(marginalNames[i], JToken.FromObject(new { c = "onstant", id = propertyValue }));
                }
            }

            serializer.Serialize(writer, obj);
        }

        // NOTE(review): the meaning of the returned flag is defined by JsonTransform.Transform,
        // whose body is not visible here; the value is kept exactly as before.
        return true;
    });
}
public static void Transform(string fileIn, string fileOut, Func<JsonTextReader, JsonTextWriter, bool> transform)
{
using (var reader = new StreamReader(fileIn, Encoding.UTF8))

Просмотреть файл

@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
namespace Experimentation
{
/// <summary>
/// A list of two-element tuples that can be filled with the nested collection-initializer
/// syntax <c>{ { a, b }, { c, d } }</c>.
/// </summary>
/// <typeparam name="T1">Type of the first tuple component.</typeparam>
/// <typeparam name="T2">Type of the second tuple component.</typeparam>
public class TupleList<T1, T2> : List<Tuple<T1, T2>>
{
    /// <summary>
    /// Appends a new tuple built from the two given components to the end of the list.
    /// </summary>
    /// <param name="item">First component of the tuple.</param>
    /// <param name="item2">Second component of the tuple.</param>
    public void Add(T1 item, T2 item2) => base.Add(Tuple.Create(item, item2));
}
}

Просмотреть файл

@ -48,16 +48,16 @@ namespace ExperimentationConsole
outputFile += ".small";
// filter broken events
JsonTransform.Transform(outputFile, outputFile + ".fixed", (reader, writer) =>
JsonTransform.TransformFixMarginal(outputFile,
numExpectedActions: 10, // examples with different number of actions are ignored
startingNamespace: 'G', // starting namespace of the marginal features, if more than one marginal features then the next letter is used, e.g. G for the first one, H for second, and so on.
marginalProperties: new TupleList<string, string>
{
var serializer = JsonSerializer.CreateDefault();
var obj = (JObject)serializer.Deserialize(reader);
var multi = (JArray)obj.SelectToken("$._multi");
if (multi.Count == 10)
serializer.Serialize(writer, obj);
return true;
// The property parent and name to create marginal features for
{ "DVideoFeatures", "VideoId" },
//{ "DVideoFeatures", "VideoTitle" }, // uncomment if more marginal features are needed
});
outputFile += ".fixed";
using (var reader = new StreamReader(outputFile))