зеркало из https://github.com/microsoft/mwt-ds.git
added support for transforming marginal features via json
This commit is contained in:
Родитель
c1ef154019
Коммит
9b0d00c13a
|
@ -123,6 +123,7 @@
|
|||
<Compile Include="AzureBlobDownloader.cs" />
|
||||
<Compile Include="FileTransformBlock.cs" />
|
||||
<Compile Include="MetricResult.cs" />
|
||||
<Compile Include="TupleList.cs" />
|
||||
<Compile Include="Util.cs" />
|
||||
<Compile Include="VowpalWabbitJsonToString.cs" />
|
||||
<Compile Include="JsonTransform.cs" />
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
|
@ -27,6 +28,33 @@ namespace Experimentation
|
|||
});
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the JSON events in <paramref name="fileOut"/> and writes them to <c>fileOut + ".fixed"</c>,
/// adding one extra property per configured marginal property to every entry of each event's
/// <c>_multi</c> array.
/// </summary>
/// <param name="fileOut">Path of the input file; the output path is this value with ".fixed" appended.</param>
/// <param name="numExpectedActions">
/// Required number of entries in <c>_multi</c>. Events with a different count, or without a
/// <c>_multi</c> array at all, are not written to the output.
/// </param>
/// <param name="startingNamespace">
/// First character used as prefix of the added property names; the i-th marginal property is
/// prefixed with <c>(char)(startingNamespace + i)</c>.
/// </param>
/// <param name="marginalProperties">
/// Pairs of (parent property name, child token path). For each pair the child value is read from the
/// parent object of every <c>_multi</c> entry and stored in a new property on that same parent.
/// </param>
public static void TransformFixMarginal(string fileOut, int numExpectedActions, char startingNamespace, TupleList<string, string> marginalProperties)
{
    var serializer = JsonSerializer.CreateDefault();
    JsonTransform.Transform(fileOut, fileOut + ".fixed", (reader, writer) =>
    {
        // "as" instead of a hard cast: a null or non-object document is treated as a broken event
        // and dropped, rather than aborting the whole file with an exception.
        var obj = serializer.Deserialize(reader) as JObject;
        var multi = obj?.SelectToken("$._multi") as JArray;

        if (multi != null && multi.Count == numExpectedActions)
        {
            foreach (var item in multi)
            {
                var action = item as JObject;
                if (action == null)
                {
                    // Not a JSON object, so there is no parent node to extend.
                    continue;
                }

                for (int i = 0; i < marginalProperties.Count; i++)
                {
                    var parentNodeName = marginalProperties[i].Item1;
                    var childNodeName = marginalProperties[i].Item2;

                    var parentNode = action[parentNodeName] as JObject;
                    var propertyToken = parentNode?.SelectToken(childNodeName);
                    if (propertyToken == null)
                    {
                        // Parent or child is missing on this action: skip this marginal feature
                        // instead of failing with a NullReferenceException.
                        continue;
                    }

                    var propertyValue = propertyToken.Value<string>();

                    // New property name is "<namespace char><child name>".
                    // NOTE(review): the { c: "onstant", id: value } shape presumably yields a
                    // "constant" feature plus an id feature for the downstream learner - confirm.
                    parentNode.Add($"{(char)(startingNamespace + i)}{childNodeName}", JToken.FromObject(new { c = "onstant", id = propertyValue }));
                }
            }

            serializer.Serialize(writer, obj);
        }

        return true;
    });
}
|
||||
|
||||
public static void Transform(string fileIn, string fileOut, Func<JsonTextReader, JsonTextWriter, bool> transform)
|
||||
{
|
||||
using (var reader = new StreamReader(fileIn, Encoding.UTF8))
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Experimentation
|
||||
{
|
||||
/// <summary>
/// A list of two-element tuples that can be filled with collection-initializer syntax,
/// e.g. <c>new TupleList&lt;string, string&gt; { { "a", "b" } }</c>.
/// </summary>
/// <typeparam name="T1">Type of the first tuple component.</typeparam>
/// <typeparam name="T2">Type of the second tuple component.</typeparam>
public class TupleList<T1, T2> : List<Tuple<T1, T2>>
{
    /// <summary>
    /// Appends a new tuple built from the two given values.
    /// </summary>
    /// <param name="item">Value stored as <c>Item1</c>.</param>
    /// <param name="item2">Value stored as <c>Item2</c>.</param>
    public void Add(T1 item, T2 item2)
    {
        Tuple<T1, T2> pair = Tuple.Create(item, item2);
        base.Add(pair);
    }
}
|
||||
}
|
|
@ -48,16 +48,16 @@ namespace ExperimentationConsole
|
|||
|
||||
outputFile += ".small";
|
||||
// filter broken events
|
||||
JsonTransform.Transform(outputFile, outputFile + ".fixed", (reader, writer) =>
|
||||
{
|
||||
var serializer = JsonSerializer.CreateDefault();
|
||||
var obj = (JObject)serializer.Deserialize(reader);
|
||||
var multi = (JArray)obj.SelectToken("$._multi");
|
||||
if (multi.Count == 10)
|
||||
serializer.Serialize(writer, obj);
|
||||
JsonTransform.TransformFixMarginal(outputFile,
|
||||
numExpectedActions: 10, // examples with different number of actions are ignored
|
||||
startingNamespace: 'G', // starting namespace of the marginal features, if more than one marginal features then the next letter is used, e.g. G for the first one, H for second, and so on.
|
||||
marginalProperties: new TupleList<string, string>
|
||||
{
|
||||
// The property parent and name to create marginal features for
|
||||
{ "DVideoFeatures", "VideoId" },
|
||||
//{ "DVideoFeatures", "VideoTitle" }, // uncomment if more marginal features are needed
|
||||
});
|
||||
|
||||
return true;
|
||||
});
|
||||
outputFile += ".fixed";
|
||||
|
||||
using (var reader = new StreamReader(outputFile))
|
||||
|
|
Загрузка…
Ссылка в новой задаче