[XlgDebugger] parallel map and filter evaluation (#1174)

* parallel map and filter evaluation
* Add 'Elems' to `uniq -c` result
* add -o for grep
* unit tests
* update documentation
This commit is contained in:
Aleksandar Milicevic 2019-11-07 10:42:32 -08:00 коммит произвёл GitHub
Родитель d45bc88b21
Коммит 40b47bc2f8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 209 добавлений и 71 удалений

Просмотреть файл

@ -325,12 +325,13 @@ For the most up-to-date list of library functions see [LibraryFunctions.cs](/Pub
| Function | Switches | Semantics |
| --- | --- | --- |
| `$sum` | | Converts every arg to number and computes their sum. Fails if any argument is not a number. |
| `$avg` | | Flattens all arguments, converts each one to a number, and computes their average, truncated to an integer. Fails if any argument is not a number. |
| `$cut` | `-d <delim> -f <fld1>,...,<fldN>` | Similar to `/usr/bin/cut` |
| `$count` | | Flattens all arguments and returns their count. |
| `$uniq` | `-c` | Flattens all arguments and dedupes them. When `-c` is provided, the output contains the count of each returned value. |
| `$sort` | `-n -r` | Sorts the elements. `-n` implies numeric sorting, and `-r` sorting in descending order |
| `$uniq` | `-c -k <fld>` | Flattens all arguments and dedupes them. When `-k <fld>` is specified, elements are deduped by their `<fld>` property values. When `-c` is provided, the output contains the count of each returned value. |
| `$sort` | `-n -r -k <fld>` | Sorts the elements. `-n` selects numeric sorting and `-r` sorts in descending order. When `-k <fld>` is specified, elements are sorted by their `<fld>` property values. |
| `$join` | `-d <delim>` | Joins all elements by `delim`; when `delim` is not provided, platform-specific EOL is used |
| `$grep` | `-v` | The first argument is a pattern; from the rest of the arguments, selects those that [match](#Match-Operator) the pattern. `-v` implies inverse selection. |
| `$grep` | `-v -o -g <grp>` | The first argument is a pattern; from the rest of the arguments, selects those that [match](#Match-Operator) the pattern; when `-o` is specified, only the matched substring is printed (in this case `-g <grp>` specifies the name of the RegEx group whose value to print). `-v` implies inverse selection. |
| `$str` | | Concatenates all args into a string |
| `$head` | `-n <num>` | Flattens all args and takes first `num`. |
| `$tail` | `-n <num>` | Flattens all args and takes last `num`. |

Просмотреть файл

@ -80,8 +80,11 @@ namespace BuildXL.Execution.Analyzer.JPath
{
try
{
var regex = new Regex(context.Value.Text.Trim('/', '!'));
return new RegexLit(regex);
var regexStr =
context.Value.Text.StartsWith("/") ? context.Value.Text.Trim('/') :
context.Value.Text.StartsWith("!") ? context.Value.Text.Trim('!') :
context.Value.Text;
return new RegexLit(new Regex(regexStr));
}
catch (ArgumentException e)
{

Просмотреть файл

@ -82,7 +82,9 @@ namespace BuildXL.Execution.Analyzer.JPath
/// <summary>
/// Typed equality check against another <see cref="Result"/>.
/// A null <paramref name="other"/> is never equal to this instance.
/// </summary>
/// <param name="other">Result to compare with; may be null.</param>
public bool Equals(Result other)
{
// NOTE(review): two return statements follow. They look like the removed
// (identity-based) and the added (element-wise) versions of the same line shown
// together by the diff view; as written only the first one is reachable.
// Confirm which one is intended and drop the other.
return other != null && m_identity.Equals(other.m_identity);
// Element-wise comparison: same Count, and every position compared with `==`.
// NOTE(review): if the element type of Value is `object`, `==` is a reference
// comparison (boxed numbers with the same value would compare unequal) -
// confirm whether value equality was intended here.
return other != null
&& Count == other.Count
&& Enumerable.Range(0, Count).All(idx => Value[idx] == other.Value[idx]);
}
public override bool Equals(object obj)
@ -546,13 +548,17 @@ namespace BuildXL.Execution.Analyzer.JPath
return TopEnv
.Current
.Where(obj => ToBool(InNewEnv(Result.Scalar(obj), filterExpr.Filter)))
.ToList();
.ToArray()
.AsParallel()
.Where(obj => ToBool(new Evaluator(TopEnv.WithCurrent(Result.Scalar(obj)), EnableCaching).Eval(filterExpr.Filter)))
.ToArray();
case MapExpr mapExpr:
var lhs = Eval(mapExpr.Lhs);
return lhs
.Select(obj => InNewEnv(Result.Scalar(obj), mapExpr.Sub))
.ToArray()
.AsParallel()
.Select(obj => new Evaluator(TopEnv.WithCurrent(Result.Scalar(obj)), EnableCaching).Eval(mapExpr.Sub))
.SelectMany(result => result) // automatically flatten
.ToArray();
@ -712,14 +718,36 @@ namespace BuildXL.Execution.Analyzer.JPath
/// <param name="lhsStr">String to check</param>
/// <param name="rhs">Must be a scalar string or regular expression</param>
/// <returns>
/// True when <paramref name="lhsStr"/> contains the string pattern (ordinal, case-insensitive)
/// or when the regular expression finds a match in it; false otherwise.
/// </returns>
public bool Matches(string lhsStr, Result rhs)
{
    // Test for the existence of a match directly instead of checking whether the matched
    // text is non-empty: an empty string pattern and a zero-length regex match (e.g. /a*/)
    // are successful matches whose matched text is "", so a non-empty check would wrongly
    // report them as "no match".
    var rhsVal = ToScalar(rhs);
    return rhsVal switch
    {
        string str => lhsStr.IndexOf(str, StringComparison.OrdinalIgnoreCase) >= 0,
        Regex regex => regex.IsMatch(lhsStr),
        _ => throw TypeError(rhsVal, "string | Regex")
    };
}
/// <summary>
/// Returns a substring of <paramref name="lhsStr"/> matching <paramref name="rhs"/>.
/// </summary>
/// <param name="lhsStr">String to search in.</param>
/// <param name="rhs">Pattern; after conversion to a scalar it must be a string or a <see cref="Regex"/>.</param>
/// <param name="groupName">
/// Only used for a <see cref="Regex"/> pattern: when not null, the value of the group
/// with this name is returned instead of the whole match.
/// </param>
/// <returns>
/// The matched text, or <see cref="string.Empty"/> when nothing matches.
/// For a string pattern the search is ordinal and case-insensitive, and the returned
/// text is taken from <paramref name="lhsStr"/> (so it keeps the casing of the input).
/// </returns>
public string Match(string lhsStr, Result rhs, string groupName = null)
{
var rhsVal = ToScalar(rhs);
return rhsVal switch
{
// NOTE(review): the next two arms yield bool and duplicate the `string` / `Regex`
// arms right below them; they look like the removed side of the diff shown
// together with the added lines - confirm and drop them.
string str => lhsStr.ToUpperInvariant().Contains(str.ToUpperInvariant()),
Regex regex => regex.Match(lhsStr).Success,
// String pattern: locate it case-insensitively and cut the same-length slice out of the input.
string str => substr(lhsStr, lhsStr.IndexOf(str, StringComparison.OrdinalIgnoreCase), str.Length),
// Regex pattern: run the regex and extract either the whole match or the requested group.
Regex regex => match(regex.Match(lhsStr)),
// Anything else is reported as a type error.
_ => throw TypeError(rhsVal, "string | Regex")
};
// Returns s[index, index + length) or the empty string when index is negative (pattern not found).
string substr(string s, int index, int length)
{
return index >= 0
? s.Substring(index, length)
: string.Empty;
}
// Maps a regex match to text: the named group's value when groupName is given,
// the whole match otherwise, and the empty string when the match failed.
string match(Match m)
{
return m.Success
? groupName != null ? m.Groups[groupName].Value : m.Value
: string.Empty;
}
}
/// <summary>
@ -727,7 +755,14 @@ namespace BuildXL.Execution.Analyzer.JPath
///
/// Every object can be resolved to something, so this function never fails.
/// </summary>
public string PreviewObj(object obj) => Resolve(obj)?.Preview ?? obj?.ToString() ?? "<null>";
/// <summary>
/// Produces a display string for <paramref name="obj"/>.
/// A scalar <see cref="Result"/> is first replaced by its first element; the preview is then
/// taken from the resolved object info, falling back to <c>ToString()</c> and finally to
/// the literal <c>&lt;null&gt;</c>.
/// </summary>
public string PreviewObj(object obj)
{
    // Unwrap a scalar result so that the element itself (not the wrapper) gets previewed.
    var target = obj is Result wrapped && wrapped.IsScalar
        ? wrapped.First()
        : obj;

    var resolvedPreview = Resolve(target)?.Preview;
    if (resolvedPreview != null)
    {
        return resolvedPreview;
    }

    return target?.ToString() ?? "<null>";
}
/// <summary>
/// Passes <paramref name="obj"/> to the resolver of the top environment.
/// Yields null when there is no top environment or it has no resolver.
/// </summary>
internal ObjectInfo Resolve(object obj)
{
    return TopEnv?.Resolver?.Invoke(obj);
}

Просмотреть файл

@ -10,6 +10,7 @@ using BuildXL.Execution.Analyzer.JPath;
using IEnum = System.Collections.IEnumerable;
using static BuildXL.Execution.Analyzer.JPath.Evaluator;
using BuildXL.FrontEnd.Script.Debugger;
namespace BuildXL.Execution.Analyzer
{
@ -18,9 +19,10 @@ namespace BuildXL.Execution.Analyzer
public static readonly Function SaveFunction = new Function(name: "save", minArity: 2, func: Save);
public static readonly Function AppendFunction = new Function(name: "append", minArity: 2, func: Append);
public static readonly IReadOnlyList<Function> All = new List<Function>
public static IReadOnlyList<Function> All { get; } = new List<Function>
{
new Function(name: "sum", minArity: 1, func: Sum),
new Function(name: "avg", minArity: 1, func: Avg),
new Function(name: "cut", minArity: 1, func: Cut),
new Function(name: "count", minArity: 1, func: Count),
new Function(name: "uniq", minArity: 1, func: Uniq),
@ -44,6 +46,14 @@ namespace BuildXL.Execution.Analyzer
.Sum();
}
/// <summary>
/// Library function <c>avg</c>: flattens all arguments, converts each element to a number
/// and returns their average truncated to a <c>long</c>.
/// </summary>
private static Result Avg(Evaluator.Args args)
{
    var numbers = args.Flatten().Select(obj => args.ToNumber(obj));
    var mean = numbers.Average();

    // The fractional part of the average is discarded by the cast.
    return (long)mean;
}
private static Result Cut(Evaluator.Args args)
{
var separator = args.ToString(args.GetSwitch("d") ?? " \t\r\n");
@ -75,12 +85,30 @@ namespace BuildXL.Execution.Analyzer
private static Result Uniq(Evaluator.Args args)
{
var groups = args.Flatten().GroupBy(obj => args.Preview(obj));
var fieldToGroupBy = args.GetStrSwitch("k", null);
Func<object, object> keySelector = o => o;
if (!string.IsNullOrEmpty(fieldToGroupBy))
{
keySelector = o => args.Eval.Resolve(o).Properties.FirstOrDefault(p => p.Name == fieldToGroupBy)?.Value;
}
var aa = args.Flatten();
var groups = aa.GroupBy(obj => args.Preview(keySelector(obj)));
if (args.HasSwitch("c")) // count objects in each group
{
return groups
.Select(grp => $"{grp.Count()}\t{args.Preview(grp.First())}")
.Select(grp =>
{
return new ObjectInfo(
preview: $"{grp.Count()}: {grp.Key}",
properties: new[]
{
new Property(name: "Key", value: grp.Key),
new Property(name: "Count", value: grp.Count()),
new Property(name: "Elems", value: grp.ToArray())
});
})
.ToArray();
}
else
@ -95,9 +123,18 @@ namespace BuildXL.Execution.Analyzer
{
var objs = args.Flatten();
var ordered = args.HasSwitch("n") // numeric sorting (otherwise string sorting)
? objs.OrderBy(args.ToNumber)
: objs.OrderBy(args.Preview);
var fieldToSortBy = args.GetStrSwitch("k", null);
Func<object, object> keySelector = o => o;
if (!string.IsNullOrEmpty(fieldToSortBy))
{
keySelector = o => args.Eval.Resolve(o).Properties.FirstOrDefault(p => p.Name == fieldToSortBy)?.Value;
}
IComparer<object> comparer = args.HasSwitch("n")
? Comparer<object>.Create((lhs, rhs) => Comparer<long?>.Default.Compare(args.Eval.TryToNumber(lhs), args.Eval.TryToNumber(rhs)))
: Comparer<object>.Create((lhs, rhs) => Comparer<string>.Default.Compare(args.Preview(lhs), args.Preview(rhs)));
var ordered = objs.OrderBy(keySelector, comparer);
var finalOrder = args.HasSwitch("r") // reverse
? ordered.Reverse()
@ -176,10 +213,22 @@ namespace BuildXL.Execution.Analyzer
{
var pattern = args[0];
var flip = args.HasSwitch("v");
var printMatchOnly = args.HasSwitch("o");
var groupName = args.GetStrSwitch("g", defaultValue: "0");
return args
.Skip(1)
.SelectMany(result => result)
.Where(obj => flip ^ args.Matches(args.Preview(obj), pattern))
.Select(obj =>
{
var str = args.Preview(obj);
var match = args.Eval.Match(str, pattern, groupName);
var matches = !string.IsNullOrEmpty(match);
var shouldInclude = flip ^ matches;
return shouldInclude
? (flip || !printMatchOnly) ? str : match
: null;
})
.Where(str => str != null)
.ToArray();
}

Просмотреть файл

@ -9,6 +9,7 @@ using System.Threading.Tasks;
using BuildXL.Execution.Analyzer.JPath;
using BuildXL.FrontEnd.Script.Debugger;
using Newtonsoft.Json.Linq;
using Test.BuildXL.TestUtilities.Xunit;
using Xunit;
using Xunit.Abstractions;
@ -35,13 +36,19 @@ namespace Test.Tool.Analyzers
}
#pragma warning restore CS0649
private Evaluator.Env RootEnv { get; }
/// <summary>
/// Builds the shared root environment: a fresh <c>Val</c> as the current value, the test
/// resolver, and one variable per library function registered under "$" + function name.
/// </summary>
public JPathEvaluatorTests(ITestOutputHelper output)
    : base(output)
{
    var libraryVars = global::BuildXL.Execution.Analyzer.LibraryFunctions.All.ToDictionary(
        func => "$" + func.Name,
        func => Evaluator.Result.Scalar(func));

    RootEnv = new Evaluator.Env(
        parent: null,
        current: Evaluator.Result.Scalar(new Val()),
        resolver: Resolver,
        vars: libraryVars);
}
private const string Env1 = "Curr: 1";
[Theory]
// root expr
[InlineData("$.N", "{Curr: {N: 1}}", "[1]")]
@ -121,13 +128,55 @@ namespace Test.Tool.Analyzers
{
var env = Convert(JsonDeserialize<Env>(envStr));
var evaluator = new Evaluator(env, enableCaching: false);
EvaluateAndAssertResult(evaluator, exprStr, expectedResultJson);
}
/// <summary>
/// Evaluates <paramref name="exprStr"/> against the root environment (where every library
/// function is available as a "$name" variable) and compares the outcome with
/// <paramref name="expectedResultJson"/>.
/// </summary>
[Theory]
// grep: plain and match-only (-o), optionally selecting a named group (-g)
[InlineData("$grep('A', 'ab' ++ 'cd')", "['ab']")]
[InlineData("$grep -o ('A', 'ab' ++ 'cd')", "['a']")]
[InlineData("$grep(/.$/, 'ab' ++ 'cd')", "['ab', 'cd']")]
[InlineData("$grep -o (/.$/, 'ab' ++ 'cd')", "['b', 'd']")]
[InlineData("$grep -o -g 'G' (/(?<G>.).$/, 'ab' ++ 'cd')", "['a', 'c']")]
// grep: inverse selection (-v)
[InlineData("$grep -v ('A', 'ab' ++ 'cd')", "['cd']")]
[InlineData("$grep -v -o ('A', 'ab' ++ 'cd')", "['cd']")]
[InlineData("$grep -v (/.$/, 'ab' ++ 'cd')", "[]")]
[InlineData("$grep -v -o (/.$/, 'ab' ++ 'cd')", "[]")]
[InlineData("$grep -v -o -g 'G' (/(?<G>.).$/, 'ab' ++ 'cd')", "[]")]
// sort: numeric
[InlineData("(111 ++ 3 ++ 22) | $sort -n", "[3, 22, 111]")]
[InlineData("(111 ++ 3 ++ 22) | $sort -n -r", "[111, 22, 3]")]
// sort: string ordering
[InlineData("(111 ++ 3 ++ 22) | $sort", "[111, 22, 3]")]
[InlineData("(111 ++ 3 ++ 22) | $sort -r", "[3, 22, 111]")]
// sort: by field
[InlineData("({a: 1, b: 2} ++ {a: 2, b: 1}) | $sort -n -k 'a'", "[{a: 1, b: 2}, {a: 2, b: 1}]")]
[InlineData("({a: 1, b: 2} ++ {a: 2, b: 1}) | $sort -n -k 'b'", "[{a: 2, b: 1}, {a: 1, b: 2}]")]
// uniq: plain and counting (-c)
[InlineData("(1 ++ 2 ++ 1) | $uniq", "[1, 2]")]
[InlineData("(1 ++ 2 ++ 1) | $uniq -c", "[{Key: '1', Count: 2, Elems: [1, 1]}, {Key: '2', Count: 1, Elems: [2]}]")]
// uniq: by field
[InlineData("({a: 1} ++ {a: 2} ++ {a: 3, b: 2}) | $uniq -k 'a' | $count", "[3]")]
[InlineData("({a: 1} ++ {a: 2} ++ {a: 3, b: 2}) | $uniq -k 'b' | $count", "[2]")]
[InlineData("({a: 1} ++ {a: 2} ++ {a: 3, b: 2}) | $uniq -k 'c' | $count", "[1]")]
// uniq combined with sort
[InlineData("(('a' ++ 'b' ++ 'a') | $uniq -c | $sort -n -r -k 'Count').($str(Count, ': ', Key))", "['2: a', '1: b']")]
public void TestLibraryFunc(string exprStr, string expectedResultJson)
{
    // Caching is disabled so that each case is evaluated from scratch.
    EvaluateAndAssertResult(
        new Evaluator(RootEnv, enableCaching: false),
        exprStr,
        expectedResultJson);
}
/// <summary>
/// Parses <paramref name="exprStr"/>, evaluates it with <paramref name="evaluator"/>,
/// asserts that both steps succeeded, and compares the JSON-serialized result with
/// <paramref name="expectedResultJson"/> (which must be a JSON array).
/// </summary>
private void EvaluateAndAssertResult(Evaluator evaluator, string exprStr, string expectedResultJson)
{
// Parse and evaluate in one chain; a failure in either step makes Succeeded false.
var maybeResult = JPath.TryParse(exprStr).Then(expr => JPath.TryEval(evaluator, expr));
XAssert.IsTrue(maybeResult.Succeeded);
var result = maybeResult.Result;
// NOTE(review): `j1` / `j2` are declared twice below. The first three lines look like
// the removed (string-comparison) version and the last three like the added
// (structural DeepEquals) version shown together by the diff view - confirm and
// keep only one set.
var j1 = Newtonsoft.Json.Linq.JArray.Parse(expectedResultJson);
var j2 = Newtonsoft.Json.Linq.JArray.Parse(JsonSerialize(result.ToArray()));
XAssert.AreEqual(j1.ToString(), j2.ToString());
// Structural comparison of the two JSON arrays; both values are included in the failure message.
var j1 = JArray.Parse(expectedResultJson);
var j2 = JArray.Parse(JsonSerialize(result.ToArray()));
XAssert.IsTrue(JToken.DeepEquals(j1, j2), "Expected: {0}, Actual: {1}", j1, j2);
}
private Evaluator.Env CreateEnv(object current, Evaluator.Env parent = null)
@ -141,6 +190,7 @@ namespace Test.Tool.Analyzers
{
int i => new ObjectInfo(preview: i.ToString(), original: i),
string str => new ObjectInfo(preview: str, original: str),
ObjectInfo oi => oi,
_ => Renderer.GenericObjectInfo(obj)
};
}
@ -171,7 +221,7 @@ namespace Test.Tool.Analyzers
}
}
private static Evaluator.Env Convert(Env env)
private Evaluator.Env Convert(Env env)
{
return env == null
? null