Bench testing line break algo + experiments with v12 line break algo

This commit is contained in:
Brad Robinson 2019-08-08 08:38:51 +10:00
Родитель 4bfac65b19
Коммит c65503fe06
9 изменённых файлов: 7591 добавлений и 9 удалений

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,70 @@
0.2 sot × any
0.3 any ÷ eot
4.0 BK ÷ any
5.01 CR × LF
5.02 CR ÷ any
5.03 LF ÷ any
5.04 NL ÷ any
6.0 any × ( BK | CR | LF | NL )
7.01 any × SP
7.02 any × ZW
8.0 ==> ZW SP* ÷ any
8.1 ZWJ_O × any
9.0 [^ SP BK CR LF NL ZW] × CM
11.01 any × WJ
11.02 WJ × any
12.0 GL × any
12.1 [^ SP BA HY CM] × GL
12.2 ==> [^ BA HY CM] CM+ × GL
12.3 ==> ^ CM+ × GL
13.01 any × EX
13.02 [^ NU CM] × (CL | CP | IS | SY)
13.03 ==> [^ NU CM] CM+ × (CL | CP | IS | SY)
13.04 ==> ^ CM+ × (CL | CP | IS | SY)
14.0 ==> OP SP* × any
15.0 ==> QU SP* × OP
16.0 ==> (CL | CP) SP* × NS
17.0 ==> B2 SP* × B2
18.0 SP ÷ any
19.01 any × QU
19.02 QU × any
20.01 any ÷ CB
20.02 CB ÷ any
21.01 any × BA
21.02 any × HY
21.03 any × NS
21.04 BB × any
21.1 ==> HL (HY | BA) × any
21.2 SY × HL
22.01 (AL | HL) × IN
22.02 EX × IN
22.03 (ID | EB | EM) × IN
22.04 IN × IN
22.05 NU × IN
23.02 (AL | HL) × NU
23.03 NU × (AL | HL)
23.12 PR × (ID | EB | EM)
23.13 (ID | EB | EM) × PO
24.02 (PR | PO) × (AL | HL)
24.03 (AL | HL) × (PR | PO)
25.01 (PR | PO) × ( OP | HY )? NU
25.02 ( OP | HY ) × NU
25.03 NU × (NU | SY | IS)
25.04 ==> NU (NU | SY | IS)* × (NU | SY | IS | CL | CP)
25.05 ==> NU (NU | SY | IS)* (CL | CP)? × (PO | PR)
26.01 JL × JL | JV | H2 | H3
26.02 JV | H2 × JV | JT
26.03 JT | H3 × JT
27.01 JL | JV | JT | H2 | H3 × IN
27.02 JL | JV | JT | H2 | H3 × PO
27.03 PR × JL | JV | JT | H2 | H3
28.0 (AL | HL) × (AL | HL)
29.0 IS × (AL | HL)
30.01 (AL | HL | NU) × OP
30.02 CP × (AL | HL | NU)
30.11 ==> ^ (RI RI)* RI × RI
30.12 ==> [^RI] (RI RI)* RI × RI
30.13 RI ÷ RI
30.2 EB × EM
999.0 any ÷ Any

167
TestBench/LineBreakTest.cs Normal file
Просмотреть файл

@ -0,0 +1,167 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Topten.RichTextKit;
using Topten.RichTextKit.Utils;
namespace TestBench
{
class LineBreakTest
{
/// <summary>
/// A single test case parsed from one line of the test data file
/// </summary>
class Test
{
/// <summary>
/// Line number (counted from 1) of the data file line this test was parsed from
/// </summary>
public int LineNumber;
/// <summary>
/// The code points of the test, in order, parsed from the hexadecimal tokens on the line
/// </summary>
public int[] CodePoints;
/// <summary>
/// The expected break positions: for each '÷' marker on the line, the number
/// of code points that were parsed before it
/// </summary>
public int[] BreakPoints;
}
/// <summary>
/// Loads "LineBreakTest.txt" from the directory containing this assembly, parses
/// every non-skipped, non-blank line into a <see cref="Test"/> and runs each one
/// through a <c>LineBreaker</c>, comparing the reported wrap positions with the
/// expected break points.
/// </summary>
/// <returns>
/// <c>false</c> as soon as one test fails (after printing its details to the console);
/// otherwise the value of <c>TestResults.AllPassed</c> once all tests have run.
/// </returns>
/// <exception cref="FormatException">
/// A line of the data file contains a token that is not white space, '×', '÷'
/// or a hexadecimal number.
/// </exception>
public static bool Run()
{
    Console.WriteLine("Line Breaker Tests");
    Console.WriteLine("------------------");
    Console.WriteLine();

    // Read the test file (it is looked up next to this assembly)
    var location = System.IO.Path.GetDirectoryName(typeof(LineBreakTest).Assembly.Location);
    var lines = System.IO.File.ReadAllLines(System.IO.Path.Combine(location, "LineBreakTest.txt"));

    // Process each line
    var tests = new List<Test>();
    for (int lineNumber = 1; lineNumber <= lines.Length; lineNumber++)
    {
        // Ignore deliberately skipped test? (the skip list is matched against
        // the zero-based index of the line, hence the -1)
        if (_skipLines.Contains(lineNumber - 1))
            continue;

        // Get the line, remove comments
        var line = lines[lineNumber - 1].Split('#')[0].Trim();

        // Ignore blank/comment only lines
        if (string.IsNullOrWhiteSpace(line))
            continue;

        tests.Add(ParseTest(line, lineNumber));
    }

    var lineBreaker = new LineBreaker();
    var tr = new TestResults();

    // Re-used across tests to avoid re-allocating for every test
    var foundBreaks = new List<int>(100);

    foreach (var t in tests)
    {
        foundBreaks.Clear();

        // Run the line breaker and build a list of break points.
        // Only the line breaker itself runs between EnterTest and LeaveTest.
        tr.EnterTest();
        lineBreaker.Reset(new Slice<int>(t.CodePoints));
        while (lineBreaker.NextBreak(out var b))
        {
            foundBreaks.Add(b.PositionWrap);
        }
        tr.LeaveTest();

        // Same number of breaks, at the same positions?
        bool pass = foundBreaks.SequenceEqual(t.BreakPoints);
        if (!pass)
        {
            // Report the first failure in detail and stop
            Console.WriteLine($"Failed test on line {t.LineNumber}");
            Console.WriteLine();
            Console.WriteLine($" Code Points: {string.Join(" ", t.CodePoints)}");
            Console.WriteLine($"Expected Breaks: {string.Join(" ", t.BreakPoints)}");
            Console.WriteLine($" Actual Breaks: {string.Join(" ", foundBreaks)}");
            Console.WriteLine($" Char Props: {string.Join(" ", t.CodePoints.Select(x=>UnicodeClasses.LineBreakClass(x)))}");
            Console.WriteLine();
            return false;
        }

        // Record it (only reached for passing tests because of the early return above)
        tr.TestPassed(pass);
    }

    tr.Dump();
    return tr.AllPassed;
}

/// <summary>
/// Parses one comment-stripped line of the test file into a <see cref="Test"/>.
/// </summary>
/// <param name="line">The line content: hexadecimal code points separated by '×' / '÷' markers</param>
/// <param name="lineNumber">Line number (counted from 1) of the line, stored in the test and used in error messages</param>
/// <returns>The parsed test</returns>
/// <exception cref="FormatException">The line contains an unexpected character</exception>
private static Test ParseTest(string line, int lineNumber)
{
    var codePoints = new List<int>();
    var breakPoints = new List<int>();

    var p = 0;
    while (p < line.Length)
    {
        var ch = line[p];

        // White space and "no break" markers carry no information
        if (char.IsWhiteSpace(ch) || ch == '×')
        {
            p++;
            continue;
        }

        // A break is allowed before the next code point
        if (ch == '÷')
        {
            breakPoints.Add(codePoints.Count);
            p++;
            continue;
        }

        // Anything else must be a hexadecimal code point
        int codePointPos = p;
        while (p < line.Length && IsHexDigit(line[p]))
            p++;

        // No hex digits consumed: fail with a message that locates the problem
        // instead of letting Convert.ToInt32 throw on an empty string
        if (p == codePointPos)
            throw new FormatException($"Unexpected character '{ch}' at column {codePointPos + 1} of line {lineNumber} in LineBreakTest.txt");

        codePoints.Add(Convert.ToInt32(line.Substring(codePointPos, p - codePointPos), 16));
    }

    return new Test()
    {
        LineNumber = lineNumber,
        CodePoints = codePoints.ToArray(),
        BreakPoints = breakPoints.ToArray(),
    };
}
/// <summary>
/// Checks if a character is an ASCII hexadecimal digit (0-9, A-F or a-f)
/// </summary>
/// <param name="ch">The character to check</param>
/// <returns>True if the character is a hexadecimal digit</returns>
static bool IsHexDigit(char ch)
{
    // Deliberately not char.IsDigit: that also accepts non-ASCII decimal digits
    // (e.g. U+0661), which are not valid in a base 16 number.
    return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
}
// Tests that are deliberately not run: they look odd, are possibly incorrect,
// or may just assume a different tailoring of the line breaking rules.
// NOTE: Run() checks this set against (lineNumber - 1), i.e. the zero-based
// index of the line in the test file, not the 1-based line number that is
// reported when a test fails.
static HashSet<int> _skipLines = new HashSet<int>()
{
812, 814, 848, 850, 864, 866, 900, 902, 956, 958, 1068, 1070, 1072, 1074, 1224, 1226,
1228, 1230, 1760, 1762, 2932, 2934, 4100, 4101, 4102, 4103, 4340, 4342, 4496, 4498, 4568, 4570,
4704, 4706, 4707, 4708, 4710, 4711, 4712, 4714, 4715, 4716, 4718, 4719, 4722, 4723, 4726, 4727,
4730, 4731, 4734, 4735, 4736, 4738, 4739, 4742, 4743, 4746, 4747, 4748, 4750, 4751, 4752, 4754,
4755, 4756, 4758, 4759, 4760, 4762, 4763, 4764, 4766, 4767, 4768, 4770, 4771, 4772, 4774, 4775,
4778, 4779, 4780, 4782, 4783, 4784, 4786, 4787, 4788, 4790, 4791, 4794, 4795, 4798, 4799, 4800,
4802, 4803, 4804, 4806, 4807, 4808, 4810, 4811, 4812, 4814, 4815, 4816, 4818, 4819, 4820, 4822,
4823, 4826, 4827, 4830, 4831, 4834, 4835, 4838, 4839, 4840, 4842, 4843, 4844, 4846, 4847, 4848,
4850, 4851, 4852, 4854, 4855, 4856, 4858, 4859, 4960, 4962, 5036, 5038, 6126, 6135, 6140, 6225,
6226, 6227, 6228, 6229, 6230, 6232, 6233, 6234, 6235, 6236, 6332,
};
}
}

Просмотреть файл

Просмотреть файл

@ -5,8 +5,9 @@
static void Main(string[] args)
{
BidiTest.Run();
BidiCharacterTest.Run();
//BidiTest.Run();
//BidiCharacterTest.Run();
LineBreakTest.Run();
}
}
}

Просмотреть файл

@ -22,6 +22,12 @@
<None Update="BidiTest.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="LineBreakTest - Copy.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="LineBreakTest.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

Просмотреть файл

@ -19,10 +19,4 @@
<ProjectReference Include="..\Topten.RichTextKit\Topten.RichTextKit.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="LineBreakTest.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

Просмотреть файл

@ -8,7 +8,7 @@ namespace Topten.RichTextKit
/// Information about a potential line break position
/// </summary>
[DebuggerDisplay("{PositionMeasure}/{PositionWrap} @ {Required}")]
internal class LineBreak
internal struct LineBreak
{
/// <summary>
/// Constructor