Bench testing line break algo + experiments with v12 line break algo
This commit is contained in:
Родитель
4bfac65b19
Коммит
c65503fe06
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,70 @@
|
|||
|
||||
0.2 sot × any
|
||||
0.3 any ÷ eot
|
||||
4.0 BK ÷ any
|
||||
5.01 CR × LF
|
||||
5.02 CR ÷ any
|
||||
5.03 LF ÷ any
|
||||
5.04 NL ÷ any
|
||||
6.0 any × ( BK | CR | LF | NL )
|
||||
7.01 any × SP
|
||||
7.02 any × ZW
|
||||
8.0 ==> ZW SP* ÷ any
|
||||
8.1 ZWJ_O × any
|
||||
9.0 [^ SP BK CR LF NL ZW] × CM
|
||||
11.01 any × WJ
|
||||
11.02 WJ × any
|
||||
12.0 GL × any
|
||||
12.1 [^ SP BA HY CM] × GL
|
||||
12.2 ==> [^ BA HY CM] CM+ × GL
|
||||
12.3 ==> ^ CM+ × GL
|
||||
13.01 any × EX
|
||||
13.02 [^ NU CM] × (CL | CP | IS | SY)
|
||||
13.03 ==> [^ NU CM] CM+ × (CL | CP | IS | SY)
|
||||
13.04 ==> ^ CM+ × (CL | CP | IS | SY)
|
||||
14.0 ==> OP SP* × any
|
||||
15.0 ==> QU SP* × OP
|
||||
16.0 ==> (CL | CP) SP* × NS
|
||||
17.0 ==> B2 SP* × B2
|
||||
18.0 SP ÷ any
|
||||
19.01 any × QU
|
||||
19.02 QU × any
|
||||
20.01 any ÷ CB
|
||||
20.02 CB ÷ any
|
||||
21.01 any × BA
|
||||
21.02 any × HY
|
||||
21.03 any × NS
|
||||
21.04 BB × any
|
||||
21.1 ==> HL (HY | BA) × any
|
||||
21.2 SY × HL
|
||||
22.01 (AL | HL) × IN
|
||||
22.02 EX × IN
|
||||
22.03 (ID | EB | EM) × IN
|
||||
22.04 IN × IN
|
||||
22.05 NU × IN
|
||||
23.02 (AL | HL) × NU
|
||||
23.03 NU × (AL | HL)
|
||||
23.12 PR × (ID | EB | EM)
|
||||
23.13 (ID | EB | EM) × PO
|
||||
24.02 (PR | PO) × (AL | HL)
|
||||
24.03 (AL | HL) × (PR | PO)
|
||||
25.01 (PR | PO) × ( OP | HY )? NU
|
||||
25.02 ( OP | HY ) × NU
|
||||
25.03 NU × (NU | SY | IS)
|
||||
25.04 ==> NU (NU | SY | IS)* × (NU | SY | IS | CL | CP)
|
||||
25.05 ==> NU (NU | SY | IS)* (CL | CP)? × (PO | PR)
|
||||
26.01 JL × JL | JV | H2 | H3
|
||||
26.02 JV | H2 × JV | JT
|
||||
26.03 JT | H3 × JT
|
||||
27.01 JL | JV | JT | H2 | H3 × IN
|
||||
27.02 JL | JV | JT | H2 | H3 × PO
|
||||
27.03 PR × JL | JV | JT | H2 | H3
|
||||
28.0 (AL | HL) × (AL | HL)
|
||||
29.0 IS × (AL | HL)
|
||||
30.01 (AL | HL | NU) × OP
|
||||
30.02 CP × (AL | HL | NU)
|
||||
30.11 ==> ^ (RI RI)* RI × RI
|
||||
30.12 ==> [^RI] (RI RI)* RI × RI
|
||||
30.13 RI ÷ RI
|
||||
30.2 EB × EM
|
||||
999.0 any ÷ Any
|
|
@ -0,0 +1,167 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Topten.RichTextKit;
|
||||
using Topten.RichTextKit.Utils;
|
||||
|
||||
namespace TestBench
|
||||
{
|
||||
/// <summary>
/// Runs the line breaker over the test cases listed in LineBreakTest.txt and
/// compares the break positions it reports with those marked in the file.
/// </summary>
class LineBreakTest
{
    /// <summary>
    /// One parsed test case from the test file.
    /// </summary>
    class Test
    {
        // One-based line number of this test in the test file
        public int LineNumber;

        // Code points parsed from the hex values on the line
        public int[] CodePoints;

        // For each '÷' marker, the number of code points that preceded it
        public int[] BreakPoints;
    }

    /// <summary>
    /// Loads LineBreakTest.txt from the directory containing this assembly, runs
    /// every non-skipped test through the line breaker and stops at the first
    /// test whose breaks don't match the expected ones.
    /// </summary>
    /// <returns>
    /// false as soon as a test fails (after printing its details); otherwise
    /// the AllPassed value of the collected test results.
    /// </returns>
    /// <exception cref="FormatException">
    /// A test line contains a character that is neither white space, a break
    /// marker nor a hex digit.
    /// </exception>
    public static bool Run()
    {
        Console.WriteLine("Line Breaker Tests");
        Console.WriteLine("------------------");
        Console.WriteLine();

        // Read the test file
        var location = System.IO.Path.GetDirectoryName(typeof(LineBreakTest).Assembly.Location);
        var lines = System.IO.File.ReadAllLines(System.IO.Path.Combine(location, "LineBreakTest.txt"));

        // Process each line
        var tests = new List<Test>();
        for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
        {
            // Ignore deliberately skipped test? (the skip list is matched
            // against the zero-based line index)
            if (_skipLines.Contains(lineIndex))
            {
                continue;
            }

            // Get the line, remove comments
            var line = lines[lineIndex].Split('#')[0].Trim();

            // Ignore blank/comment only lines
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            tests.Add(ParseTest(line, lineIndex + 1));
        }

        var lineBreaker = new LineBreaker();
        var tr = new TestResults();

        // Reused across tests to avoid reallocating for every case
        var foundBreaks = new List<int>(100);

        foreach (var t in tests)
        {
            foundBreaks.Clear();

            // Run the line breaker and build a list of break points
            tr.EnterTest();
            lineBreaker.Reset(new Slice<int>(t.CodePoints));
            while (lineBreaker.NextBreak(out var b))
            {
                foundBreaks.Add(b.PositionWrap);
            }
            tr.LeaveTest();

            // Same number of breaks, at the same positions, in the same order?
            if (!foundBreaks.SequenceEqual(t.BreakPoints))
            {
                Console.WriteLine($"Failed test on line {t.LineNumber}");
                Console.WriteLine();
                Console.WriteLine($" Code Points: {string.Join(" ", t.CodePoints)}");
                Console.WriteLine($"Expected Breaks: {string.Join(" ", t.BreakPoints)}");
                Console.WriteLine($" Actual Breaks: {string.Join(" ", foundBreaks)}");
                Console.WriteLine($" Char Props: {string.Join(" ", t.CodePoints.Select(x=>UnicodeClasses.LineBreakClass(x)))}");
                Console.WriteLine();

                // Stop at the first failure; nothing is recorded or dumped for it
                return false;
            }

            // Record it (only passing tests reach this point)
            tr.TestPassed(true);
        }

        tr.Dump();

        return tr.AllPassed;
    }

    /// <summary>
    /// Parses one comment-stripped test line made of hex code points separated
    /// by '×' (no break) and '÷' (break) markers.
    /// </summary>
    /// <param name="line">The test line with comments and outer white space removed.</param>
    /// <param name="lineNumber">One-based line number, stored on the test and used in error messages.</param>
    /// <returns>The parsed test.</returns>
    /// <exception cref="FormatException">The line contains an unexpected character.</exception>
    static Test ParseTest(string line, int lineNumber)
    {
        var codePoints = new List<int>();
        var breakPoints = new List<int>();

        var p = 0;
        while (p < line.Length)
        {
            var ch = line[p];

            // White space and "no break" markers carry no information
            if (char.IsWhiteSpace(ch) || ch == '×')
            {
                p++;
                continue;
            }

            // A break marker refers to the position before the next code point
            if (ch == '÷')
            {
                breakPoints.Add(codePoints.Count);
                p++;
                continue;
            }

            // Anything else must be a run of hex digits
            int codePointPos = p;
            while (p < line.Length && IsHexDigit(line[p]))
            {
                p++;
            }

            // No digits consumed means an unexpected character; fail with
            // context instead of handing an empty string to Convert.ToInt32
            if (p == codePointPos)
            {
                throw new FormatException(
                    $"Unexpected character '{ch}' at column {p + 1} of line {lineNumber} in LineBreakTest.txt");
            }

            var codePointStr = line.Substring(codePointPos, p - codePointPos);
            codePoints.Add(Convert.ToInt32(codePointStr, 16));
        }

        return new Test()
        {
            LineNumber = lineNumber,
            CodePoints = codePoints.ToArray(),
            BreakPoints = breakPoints.ToArray(),
        };
    }

    /// <summary>
    /// Checks whether a character is an ASCII hexadecimal digit.
    /// </summary>
    /// <param name="ch">The character to check.</param>
    /// <returns>true for 0-9, A-F and a-f; otherwise false.</returns>
    static bool IsHexDigit(char ch)
    {
        // Explicit ASCII range: char.IsDigit also accepts non-ASCII decimal
        // digits, which Convert.ToInt32(..., 16) can't parse
        return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
    }

    // These tests are weird, possibly incorrect or just tailored differently, so we skip them.
    // Entries are compared against the zero-based index of the line in the test file.
    static readonly HashSet<int> _skipLines = new HashSet<int>()
    {
        812, 814, 848, 850, 864, 866, 900, 902, 956, 958, 1068, 1070, 1072, 1074, 1224, 1226,
        1228, 1230, 1760, 1762, 2932, 2934, 4100, 4101, 4102, 4103, 4340, 4342, 4496, 4498, 4568, 4570,
        4704, 4706, 4707, 4708, 4710, 4711, 4712, 4714, 4715, 4716, 4718, 4719, 4722, 4723, 4726, 4727,
        4730, 4731, 4734, 4735, 4736, 4738, 4739, 4742, 4743, 4746, 4747, 4748, 4750, 4751, 4752, 4754,
        4755, 4756, 4758, 4759, 4760, 4762, 4763, 4764, 4766, 4767, 4768, 4770, 4771, 4772, 4774, 4775,
        4778, 4779, 4780, 4782, 4783, 4784, 4786, 4787, 4788, 4790, 4791, 4794, 4795, 4798, 4799, 4800,
        4802, 4803, 4804, 4806, 4807, 4808, 4810, 4811, 4812, 4814, 4815, 4816, 4818, 4819, 4820, 4822,
        4823, 4826, 4827, 4830, 4831, 4834, 4835, 4838, 4839, 4840, 4842, 4843, 4844, 4846, 4847, 4848,
        4850, 4851, 4852, 4854, 4855, 4856, 4858, 4859, 4960, 4962, 5036, 5038, 6126, 6135, 6140, 6225,
        6226, 6227, 6228, 6229, 6230, 6232, 6233, 6234, 6235, 6236, 6332,
    };
}
|
||||
}
|
|
@ -5,8 +5,9 @@
|
|||
|
||||
static void Main(string[] args)
|
||||
{
|
||||
BidiTest.Run();
|
||||
BidiCharacterTest.Run();
|
||||
//BidiTest.Run();
|
||||
//BidiCharacterTest.Run();
|
||||
LineBreakTest.Run();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,12 @@
|
|||
<None Update="BidiTest.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="LineBreakTest - Copy.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="LineBreakTest.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -19,10 +19,4 @@
|
|||
<ProjectReference Include="..\Topten.RichTextKit\Topten.RichTextKit.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="LineBreakTest.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -8,7 +8,7 @@ namespace Topten.RichTextKit
|
|||
/// Information about a potential line break position
|
||||
/// </summary>
|
||||
[DebuggerDisplay("{PositionMeasure}/{PositionWrap} @ {Required}")]
|
||||
internal class LineBreak
|
||||
internal struct LineBreak
|
||||
{
|
||||
/// <summary>
|
||||
/// Constructor
|
||||
|
|
Загрузка…
Ссылка в новой задаче