Merge pull request #3355 from nextcloud/enhancement/jwt-threading

Implement the jwz threading algorithm
This commit is contained in:
Greta 2020-07-08 11:03:54 +02:00 коммит произвёл GitHub
Родитель b364465f3c d68a894580
Коммит 4d78b957ec
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 1056 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,140 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCA\Mail\IMAP\Threading;
use RuntimeException;
use function array_key_exists;
use function spl_object_id;
/**
 * A node of the thread tree.
 *
 * A container optionally carries a message, knows its parent container and
 * tracks its child containers. Children are indexed by their spl_object_id,
 * so the keys of the children array are NOT sequential integers starting at 0.
 */
class Container {
	/** @var Message|null */
	private $message;

	/** @var bool */
	private $root;

	/** @var Container|null */
	private $parent;

	/** @var Container[] indexed by spl_object_id of the child */
	private $children = [];

	private function __construct(?Message $message,
								 bool $root = false) {
		$this->message = $message;
		$this->root = $root;
	}

	/**
	 * Create a message-less container that is flagged as root.
	 */
	public static function root(): self {
		return new self(
			null,
			true
		);
	}

	/**
	 * Create a non-root container without a message.
	 */
	public static function empty(): self {
		return new self(
			null
		);
	}

	/**
	 * Create a non-root container holding the given message.
	 */
	public static function with(Message $message): self {
		return new self(
			$message
		);
	}

	/**
	 * Store (or replace) the message of this container.
	 */
	public function fill(Message $message): void {
		$this->message = $message;
	}

	public function hasMessage(): bool {
		return $this->message !== null;
	}

	public function getMessage(): ?Message {
		return $this->message;
	}

	public function isRoot(): bool {
		return $this->root;
	}

	public function hasParent(): bool {
		return $this->parent !== null;
	}

	/**
	 * @return Container the container this one is currently linked to
	 *
	 * @throws RuntimeException if this container is a root container or is
	 *                          not linked to any parent; check hasParent()
	 *                          first when that is a legitimate state
	 */
	public function getParent(): Container {
		if ($this->isRoot()) {
			throw new RuntimeException('Container root has no parent');
		}
		if ($this->parent === null) {
			// Without this guard the non-nullable return type turns the
			// missing parent into an unrelated TypeError.
			throw new RuntimeException('Container has no parent');
		}
		return $this->parent;
	}

	/**
	 * Move this container below $parent, detaching it from its previous
	 * parent first. Passing null only detaches it.
	 */
	public function setParent(?Container $parent): void {
		$this->unlink();
		$this->parent = $parent;
		if ($parent !== null) {
			$parent->children[spl_object_id($this)] = $this;
		}
	}

	/**
	 * Check whether $container is the parent, grandparent, ... of this one.
	 *
	 * The parent chain is walked iteratively. Should the chain ever be
	 * cyclic, the walk stops at the first container seen twice and reports
	 * false instead of running forever.
	 */
	public function hasAncestor(Container $container): bool {
		$visited = [];
		for ($ancestor = $this->parent; $ancestor !== null; $ancestor = $ancestor->parent) {
			if ($ancestor === $container) {
				return true;
			}
			$ancestorId = spl_object_id($ancestor);
			if (isset($visited[$ancestorId])) {
				return false;
			}
			$visited[$ancestorId] = true;
		}
		return false;
	}

	/**
	 * Detach this container from its parent, if it has one.
	 */
	public function unlink(): void {
		if ($this->parent !== null) {
			$this->parent->removeChild($this);
		}
		$this->parent = null;
	}

	private function removeChild(Container $child): void {
		// unset() on a missing key is a no-op, no existence check required
		unset($this->children[spl_object_id($child)]);
	}

	public function hasChildren(): bool {
		return $this->children !== [];
	}

	/**
	 * @return Container[] indexed by spl_object_id, in insertion order
	 */
	public function getChildren(): array {
		return $this->children;
	}
}

Просмотреть файл

@ -0,0 +1,75 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCA\Mail\IMAP\Threading;
use function str_replace;
use function strpos;
/**
 * The subset of a mail message that is needed to build threads: its subject,
 * its ID and the IDs it references.
 */
class Message {
	/**
	 * One or more leading reply markers ("Re:", "RE: re:", "Re :") together
	 * with the whitespace around them, matched case-insensitively and only
	 * at the start of the subject.
	 */
	private const REPLY_PREFIX_PATTERN = '/^\s*(?:re\s*:\s*)+/i';

	/** @var string */
	private $subject;

	/** @var string */
	private $id;

	/** @var string[] */
	private $references;

	/**
	 * @param string[] $references
	 */
	public function __construct(string $subject,
								string $id,
								array $references) {
		$this->subject = $subject;
		$this->id = $id;
		$this->references = $references;
	}

	/**
	 * @return bool true if the subject starts with a reply marker
	 */
	public function hasReSubject(): bool {
		return preg_match(self::REPLY_PREFIX_PATTERN, $this->subject) === 1;
	}

	public function getSubject(): string {
		return $this->subject;
	}

	/**
	 * Get the subject without leading reply markers and without surrounding
	 * whitespace, so that "Re: Hello", "RE:Hello" and "Hello" all compare
	 * equal. Occurrences of "Re:" inside the subject are left untouched.
	 */
	public function getBaseSubject(): string {
		$stripped = preg_replace(self::REPLY_PREFIX_PATTERN, '', $this->subject);
		// preg_replace yields null on a PCRE error; fall back to the raw subject
		return trim($stripped ?? $this->subject);
	}

	public function getId(): string {
		return $this->id;
	}

	/**
	 * @return string[]
	 */
	public function getReferences(): array {
		return $this->references;
	}
}

Просмотреть файл

@ -0,0 +1,236 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCA\Mail\IMAP\Threading;
use OCA\Mail\Support\PerformanceLogger;
use function array_key_exists;
use function count;
/**
 * Builds message threads with the jwz threading algorithm. The "Step N"
 * comments refer to the steps of that algorithm.
 */
class ThreadBuilder {
	/** @var PerformanceLogger */
	private $performanceLogger;

	public function __construct(PerformanceLogger $performanceLogger) {
		$this->performanceLogger = $performanceLogger;
	}

	/**
	 * Thread the given messages.
	 *
	 * @param Message[] $messages
	 *
	 * @return Container[] the root set: one container per thread, with
	 *                     sequential numeric keys
	 */
	public function build(array $messages): array {
		$log = $this->performanceLogger->start('Threading ' . count($messages) . ' messages');

		// Step 1
		$idTable = $this->buildIdTable($messages);
		$log->step('build ID table');

		// Step 2
		$rootContainer = $this->buildRootContainer($idTable);
		$log->step('build root container');

		// Step 3
		unset($idTable);
		$log->step('free ID table');

		// Step 4
		$this->pruneContainers($rootContainer);
		$log->step('prune containers');

		// Step 5
		$this->groupBySubject($rootContainer);
		$log->step('group by subject');

		$log->end();

		// Children are keyed by object ID, hand out a plain list instead
		return array_values($rootContainer->getChildren());
	}

	/**
	 * Create one container per known or referenced message ID and link the
	 * containers according to the references of each message.
	 *
	 * @param Message[] $messages
	 *
	 * @return Container[] indexed by message ID
	 */
	private function buildIdTable(array $messages): array {
		/** @var Container[] $idTable */
		$idTable = [];

		foreach ($messages as $message) {
			/** @var Message $message */
			// Step 1.A
			// NOTE(review): for a second message with an already used ID the
			// table entry is overwritten; the earlier container then stays
			// reachable only through links it already has. Confirm whether
			// duplicate IDs can occur in the input.
			$container = $idTable[$message->getId()] ?? null;
			if ($container !== null && !$container->hasMessage()) {
				$container->fill($message);
			} else {
				$container = $idTable[$message->getId()] = Container::with($message);
			}

			// Step 1.B
			$parent = null;
			foreach ($message->getReferences() as $reference) {
				$refContainer = $idTable[$reference] ?? null;
				if ($refContainer === null) {
					$refContainer = $idTable[$reference] = Container::empty();
				}

				// Link the referenced containers in the order of the
				// references, but keep existing links and never add a link
				// that would close a loop (the container itself or one of
				// its descendants as its parent).
				if ($parent !== null
					&& $parent !== $refContainer
					&& !$refContainer->hasParent()
					&& !$parent->hasAncestor($refContainer)) {
					$refContainer->setParent($parent);
				}

				$parent = $refContainer;
			}

			// Step 1.C
			// The last reference is the actual parent and replaces any parent
			// that was presumed earlier, unless that would close a loop.
			if ($parent === null) {
				$container->setParent(null);
			} elseif ($parent !== $container && !$parent->hasAncestor($container)) {
				$container->setParent($parent);
			}
		}

		return $idTable;
	}

	/**
	 * Collect all containers without a parent below a new root container.
	 *
	 * @param Container[] $idTable
	 *
	 * @return Container
	 */
	private function buildRootContainer(array $idTable): Container {
		// A real root container, so that isRoot() identifies the root set
		$rootContainer = Container::root();

		foreach ($idTable as $container) {
			if (!$container->hasParent()) {
				$container->setParent($rootContainer);
			}
		}

		return $rootContainer;
	}

	/**
	 * Remove the message-less containers below $root.
	 *
	 * Sub trees are pruned before their parent is looked at, so a container
	 * is judged by its final set of children and promoted children never
	 * need another pass.
	 *
	 * @param Container $root the container whose children are pruned
	 */
	private function pruneContainers(Container $root): void {
		// getChildren() returns a copy, it is safe to relink while iterating
		foreach ($root->getChildren() as $container) {
			$this->pruneContainers($container);

			if ($container->hasMessage()) {
				continue;
			}

			// Step 4.A
			if (!$container->hasChildren()) {
				$container->unlink();
				continue;
			}

			// Step 4.B
			$children = $container->getChildren();
			if ($root->isRoot() && count($children) > 1) {
				// Do not promote several children into the root set, the
				// empty container keeps them together as one thread
				continue;
			}
			foreach ($children as $child) {
				$child->setParent($root);
			}
			$container->unlink();
		}
	}

	/**
	 * Join threads of the root set that share the same base subject.
	 *
	 * @param Container $root
	 */
	private function groupBySubject(Container $root): void {
		// Step 5.A
		/** @var Container[] $subjectTable */
		$subjectTable = [];

		// Step 5.B
		foreach ($root->getChildren() as $container) {
			$subject = $this->getSubTreeSubject($container);
			if ($subject === '') {
				continue;
			}

			$existingContainer = $subjectTable[$subject] ?? null;
			$existingMessage = $existingContainer !== null ? $existingContainer->getMessage() : null;
			$thisMessage = $container->getMessage();

			// Prefer an empty container over one with a message and a
			// non-reply over a reply as the representative of a subject
			if ($existingContainer === null
				|| (!$container->hasMessage() && $existingContainer->hasMessage())
				|| ($existingMessage !== null && $existingMessage->hasReSubject() && $thisMessage !== null && !$thisMessage->hasReSubject())) {
				$subjectTable[$subject] = $container;
			}
		}

		// Step 5.C
		foreach ($root->getChildren() as $container) {
			$subject = $this->getSubTreeSubject($container);
			$subjectContainer = $subjectTable[$subject] ?? null;
			if ($subjectContainer === null || $subjectContainer === $container) {
				continue;
			}

			if (!$container->hasMessage() && !$subjectContainer->hasMessage()) {
				// Merge the current subject container into this one, drop the
				// now empty container and replace it in the table
				foreach ($subjectContainer->getChildren() as $child) {
					$child->setParent($container);
				}
				$subjectContainer->unlink();
				$subjectTable[$subject] = $container;
			} elseif (!$container->hasMessage() && $subjectContainer->hasMessage()) {
				$subjectContainer->setParent($container);
			} elseif ($container->hasMessage() && !$subjectContainer->hasMessage()) {
				$container->setParent($subjectContainer);
			} elseif ($subjectContainer->hasMessage() && !$subjectContainer->getMessage()->hasReSubject()
				&& $container->hasMessage() && $container->getMessage()->hasReSubject()) {
				// A reply goes below the message it most likely replies to
				$container->setParent($subjectContainer);
			} else {
				// Two equal messages: make them siblings below a new container
				$new = Container::empty();
				$container->setParent($new);
				$subjectContainer->setParent($new);
				$new->setParent($root);
				$subjectTable[$subject] = $new;
			}
		}
	}

	/**
	 * Get the base subject of the container's message or, for a container
	 * without a message, of its first child's message.
	 *
	 * @return string the base subject, '' if none could be determined
	 */
	private function getSubTreeSubject(Container $container): string {
		if (($message = $container->getMessage()) !== null) {
			return $message->getBaseSubject();
		}

		// Children are keyed by object ID, re-index to reach the first one
		$firstChild = array_values($container->getChildren())[0] ?? null;
		if ($firstChild === null || ($message = $firstChild->getMessage()) === null) {
			// should not happen
			return '';
		}
		return $message->getBaseSubject();
	}
}

Просмотреть файл

@ -0,0 +1,117 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCA\Mail\Tests\Unit\IMAP\Threading;
use ChristophWurst\Nextcloud\Testing\TestCase;
use OCA\Mail\IMAP\Threading\Container;
use OCA\Mail\IMAP\Threading\Message;
/**
 * Unit tests for the linking and message handling of thread containers.
 */
class ContainerTest extends TestCase {
	public function testEmpty(): void {
		$container = Container::empty();

		$this->assertFalse($container->hasMessage());
		$this->assertFalse($container->hasParent());
		$this->assertFalse($container->hasChildren());
	}

	public function testWithMessage(): void {
		$message = $this->createMock(Message::class);

		$container = Container::with($message);

		$this->assertTrue($container->hasMessage());
		$this->assertFalse($container->hasParent());
		$this->assertFalse($container->hasChildren());
	}

	public function testFillWithMessage(): void {
		$message = $this->createMock(Message::class);
		// Start without a message so that fill() is what is actually tested
		$container = Container::empty();

		$container->fill($message);

		$this->assertTrue($container->hasMessage());
		$this->assertSame($message, $container->getMessage());
		$this->assertEquals(Container::with($message), $container);
	}

	public function testRootHasNoParent(): void {
		$root = Container::root();

		$this->assertTrue($root->isRoot());
		$this->assertFalse($root->hasParent());
		$this->expectException(\RuntimeException::class);
		$root->getParent();
	}

	public function testSetParent(): void {
		$parent = Container::empty();
		$container = Container::empty();

		$container->setParent($parent);

		$this->assertTrue($container->hasParent());
		$this->assertCount(1, $parent->getChildren());
	}

	public function testReSetParent(): void {
		$parent1 = Container::empty();
		$parent2 = Container::empty();
		$container = Container::empty();

		$container->setParent($parent1);
		$container->setParent($parent2);

		$this->assertSame($parent2, $container->getParent());
		$this->assertTrue($container->hasParent());
		$this->assertEmpty($parent1->getChildren());
		$this->assertCount(1, $parent2->getChildren());
	}

	public function testHasNoAncestor(): void {
		$unrelated = Container::empty();
		$container = Container::empty();

		$hasAncestor = $container->hasAncestor($unrelated);

		$this->assertFalse($hasAncestor);
	}

	public function testHasAncestor(): void {
		$grandmother = Container::empty();
		$mother = Container::empty();
		$container = Container::empty();
		$container->setParent($mother);
		$mother->setParent($grandmother);

		$hasMother = $container->hasAncestor($mother);
		$hasGrandMother = $container->hasAncestor($grandmother);

		$this->assertTrue($hasMother);
		$this->assertTrue($hasGrandMother);
	}

	public function testUnlink(): void {
		$parent = Container::empty();
		$container = Container::empty();
		$container->setParent($parent);

		$container->unlink();

		$this->assertFalse($container->hasParent());
		$this->assertEmpty($parent->getChildren());
	}
}

Просмотреть файл

@ -0,0 +1,43 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace OCA\Mail\Tests\Unit\IMAP\Threading;
use ChristophWurst\Nextcloud\Testing\TestCase;
use OCA\Mail\IMAP\Threading\Message;
/**
 * Unit tests for the threading message value object.
 */
class MessageTest extends TestCase {
	public function testGetId(): void {
		$message = new Message('', 'id', []);

		$this->assertSame('id', $message->getId());
	}

	// Renamed from getGetReferences: without the "test" prefix PHPUnit never
	// ran this method.
	public function testGetReferences(): void {
		$message = new Message('', 'id', ['ref1', 'ref2']);

		$this->assertEquals(['ref1', 'ref2'], $message->getReferences());
	}

	public function testGetSubject(): void {
		$message = new Message('Re:s1', 'id', []);

		$this->assertSame('Re:s1', $message->getSubject());
	}

	public function testReplySubject(): void {
		$message = new Message('Re:s1', 'id', []);

		$this->assertTrue($message->hasReSubject());
		$this->assertSame('s1', $message->getBaseSubject());
	}

	public function testPlainSubject(): void {
		$message = new Message('s1', 'id', []);

		$this->assertFalse($message->hasReSubject());
		$this->assertSame('s1', $message->getBaseSubject());
	}
}

Просмотреть файл

@ -0,0 +1,445 @@
<?php
declare(strict_types=1);
/**
* @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
namespace mail\lib\IMAP\Threading;
use ChristophWurst\Nextcloud\Testing\TestCase;
use OCA\Mail\IMAP\Threading\Container;
use OCA\Mail\IMAP\Threading\Message;
use OCA\Mail\IMAP\Threading\ThreadBuilder;
use OCA\Mail\Support\PerformanceLogger;
use PHPUnit\Framework\MockObject\MockObject;
/**
 * Feeds small sets of messages into the thread builder and compares the
 * resulting trees, reduced to message IDs, with the expected structure.
 */
class ThreadBuilderTest extends TestCase {
	/** @var PerformanceLogger|MockObject */
	private $performanceLogger;

	/** @var ThreadBuilder */
	private $builder;

	protected function setUp(): void {
		parent::setUp();

		$this->performanceLogger = $this->createMock(PerformanceLogger::class);
		$this->builder = new ThreadBuilder($this->performanceLogger);
	}

	/**
	 * Reduce a set of containers to nested arrays of message IDs.
	 *
	 * @param Container[] $set
	 *
	 * @return array
	 */
	private function simplify(array $set): array {
		$simplified = [];
		foreach ($set as $container) {
			$message = $container->getMessage();
			$simplified[] = [
				'id' => $message === null ? null : $message->getId(),
				'children' => $this->simplify($container->getChildren()),
			];
		}
		return $simplified;
	}

	/**
	 * Describe one expected tree node.
	 *
	 * @param string|null $id the message ID, null for an empty container
	 * @param array $children expected child nodes, in order
	 *
	 * @return array
	 */
	private static function node(?string $id, array $children = []): array {
		return [
			'id' => $id,
			'children' => $children,
		];
	}

	/**
	 * Thread the messages and compare the simplified result.
	 *
	 * @param array $expected list of expected root nodes
	 * @param Message[] $messages
	 */
	private function assertThreads(array $expected, array $messages): void {
		$threads = $this->builder->build($messages);

		$this->assertEquals($expected, $this->simplify($threads));
	}

	public function testBuildEmpty(): void {
		$threads = $this->builder->build([]);

		$this->assertEquals([], $threads);
	}

	public function testBuildFlat(): void {
		$this->assertThreads(
			[
				self::node('id1'),
				self::node('id2'),
				self::node('id3'),
			],
			[
				new Message('s1', 'id1', []),
				new Message('s2', 'id2', []),
				new Message('s3', 'id3', []),
			]
		);
	}

	public function testBuildOneDeep(): void {
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2'),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('Re:s1', 'id2', ['id1']),
			]
		);
	}

	public function testBuildOneDeepMismatchingSubjects(): void {
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2'),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('s2', 'id2', ['id1']),
			]
		);
	}

	public function testBuildOneDeepNoReferences(): void {
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2'),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('Re:s1', 'id2', []),
			]
		);
	}

	public function testBuildTwoDeep(): void {
		// 1
		// |
		// 2
		// |
		// 3
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2', [
						self::node('id3'),
					]),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('s2', 'id2', ['id1']),
				new Message('s3', 'id3', ['id2']),
			]
		);
	}

	public function testBuildFourDeep(): void {
		// 1
		// |
		// 2
		// |
		// 3
		// |
		// 4
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2', [
						self::node('id3', [
							self::node('id4'),
						]),
					]),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('Re:s1', 'id2', ['id1']),
				new Message('Re:s1', 'id3', ['id2']),
				new Message('Re:s1', 'id4', ['id3']),
			]
		);
	}

	public function testBuildTree(): void {
		//       1
		//     /   \
		//    2     3
		//   / \   / \
		//  4   5 6   7
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2', [
						self::node('id4'),
						self::node('id5'),
					]),
					self::node('id3', [
						self::node('id6'),
						self::node('id7'),
					]),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('Re:s1', 'id2', ['id1']),
				new Message('Re:s1', 'id3', ['id1']),
				new Message('Re:s1', 'id4', ['id1', 'id2']),
				new Message('Re:s1', 'id5', ['id1', 'id2']),
				new Message('Re:s1', 'id6', ['id1', 'id3']),
				new Message('Re:s1', 'id7', ['id1', 'id3']),
			]
		);
	}

	public function testBuildTreePartialRefs(): void {
		//       1
		//     /   \
		//    2     3
		//   / \   / \
		//  4   5 6   7
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2', [
						self::node('id4'),
						self::node('id5'),
					]),
					self::node('id3', [
						self::node('id6'),
						self::node('id7'),
					]),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('Re:s1', 'id2', ['id1']),
				new Message('Re:s1', 'id3', ['id1']),
				new Message('Re:s1', 'id4', ['id2']),
				new Message('Re:s1', 'id5', ['id2']),
				new Message('Re:s1', 'id6', ['id3']),
				new Message('Re:s1', 'id7', ['id3']),
			]
		);
	}

	public function testBuildCyclic(): void {
		$this->assertThreads(
			[
				self::node('id2', [
					self::node('id1'),
				]),
			],
			[
				new Message('s1', 'id1', ['id2']),
				new Message('s2', 'id2', ['id1']),
			]
		);
	}

	public function testBuildSiblingsWithRoot(): void {
		$this->assertThreads(
			[
				self::node('id1', [
					self::node('id2'),
					self::node('id3'),
				]),
			],
			[
				new Message('s1', 'id1', []),
				new Message('s2', 'id2', ['id1']),
				new Message('s3', 'id3', ['id1']),
			]
		);
	}

	public function testBuildSiblingsWithoutRoot(): void {
		$this->assertThreads(
			[
				self::node(null, [
					self::node('id2'),
					self::node('id3'),
				]),
			],
			[
				new Message('Re:s1', 'id2', ['id1']),
				new Message('Re:s2', 'id3', ['id1']),
			]
		);
	}
}