From 2698214c4122d4f5f63f26f7a204035fe0d4f211 Mon Sep 17 00:00:00 2001 From: Sean Molenaar Date: Sun, 17 Mar 2019 08:23:37 +0100 Subject: [PATCH] fix/allow CDATA encoding (#428) --- AUTHORS.md | 4 +- appinfo/register_command.php | 2 + composer.json | 117 ++++++++++--------- composer.lock | 5 +- lib/AppInfo/Application.php | 8 +- lib/Command/ShowFeed.php | 70 +++++++++++ lib/Config/FetcherConfig.php | 66 ++++++----- lib/Db/Item.php | 4 +- lib/Fetcher/FeedFetcher.php | 13 ++- lib/Utility/ProxyConfigParser.php | 66 ----------- tests/Unit/Fetcher/FeedFetcherTest.php | 30 ++--- tests/Unit/Utility/ProxyConfigParserTest.php | 103 ---------------- 12 files changed, 202 insertions(+), 286 deletions(-) create mode 100644 lib/Command/ShowFeed.php delete mode 100644 lib/Utility/ProxyConfigParser.php delete mode 100644 tests/Unit/Utility/ProxyConfigParserTest.php diff --git a/AUTHORS.md b/AUTHORS.md index 8266c617f..8ab801778 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -11,11 +11,11 @@ * [raghunayyar](mailto:me@iraghu.com) * [bastei](mailto:bastei@users.noreply.github.com) * [Bernhard Posselt](mailto:bep@foryouandyourcustomers.com) +* [Sean Molenaar](mailto:sean@seanmolenaar.eu) * [Thomas Müller](mailto:thomas.mueller@tmit.eu) * [Hoàng Đức Hiếu](mailto:hdhoang@zahe.me) * [Benjamin Brahmer](mailto:info@b-brahmer.de) * [Daniel Opitz](mailto:git@copynpaste.de) -* [Sean Molenaar](mailto:sean@seanmolenaar.eu) * [rakekniven](mailto:mark.ziegler@rakekniven.de) * [David-Development](mailto:david-dev@live.de) * [Koen Martens](mailto:gmc@sonologic.nl) @@ -95,6 +95,7 @@ * [Zach DeCook](mailto:zachdecook@gmail.com) * [amittel](mailto:arnulf.mittelstaedt@gmail.com) * [anoy](mailto:anoymouserver@users.noreply.github.com) +* [b_b](mailto:bruno@eliaz.fr) * [b_b](mailto:brunobergot@gmail.com) * [bjoerns1983](mailto:bjoern@sengotta.net) * [blackcrack](mailto:blackcrack@blackysgate.de) @@ -102,6 +103,7 @@ * [joeplus](mailto:joerg@honululu.Speedport_W_723V_1_32_000) * [kondou](mailto:kondou@ts.unde.re) * [markusj](mailto:markusj@users.noreply.github.com) +* [nexus-uw](mailto:you@example.com) * [repat](mailto:repat@repat.de) * [ritchiewilson](mailto:rawilson52@gmail.com) * [sonologic](mailto:gmc@sonologic.nl) diff --git a/appinfo/register_command.php b/appinfo/register_command.php index 3ce99b2a1..1fc006b76 100644 --- a/appinfo/register_command.php +++ b/appinfo/register_command.php @@ -11,6 +11,7 @@ namespace OCA\News\AppInfo; +use OCA\News\Command\ShowFeed; use OCA\News\Command\Updater\UpdateFeed; use OCA\News\Command\Updater\AllFeeds; use OCA\News\Command\Updater\BeforeUpdate; @@ -24,3 +25,4 @@ $application->add($container->query(UpdateFeed::class)); $application->add($container->query(BeforeUpdate::class)); $application->add($container->query(AfterUpdate::class)); $application->add($container->query(ExploreGenerator::class)); +$application->add($container->query(ShowFeed::class)); diff --git a/composer.json b/composer.json index 538ecbc70..32c217538 100644 --- a/composer.json +++ b/composer.json @@ -1,64 +1,65 @@ { - "type": "project", - "license": "AGPL-3.0", - "description": "An RSS/Atom feed reader. Requires Nextcloud backgroundjobs or an updater script to be enabled to update your feeds. See the README.md in the apps top directory", - "homepage": "https://github.com/nextcloud/news", - "authors": [ - { - "name": "Bernhard Posselt", - "email": "dev@bernhard-posselt.com", - "homepage": "https://bernhard-posselt.com", - "role": "Developer" - }, - { - "name": "Sean Molenaar", - "email": "sean@seanmolenaar.eu", - "homepage": "https://seanmolenaar.eu", - "role": "Developer" - }, - { - "name": "Alessandro Cosentino", - "homepage": "http://algorithmsforthekitchen.com/", - "email": "cosenal@gmail.com", - "role": "Developer" - }, - { - "name": "Jan-Christoph Borchardt", - "email": "hey@jancborchardt.net", - "homepage": "http://jancborchardt.net/", - "role": "Designer" - } - ], - "support": { - "irc": "irc://irc.freenode.org/nextcloud-news", - "issues": "https://github.com/nextcloud/news/issues", - "source": "https://github.com/nextcloud/news/" + "type": "project", + "license": "AGPL-3.0", + "description": "An RSS/Atom feed reader. Requires Nextcloud backgroundjobs or an updater script to be enabled to update your feeds. See the README.md in the apps top directory", + "homepage": "https://github.com/nextcloud/news", + "authors": [ + { + "name": "Bernhard Posselt", + "email": "dev@bernhard-posselt.com", + "homepage": "https://bernhard-posselt.com", + "role": "Developer" }, - "require": { - "php": "^7.0", - "ezyang/htmlpurifier": "4.10.0", - "pear/net_url2": "2.2.2", - "riimu/kit-pathjoin": "1.2.0", - "debril/feed-io": "^3.1", - "arthurhoaro/favicon": "^1.2", - "ext-json": "*" + { + "name": "Sean Molenaar", + "email": "sean@seanmolenaar.eu", + "homepage": "https://seanmolenaar.eu", + "role": "Developer" }, - "require-dev": { - "phpunit/phpunit": "^6.5", - "squizlabs/php_codesniffer": "^3.3", - "guzzlehttp/guzzle": "~6.3" + { + "name": "Alessandro Cosentino", + "homepage": "http://algorithmsforthekitchen.com/", + "email": "cosenal@gmail.com", + "role": "Developer" }, - "replace": { - "guzzlehttp/guzzle": "*" - }, - "autoload": { - "psr-4": { - "OCA\\News\\": "lib/" - } - }, - "autoload-dev": { - "psr-4": { - "OCA\\News\\Tests\\": "tests/" - } + { + "name": "Jan-Christoph Borchardt", + "email": "hey@jancborchardt.net", + "homepage": "http://jancborchardt.net/", + "role": "Designer" } + ], + "support": { + "irc": "irc://irc.freenode.org/nextcloud-news", + "issues": "https://github.com/nextcloud/news/issues", + "source": "https://github.com/nextcloud/news/" + }, + "require": { + "php": "^7.0", + "ezyang/htmlpurifier": "4.10.0", + "pear/net_url2": "2.2.2", + "riimu/kit-pathjoin": "1.2.0", + "debril/feed-io": "^3.1", + "arthurhoaro/favicon": "^1.2", + "ext-json": "*", + "guzzlehttp/guzzle": "^6.3", + "ext-simplexml": "*" + }, + "require-dev": { + "phpunit/phpunit": "^6.5", + "squizlabs/php_codesniffer": "^3.3" + }, + "replace": { + "guzzlehttp/guzzle": "*" + }, + "autoload": { + "psr-4": { + "OCA\\News\\": "lib/" + } + }, + "autoload-dev": { + "psr-4": { + "OCA\\News\\Tests\\": "tests/" + } + } } diff --git a/composer.lock b/composer.lock index afc52cd55..7fe887ba5 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "2247ef4a89a64d37158df942d780cdfe", + "content-hash": "fe812fa2682a6ee30eb5ab53805f4aeb", "packages": [ { "name": "arthurhoaro/favicon", @@ -1905,7 +1905,8 @@ "prefer-lowest": false, "platform": { "php": "^7.0", - "ext-json": "*" + "ext-json": "*", + "ext-simplexml": "*" }, "platform-dev": [] } diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 755f3ea70..06d6135dc 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -159,13 +159,9 @@ class Application extends App * Fetchers */ $container->registerService(FetcherConfig::class, function (IContainer $c): FetcherConfig { - // FIXME: move this into a separate class for testing? - $config = $c->query(Config::class); - $proxy = $c->query(ProxyConfigParser::class); - $fConfig = new FetcherConfig(); - $fConfig->setClientTimeout($config->getFeedFetcherTimeout()); - $fConfig->setProxy($proxy); + $fConfig->setConfig($c->query(Config::class)) + ->setProxy($c->query(IConfig::class)); return $fConfig; }); diff --git a/lib/Command/ShowFeed.php b/lib/Command/ShowFeed.php new file mode 100644 index 000000000..bbe1913fa --- /dev/null +++ b/lib/Command/ShowFeed.php @@ -0,0 +1,70 @@ + + * @copyright Sean Molenaar 2019 + */ +namespace OCA\News\Command; + +use FeedIo\FeedIo; +use Favicon\Favicon; + +use OCA\News\Fetcher\Fetcher; +use Symfony\Component\Console\Command\Command; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputArgument; +use Symfony\Component\Console\Input\InputOption; +use Symfony\Component\Console\Output\OutputInterface; + +/** + * This is used for debugging feed data: + * ./occ news:show-feed www.feed.com + */ +class ShowFeed extends Command +{ + /** + * Feed and favicon fetcher. + */ + protected $feedFetcher; + + /** + * Set up class. + * + * @param Fetcher $feedFetcher Feed reader + */ + public function __construct(Fetcher $feedFetcher) + { + $this->feedFetcher = $feedFetcher; + parent::__construct(); + } + + protected function configure() + { + $this->setName('news:show-feed') + ->setDescription('Prints a JSON string which represents the given feed as it would be in the DB.') + ->addArgument('feed', InputArgument::REQUIRED, 'Feed to parse') + ->addOption('user', 'u', InputOption::VALUE_OPTIONAL, 'Username for the feed') + ->addOption('password', 'p', InputOption::VALUE_OPTIONAL, 'Password for the feed'); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $url = $input->getArgument('feed'); + $user = $input->getOption('user'); + $password = $input->getOption('password'); + + try { + list($feed, $items) = $this->feedFetcher->fetch($url, true, null, $user, $password); + $output->writeln("Feed: " . json_encode($feed, JSON_PRETTY_PRINT)); + $output->writeln("Items: " . json_encode($items, JSON_PRETTY_PRINT)); + } catch (\Throwable $ex) { + $output->writeln('Failed to fetch feed info:'); + $output->writeln($ex->getMessage()); + return 1; + } + } +} diff --git a/lib/Config/FetcherConfig.php b/lib/Config/FetcherConfig.php index 55603c47c..1459ff032 100644 --- a/lib/Config/FetcherConfig.php +++ b/lib/Config/FetcherConfig.php @@ -16,6 +16,7 @@ namespace OCA\News\Config; use FeedIo\Adapter\ClientInterface; use \GuzzleHttp\Client; use \FeedIo\Adapter\Guzzle\Client as FeedIoClient; +use OCP\IConfig; /** * Class FetcherConfig @@ -26,6 +27,8 @@ class FetcherConfig { protected $client_timeout; protected $proxy; + protected $redirects; + protected $max_size; /** * Configure a guzzle client @@ -36,14 +39,18 @@ class FetcherConfig { if (!class_exists('GuzzleHttp\Collection')) { $config = [ - 'timeout' => $this->getClientTimeout(), + 'timeout' => $this->client_timeout, ]; if (!empty($this->proxy)) { $config['proxy'] = $this->proxy; } - $guzzle = new Client(); + if (!empty($this->redirects)) { + $config['redirect.max'] = $this->redirects; + } + + $guzzle = new Client($config); $client = new FeedIoClient($guzzle); return $client; @@ -51,7 +58,7 @@ class FetcherConfig $config = [ 'request.options' => [ - 'timeout' => $this->getClientTimeout(), + 'timeout' => $this->client_timeout, ], ]; @@ -59,59 +66,54 @@ class FetcherConfig $config['request.options']['proxy'] = $this->proxy; } + if (!empty($this->redirects)) { + $config['request.options']['redirect.max'] = $this->redirects; + } + $guzzle = new Client($config); return new LegacyGuzzleClient($guzzle); } /** - * Set a timeout for the client + * Set settings for config. * - * @param int $timeout The timeout + * @param Config $config The shared configuration * * @return self */ - public function setClientTimeout($timeout) + public function setConfig(Config $config) { - $this->client_timeout = $timeout; + $this->client_timeout = $config->getFeedFetcherTimeout(); + $this->redirects = $config->getMaxRedirects(); + $this->max_size = $config->getMaxSize(); return $this; } - /** - * Get the client timeout. - * - * @return mixed - */ - public function getClientTimeout() - { - return $this->client_timeout; - } - /** * Set the proxy * - * @param \OCA\News\Utility\ProxyConfigParser $proxy The proxy to set. + * @param IConfig $config Nextcloud config. * * @return self */ - public function setProxy($proxy) + public function setProxy(IConfig $config) { - // proxy settings - $proxySettings = $proxy->parse(); - $host = $proxySettings['host']; - $port = $proxySettings['port']; - $user = $proxySettings['user']; - $password = $proxySettings['password']; + $proxy = $config->getSystemValue('proxy', null); + $creds = $config->getSystemValue('proxyuserpwd', null); - $proxy_string = 'https://'; - if (!empty($user)) { - $proxy_string .= $user . ':' . $password . '@'; + if (is_null($proxy)) { + return $this; } - $proxy_string .= $host; - if (!empty($port)) { - $proxy_string .= ':' . $port; + + $url = new \Net_URL2($proxy); + + if ($creds) { + $auth = explode(':', $creds, 2); + $url->setUserinfo($auth[0], $auth[1]); } - $this->proxy = $proxy_string; + + $this->proxy = $url->getNormalizedURL(); return $this; } diff --git a/lib/Db/Item.php b/lib/Db/Item.php index 3a17dd2cb..710f239d4 100644 --- a/lib/Db/Item.php +++ b/lib/Db/Item.php @@ -146,7 +146,7 @@ class Item extends Entity implements IAPI, \JsonSerializable return $this->enclosureMime; } - public function getFeedId(): int + public function getFeedId() { return $this->feedId; } @@ -169,7 +169,7 @@ class Item extends Entity implements IAPI, \JsonSerializable return $this->guidHash; } - public function getId(): int + public function getId() { return $this->id; } diff --git a/lib/Fetcher/FeedFetcher.php b/lib/Fetcher/FeedFetcher.php index 9b4a2f996..fb9da7919 100755 --- a/lib/Fetcher/FeedFetcher.php +++ b/lib/Fetcher/FeedFetcher.php @@ -19,12 +19,14 @@ use FeedIo\Feed\ItemInterface; use FeedIo\FeedInterface; use FeedIo\FeedIo; +use Net_URL2; use OCA\News\Utility\PsrLogger; use OCP\IL10N; use OCA\News\Db\Item; use OCA\News\Db\Feed; use OCA\News\Utility\Time; +use SimpleXMLElement; class FeedFetcher implements IFeedFetcher { @@ -65,10 +67,11 @@ class FeedFetcher implements IFeedFetcher */ public function fetch(string $url, bool $favicon, $lastModified, $user, $password): array { + $url2 = new Net_URL2($url); if (!empty($user) && !empty(trim($user))) { - $url = explode('://', $url); - $url = $url[0] . '://' . urlencode($user) . ':' . urlencode($password) . '@' . $url[1]; + $url2->setUserinfo(urlencode($user), urlencode($password)); } + $url = $url2->getNormalizedURL(); if (is_null($lastModified) || !is_string($lastModified)) { $resource = $this->reader->read($url); } else { @@ -204,6 +207,12 @@ class FeedFetcher implements IFeedFetcher 'HTML-ENTITIES', mb_detect_encoding($body) ); + $data = simplexml_load_string( + '' . $body . '', + SimpleXMLElement::class, + LIBXML_NOCDATA + ); + $body = ($data === false) ? $body : (string) $data; $item->setBody($body); if ($parsedItem->hasMedia()) { diff --git a/lib/Utility/ProxyConfigParser.php b/lib/Utility/ProxyConfigParser.php deleted file mode 100644 index dc9bd3d8b..000000000 --- a/lib/Utility/ProxyConfigParser.php +++ /dev/null @@ -1,66 +0,0 @@ - - * @author Bernhard Posselt - * @copyright 2012 Alessandro Cosentino - * @copyright 2012-2014 Bernhard Posselt - */ - - -namespace OCA\News\Utility; - -use \OCP\IConfig; - -class ProxyConfigParser -{ - - private $config; - - public function __construct(IConfig $config) - { - $this->config = $config; - } - - - /** - * Parses the config and splits up the port + url - * - * @return array - */ - public function parse() - { - $proxy = $this->config->getSystemValue('proxy'); - $userpasswd = $this->config->getSystemValue('proxyuserpwd'); - - $result = [ - 'host' => null, - 'port' => null, - 'user' => null, - 'password' => null - ]; - - // we need to filter out the port -.- - $url = new \Net_URL2($proxy); - $port = $url->getPort(); - - $url->setPort(false); - $host = $url->getUrl(); - - - $result['host'] = $host; - $result['port'] = $port; - - if ($userpasswd) { - $auth = explode(':', $userpasswd, 2); - $result['user'] = $auth[0]; - $result['password'] = $auth[1]; - } - - return $result; - } -} diff --git a/tests/Unit/Fetcher/FeedFetcherTest.php b/tests/Unit/Fetcher/FeedFetcherTest.php index 8f031d365..0ae453c9c 100644 --- a/tests/Unit/Fetcher/FeedFetcherTest.php +++ b/tests/Unit/Fetcher/FeedFetcherTest.php @@ -113,6 +113,7 @@ class FeedFetcherTest extends TestCase private $pub; private $updated; private $body; + private $parsed_body; /** * @var Author */ @@ -172,21 +173,22 @@ class FeedFetcherTest extends TestCase $timeFactory, $this->logger ); - $this->url = 'http://tests'; + $this->url = 'http://tests/'; - $this->permalink = 'http://permalink'; - $this->title = 'my&lt;' title'; - $this->guid = 'hey guid here'; - $this->guid_hash = 'df9a5f84e44bfe38cf44f6070d5b0250'; - $this->body = 'let the bodies hit the floor test'; - $this->pub = 23111; - $this->updated = 23444; - $this->author = new Author(); + $this->permalink = 'http://permalink'; + $this->title = 'my&lt;' title'; + $this->guid = 'hey guid here'; + $this->guid_hash = 'df9a5f84e44bfe38cf44f6070d5b0250'; + $this->body = 'test]]>'; + $this->parsed_body = 'let the bodies hit the floor test'; + $this->pub = 23111; + $this->updated = 23444; + $this->author = new Author(); $this->author->setName('<boogieman'); - $this->enclosure = 'http://enclosure.you'; + $this->enclosure = 'http://enclosure.you'; $this->feed_title = '<a>&its a</a> title'; - $this->feed_link = 'http://tests'; + $this->feed_link = 'http://tests/'; $this->feed_image = '/an/image'; $this->web_favicon = 'http://anon.google.com'; $this->modified = $this->getMockBuilder('\DateTime')->getMock(); @@ -231,9 +233,9 @@ class FeedFetcherTest extends TestCase public function testFetchAccount() { - $this->__setUpReader('http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests'); + $this->__setUpReader('http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests/'); $item = $this->_createItem(); - $feed = $this->_createFeed('de-DE', false, 'http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests'); + $feed = $this->_createFeed('de-DE', false, 'http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests/'); $this->_mockIterator($this->feed_mock, [$this->item_mock]); $result = $this->fetcher->fetch($this->url, false, null, 'account@email.com', 'F9sEU*Rt%:KFK8HMHT&'); @@ -477,7 +479,7 @@ class FeedFetcherTest extends TestCase $item->setTitle('my<\' title'); $item->setGuid($this->guid); $item->setGuidHash($this->guid_hash); - $item->setBody($this->body); + $item->setBody($this->parsed_body); $item->setRtl(false); $item->setLastModified(3); $item->setPubDate(3); diff --git a/tests/Unit/Utility/ProxyConfigParserTest.php b/tests/Unit/Utility/ProxyConfigParserTest.php deleted file mode 100644 index 1433558e7..000000000 --- a/tests/Unit/Utility/ProxyConfigParserTest.php +++ /dev/null @@ -1,103 +0,0 @@ - - * @author Bernhard Posselt - * @copyright 2012 Alessandro Cosentino - * @copyright 2012-2014 Bernhard Posselt - */ - -namespace OCA\News\Tests\Unit\Utility; - - -use OCA\News\Utility\ProxyConfigParser; -use OCP\IConfig; -use PHPUnit\Framework\TestCase; - -class ProxyConfigParserTest extends TestCase -{ - - private $config; - private $feedService; - private $itemService; - private $parser; - - protected function setUp() - { - $this->config = $this->getMockBuilder(IConfig::class) - ->disableOriginalConstructor() - ->getMock(); - $this->parser = new ProxyConfigParser($this->config); - } - - private function setExpectedProxy($proxy=null, $userpasswd=null) - { - $this->config->expects($this->at(0)) - ->method('getSystemValue') - ->with($this->equalTo('proxy')) - ->will($this->returnValue($proxy)); - $this->config->expects($this->at(1)) - ->method('getSystemValue') - ->with($this->equalTo('proxyuserpwd')) - ->will($this->returnValue($userpasswd)); - } - - public function testParsesNoProxy() - { - $expected = [ - 'host' => null, - 'port' => null, - 'user' => null, - 'password' => null - ]; - $this->setExpectedProxy(); - $result = $this->parser->parse(); - $this->assertEquals($expected, $result); - } - - - public function testParsesHost() - { - $expected = [ - 'host' => 'http://google.com/mytest', - 'port' => null, - 'user' => null, - 'password' => null - ]; - $this->setExpectedProxy('http://google.com/mytest'); - $result = $this->parser->parse(); - $this->assertEquals($expected, $result); - } - - - public function testParsesHostAndPort() - { - $expected = [ - 'host' => 'http://google.com/mytest', - 'port' => 89, - 'user' => null, - 'password' => null - ]; - $this->setExpectedProxy('http://google.com:89/mytest'); - $result = $this->parser->parse(); - $this->assertEquals($expected, $result); - } - - - public function testParsesUser() - { - $expected = [ - 'host' => null, - 'port' => null, - 'user' => 'john', - 'password' => 'doe:hey' - ]; - $this->setExpectedProxy(null, 'john:doe:hey'); - $result = $this->parser->parse(); - $this->assertEquals($expected, $result); - } -} \ No newline at end of file