зеркало из https://github.com/nextcloud/news.git
Reimplement full-text scraping (#563)
Add readability.php scraper Fixes #482 Signed-off-by: Gioele Falcetti <thegio.f@gmail.com>
This commit is contained in:
Родитель
c2f617dd40
Коммит
6673cbc3d9
|
@ -49,7 +49,8 @@
|
|||
"arthurhoaro/favicon": "^1.2",
|
||||
"ext-json": "*",
|
||||
"ext-simplexml": "*",
|
||||
"ext-libxml": "*"
|
||||
"ext-libxml": "*",
|
||||
"andreskrey/readability.php": "^2.1"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^7.5",
|
||||
|
|
|
@ -4,8 +4,61 @@
|
|||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "2178133694c535deff7942447efde95d",
|
||||
"content-hash": "1df92ada3f365f720fb91d2f7232a91f",
|
||||
"packages": [
|
||||
{
|
||||
"name": "andreskrey/readability.php",
|
||||
"version": "v2.1.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/andreskrey/readability.php.git",
|
||||
"reference": "7617a912b6c527909168f5d41d263792f171c42a"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/andreskrey/readability.php/zipball/7617a912b6c527909168f5d41d263792f171c42a",
|
||||
"reference": "7617a912b6c527909168f5d41d263792f171c42a",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-dom": "*",
|
||||
"ext-mbstring": "*",
|
||||
"ext-xml": "*",
|
||||
"php": ">=7.0.0",
|
||||
"psr/log": "^1.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"monolog/monolog": "^1.24",
|
||||
"phpunit/phpunit": "^6.5"
|
||||
},
|
||||
"suggest": {
|
||||
"monolog/monolog": "Allow logging debug information"
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"andreskrey\\Readability\\": "src/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"Apache-2.0"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Andres Rey",
|
||||
"email": "andreskrey@gmail.com",
|
||||
"role": "Lead Developer"
|
||||
}
|
||||
],
|
||||
"description": "A PHP port of Readability.js",
|
||||
"homepage": "https://github.com/andreskrey/readability",
|
||||
"keywords": [
|
||||
"html",
|
||||
"readability"
|
||||
],
|
||||
"time": "2019-07-22T21:42:25+00:00"
|
||||
},
|
||||
{
|
||||
"name": "arthurhoaro/favicon",
|
||||
"version": "v1.2.2",
|
||||
|
|
|
@ -41,6 +41,7 @@ use OCA\News\Fetcher\FeedFetcher;
|
|||
use OCA\News\Fetcher\Fetcher;
|
||||
use OCA\News\Fetcher\YoutubeFetcher;
|
||||
use OCA\News\Utility\ProxyConfigParser;
|
||||
use OCA\News\Scraper\Scraper;
|
||||
|
||||
/**
|
||||
* Class Application
|
||||
|
@ -193,5 +194,14 @@ class Application extends App
|
|||
$fetcher->registerFetcher($c->query(FeedFetcher::class));
|
||||
return $fetcher;
|
||||
});
|
||||
|
||||
/**
|
||||
* Scrapers
|
||||
*/
|
||||
$container->registerService(Scraper::class, function (IContainer $c): Scraper {
|
||||
return new Scraper(
|
||||
$c->query(PsrLogger::class)
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,8 @@ class ShowFeed extends Command
|
|||
->setDescription('Prints a JSON string which represents the given feed as it would be in the DB.')
|
||||
->addArgument('feed', InputArgument::REQUIRED, 'Feed to parse')
|
||||
->addOption('user', 'u', InputOption::VALUE_OPTIONAL, 'Username for the feed')
|
||||
->addOption('password', 'p', InputOption::VALUE_OPTIONAL, 'Password for the feed');
|
||||
->addOption('password', 'p', InputOption::VALUE_OPTIONAL, 'Password for the feed')
|
||||
->addOption('full-text', 'f', InputOption::VALUE_NONE, 'Usa a scraper to get full text');
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output)
|
||||
|
@ -56,9 +57,10 @@ class ShowFeed extends Command
|
|||
$url = $input->getArgument('feed');
|
||||
$user = $input->getOption('user');
|
||||
$password = $input->getOption('password');
|
||||
$fullTextEnabled = (bool) $input->getOption('full-text');
|
||||
|
||||
try {
|
||||
list($feed, $items) = $this->feedFetcher->fetch($url, true, null, $user, $password);
|
||||
list($feed, $items) = $this->feedFetcher->fetch($url, true, null, $fullTextEnabled, $user, $password);
|
||||
$output->writeln("Feed: " . json_encode($feed, JSON_PRETTY_PRINT));
|
||||
$output->writeln("Items: " . json_encode($items, JSON_PRETTY_PRINT));
|
||||
} catch (\Throwable $ex) {
|
||||
|
|
|
@ -26,6 +26,7 @@ use OCP\IL10N;
|
|||
use OCA\News\Db\Item;
|
||||
use OCA\News\Db\Feed;
|
||||
use OCA\News\Utility\Time;
|
||||
use OCA\News\Scraper\Scraper;
|
||||
use SimpleXMLElement;
|
||||
|
||||
class FeedFetcher implements IFeedFetcher
|
||||
|
@ -36,14 +37,22 @@ class FeedFetcher implements IFeedFetcher
|
|||
private $l10n;
|
||||
private $time;
|
||||
private $logger;
|
||||
private $scraper;
|
||||
|
||||
public function __construct(FeedIo $fetcher, Favicon $favicon, IL10N $l10n, Time $time, PsrLogger $logger)
|
||||
{
|
||||
public function __construct(
|
||||
FeedIo $fetcher,
|
||||
Favicon $favicon,
|
||||
IL10N $l10n,
|
||||
Time $time,
|
||||
PsrLogger $logger,
|
||||
Scraper $scraper
|
||||
) {
|
||||
$this->reader = $fetcher;
|
||||
$this->faviconFactory = $favicon;
|
||||
$this->l10n = $l10n;
|
||||
$this->time = $time;
|
||||
$this->logger = $logger;
|
||||
$this->scraper = $scraper;
|
||||
}
|
||||
|
||||
|
||||
|
@ -65,7 +74,7 @@ class FeedFetcher implements IFeedFetcher
|
|||
*
|
||||
* @inheritdoc
|
||||
*/
|
||||
public function fetch(string $url, bool $favicon, $lastModified, $user, $password): array
|
||||
public function fetch(string $url, bool $favicon, $lastModified, bool $fullTextEnabled, $user, $password): array
|
||||
{
|
||||
$url2 = new Net_URL2($url);
|
||||
if (!empty($user) && !empty(trim($user))) {
|
||||
|
@ -99,12 +108,32 @@ class FeedFetcher implements IFeedFetcher
|
|||
);
|
||||
|
||||
$items = [];
|
||||
$RTL = $this->determineRtl($parsedFeed);
|
||||
$feedName = $parsedFeed->getTitle();
|
||||
$this->logger->debug('Feed {url} was modified since last fetch. #{count} items', [
|
||||
'url' => $url,
|
||||
'count' => count($parsedFeed),
|
||||
]);
|
||||
|
||||
foreach ($parsedFeed as $item) {
|
||||
$items[] = $this->buildItem($item, $parsedFeed);
|
||||
$body = null;
|
||||
$currRTL = $RTL;
|
||||
|
||||
// Scrape content if enabled
|
||||
if ($fullTextEnabled) {
|
||||
if ($this->scraper->scrape($item->getLink())) {
|
||||
$body = $this->scraper->getContent();
|
||||
$currRTL = $this->scraper->getRTL($currRTL);
|
||||
}
|
||||
}
|
||||
|
||||
$builtItem = $this->buildItem($item, $body, $currRTL);
|
||||
$this->logger->debug('Added item {title} for feed {feed} publishdate: {datetime}', [
|
||||
'title' => $builtItem->getTitle(),
|
||||
'feed' => $feedName,
|
||||
'datetime' => $builtItem->getLastModified(),
|
||||
]);
|
||||
$items[] = $builtItem;
|
||||
}
|
||||
|
||||
return [$feed, $items];
|
||||
|
@ -164,11 +193,12 @@ class FeedFetcher implements IFeedFetcher
|
|||
* Build an item based on a feed.
|
||||
*
|
||||
* @param ItemInterface $parsedItem The item to use
|
||||
* @param FeedInterface $parsedFeed The feed to use
|
||||
* @param string $body Text of the item, if not provided use description from $parsedItem
|
||||
* @param bool $RTL True if the feed is RTL (Right-to-left)
|
||||
*
|
||||
* @return Item
|
||||
*/
|
||||
protected function buildItem(ItemInterface $parsedItem, FeedInterface $parsedFeed): Item
|
||||
protected function buildItem(ItemInterface $parsedItem, string $body = null, bool $RTL = false): Item
|
||||
{
|
||||
$item = new Item();
|
||||
$item->setUnread(true);
|
||||
|
@ -188,7 +218,7 @@ class FeedFetcher implements IFeedFetcher
|
|||
$item->setPubDate($pubDT->getTimestamp());
|
||||
|
||||
$item->setLastModified($lastmodified->getTimestamp());
|
||||
$item->setRtl($this->determineRtl($parsedFeed));
|
||||
$item->setRtl($RTL);
|
||||
|
||||
// unescape content because angularjs helps against XSS
|
||||
$item->setTitle($this->decodeTwice($parsedItem->getTitle()));
|
||||
|
@ -197,8 +227,12 @@ class FeedFetcher implements IFeedFetcher
|
|||
$item->setAuthor($this->decodeTwice($author->getName()));
|
||||
}
|
||||
|
||||
// Use description from feed if body is not provided (by a scraper)
|
||||
if ($body === null) {
|
||||
$body = $parsedItem->getValue("content:encoded") ?? $parsedItem->getDescription();
|
||||
}
|
||||
|
||||
// purification is done in the service layer
|
||||
$body = $parsedItem->getValue("content:encoded") ?? $parsedItem->getDescription();
|
||||
$body = mb_convert_encoding(
|
||||
$body,
|
||||
'HTML-ENTITIES',
|
||||
|
@ -231,12 +265,6 @@ class FeedFetcher implements IFeedFetcher
|
|||
}
|
||||
|
||||
$item->generateSearchIndex();
|
||||
|
||||
$this->logger->debug('Added item {title} for feed {feed} publishdate: {datetime}', [
|
||||
'title' => $item->getTitle(),
|
||||
'feed' => $parsedFeed->getTitle(),
|
||||
'datetime' => $item->getLastModified(),
|
||||
]);
|
||||
return $item;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,17 +45,24 @@ class Fetcher
|
|||
*
|
||||
* @param string $url remote url of the feed
|
||||
* @param boolean $getFavicon if the favicon should also be fetched, defaults to true
|
||||
* @param string $lastModified a last modified value from an http header defaults to false.
|
||||
* @param string|null $lastModified a last modified value from an http header, defaults to null.
|
||||
* If lastModified matches the http header from the feed no results are fetched
|
||||
* @param string $user if given, basic auth is set for this feed
|
||||
* @param string $password if given, basic auth is set for this feed. Ignored if user is empty
|
||||
* @param bool $fullTextEnabled If true use a scraper to download the full article
|
||||
* @param string $user if given, basic auth is set for this feed
|
||||
* @param string $password if given, basic auth is set for this feed. Ignored if user is empty
|
||||
*
|
||||
* @throws ReadErrorException if FeedIO fails
|
||||
* @return array an array containing the new feed and its items, first
|
||||
* element being the Feed and second element being an array of Items
|
||||
*/
|
||||
public function fetch($url, $getFavicon = true, $lastModified = null, $user = null, $password = null)
|
||||
{
|
||||
public function fetch(
|
||||
$url,
|
||||
$getFavicon = true,
|
||||
$lastModified = null,
|
||||
$fullTextEnabled = false,
|
||||
$user = null,
|
||||
$password = null
|
||||
) {
|
||||
foreach ($this->fetchers as $fetcher) {
|
||||
if (!$fetcher->canHandle($url)) {
|
||||
continue;
|
||||
|
@ -64,6 +71,7 @@ class Fetcher
|
|||
$url,
|
||||
$getFavicon,
|
||||
$lastModified,
|
||||
$fullTextEnabled,
|
||||
$user,
|
||||
$password
|
||||
);
|
||||
|
|
|
@ -25,6 +25,7 @@ interface IFeedFetcher
|
|||
* @param boolean $favicon if the favicon should also be fetched, defaults to true
|
||||
* @param string|null $lastModified a last modified value from an http header defaults to false.
|
||||
* If lastModified matches the http header from the feed no results are fetched
|
||||
* @param bool $fullTextEnabled If true use a scraper to download the full article
|
||||
* @param string|null $user if given, basic auth is set for this feed
|
||||
* @param string|null $password if given, basic auth is set for this feed. Ignored if user is empty
|
||||
*
|
||||
|
@ -32,7 +33,7 @@ interface IFeedFetcher
|
|||
* element being the Feed and second element being an array of Items
|
||||
* @throws ReadErrorException if the Feed-IO fetcher encounters a problem
|
||||
*/
|
||||
public function fetch(string $url, bool $favicon, $lastModified, $user, $password): array;
|
||||
public function fetch(string $url, bool $favicon, $lastModified, bool $fullTextEnabled, $user, $password): array;
|
||||
|
||||
/**
|
||||
* Can a fetcher handle a feed.
|
||||
|
|
|
@ -50,7 +50,7 @@ class YoutubeFetcher implements IFeedFetcher
|
|||
*
|
||||
* @inheritdoc
|
||||
*/
|
||||
public function fetch(string $url, bool $favicon, $lastModified, $user, $password): array
|
||||
public function fetch(string $url, bool $favicon, $lastModified, bool $fullTextEnabled, $user, $password): array
|
||||
{
|
||||
$transformedUrl = $this->buildUrl($url);
|
||||
|
||||
|
@ -58,6 +58,7 @@ class YoutubeFetcher implements IFeedFetcher
|
|||
$transformedUrl,
|
||||
$favicon,
|
||||
$lastModified,
|
||||
$fullTextEnabled,
|
||||
$user,
|
||||
$password
|
||||
);
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
<?php
|
||||
/**
|
||||
* Nextcloud - News
|
||||
*
|
||||
* This file is licensed under the Affero General Public License version 3 or
|
||||
* later. See the COPYING file.
|
||||
*
|
||||
* @author Gioele Falcetti <thegio.f@gmail.com>
|
||||
* @copyright 2019 Gioele Falcetti
|
||||
*/
|
||||
|
||||
namespace OCA\News\Scraper;
|
||||
|
||||
interface IScraper
{
    /**
     * Scrape the given URL.
     *
     * @param string $url URL to scrape
     *
     * @return bool False if the scrape failed
     */
    public function scrape(string $url): bool;

    /**
     * Return the content that was scraped.
     *
     * @return string
     */
    public function getContent(): string;

    /**
     * Return the RTL (right-to-left) information of the scraped content.
     *
     * @param bool $default Value handed back when the scraper is unable to
     *                      determine the direction itself
     *
     * @return bool
     */
    public function getRTL(bool $default = false): bool;
}
|
|
@ -0,0 +1,106 @@
|
|||
<?php
|
||||
/**
|
||||
* Nextcloud - News
|
||||
*
|
||||
* This file is licensed under the Affero General Public License version 3 or
|
||||
* later. See the COPYING file.
|
||||
*
|
||||
* @author Gioele Falcetti <thegio.f@gmail.com>
|
||||
* @copyright 2019 Gioele Falcetti
|
||||
*/
|
||||
|
||||
namespace OCA\News\Scraper;
|
||||
|
||||
use OCA\News\Utility\PsrLogger;
|
||||
|
||||
use andreskrey\Readability\Readability;
|
||||
use andreskrey\Readability\Configuration;
|
||||
use andreskrey\Readability\ParseException;
|
||||
|
||||
/**
 * Full-text scraper: downloads a page with cURL and extracts its main
 * content with readability.php.
 *
 * Usage contract: call scrape() first and only read getContent()/getRTL()
 * when it returned true.
 */
class Scraper implements IScraper
{
    /** @var PsrLogger Receives download and parse error messages */
    private $logger;

    /** @var Configuration Readability configuration reused for every scrape */
    private $config;

    /** @var Readability|null Parser of the last successful scrape, null otherwise */
    private $readability;

    /** @var array cURL options applied to every download */
    private $curl_opts;

    /**
     * @param PsrLogger $logger Logger used to report scraping problems
     */
    public function __construct(PsrLogger $logger)
    {
        $this->logger = $logger;
        $this->config = new Configuration([
            'FixRelativeURLs' => true,
            'SummonCthulhu' => true, // Remove <script>
        ]);
        $this->readability = null;

        $this->curl_opts = [
            CURLOPT_RETURNTRANSFER => true, // return web page
            CURLOPT_HEADER => false, // do not return headers
            CURLOPT_FOLLOWLOCATION => true, // follow redirects
            //CURLOPT_USERAGENT => "php-news", // who am i
            CURLOPT_AUTOREFERER => true, // set referer on redirect
            CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
            CURLOPT_TIMEOUT => 120, // timeout on response
            CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
        ];
    }

    /**
     * Download a page.
     *
     * @param string $url URL to download
     *
     * @return array Two elements: the response body (false when the transfer
     *               failed) and the URL reported by cURL after redirects
     *               (the requested URL when cURL reports none)
     */
    private function getHTTPContent(string $url): array
    {
        $handler = curl_init($url);
        if ($handler === false) {
            // No handle could be created, so there is nothing to execute or close
            return [false, $url];
        }

        curl_setopt_array($handler, $this->curl_opts);
        $content = curl_exec($handler);
        $header = curl_getinfo($handler);
        curl_close($handler);

        // Use the url after the redirects have been followed, if available
        $effectiveUrl = $url;
        if (is_array($header) && !empty($header['url'])) {
            $effectiveUrl = $header['url'];
        }

        return [$content, $effectiveUrl];
    }

    /**
     * Download and parse the page at $url.
     *
     * @param string $url URL to scrape
     *
     * @return bool True when the page was downloaded and parsed, false if
     *              the download failed, the body was empty or parsing failed
     */
    public function scrape(string $url): bool
    {
        // Drop the previous result first so a failure never exposes stale content
        $this->readability = null;

        list($content, $redirected_url) = $this->getHTTPContent($url);
        if ($content === false || $content === '') {
            $this->logger->error('Unable to recive content from {url}', [
                'url' => $url,
            ]);
            return false;
        }

        // Update URL used to convert relative URLs
        $this->config->setOriginalURL($redirected_url);
        $readability = new Readability($this->config);

        try {
            $readability->parse($content);
        } catch (ParseException $e) {
            $this->logger->error('Unable to parse content from {url}', [
                'url' => $url,
            ]);
            // A failed parse must be reported, otherwise callers would read
            // content from a parser that has none
            return false;
        }

        // Only keep the parser once it holds a parsed document
        $this->readability = $readability;
        return true;
    }

    /**
     * Get the content extracted by the last successful scrape.
     *
     * @return string The extracted content, or an empty string when there is
     *                none (the declared return type does not allow null)
     */
    public function getContent(): string
    {
        if ($this->readability === null) {
            return '';
        }
        return (string) $this->readability->getContent();
    }

    /**
     * Get the RTL (right-to-left) information of the last successful scrape.
     *
     * @param bool $default Returned when no scrape result is available or the
     *                      parser reports no direction
     *
     * @return bool True if the parser reports the direction "rtl"
     */
    public function getRTL(bool $default = false): bool
    {
        if ($this->readability === null) {
            return $default;
        }

        $RTL = $this->readability->getDirection();
        if ($RTL === null) {
            return $default;
        }
        return $RTL === "rtl";
    }
}
|
|
@ -113,7 +113,7 @@ class FeedService extends Service
|
|||
* @var Feed $feed
|
||||
* @var Item[] $items
|
||||
*/
|
||||
list($feed, $items) = $this->feedFetcher->fetch($feedUrl, true, null, $user, $password);
|
||||
list($feed, $items) = $this->feedFetcher->fetch($feedUrl, true, null, false, $user, $password);
|
||||
// try again if feed exists depending on the reported link
|
||||
try {
|
||||
$hash = $feed->getUrlHash();
|
||||
|
@ -224,6 +224,7 @@ class FeedService extends Service
|
|||
$location,
|
||||
false,
|
||||
$existingFeed->getHttpLastModified(),
|
||||
$existingFeed->getFullTextEnabled(),
|
||||
$existingFeed->getBasicAuthUser(),
|
||||
$existingFeed->getBasicAuthPassword()
|
||||
);
|
||||
|
|
|
@ -21,6 +21,7 @@ use FeedIo\FeedInterface;
|
|||
use OC\L10N\L10N;
|
||||
use \OCA\News\Db\Feed;
|
||||
use \OCA\News\Db\Item;
|
||||
use OCA\News\Scraper\Scraper;
|
||||
use OCA\News\Fetcher\FeedFetcher;
|
||||
use OCA\News\Utility\PsrLogger;
|
||||
|
||||
|
@ -163,12 +164,16 @@ class FeedFetcherTest extends TestCase
|
|||
$this->logger = $this->getMockBuilder(PsrLogger::class)
|
||||
->disableOriginalConstructor()
|
||||
->getMock();
|
||||
$this->scraper = $this->getMockBuilder(Scraper::class)
|
||||
->disableOriginalConstructor()
|
||||
->getMock();
|
||||
$this->fetcher = new FeedFetcher(
|
||||
$this->reader,
|
||||
$this->favicon,
|
||||
$this->l10n,
|
||||
$timeFactory,
|
||||
$this->logger
|
||||
$this->logger,
|
||||
$this->scraper
|
||||
);
|
||||
$this->url = 'http://tests/';
|
||||
|
||||
|
@ -220,7 +225,7 @@ class FeedFetcherTest extends TestCase
|
|||
->with(
|
||||
'Feed {url} was not modified since last fetch. old: {old}, new: {new}'
|
||||
);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@0', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@0', false, null, null);
|
||||
|
||||
$this->assertSame([null, []], $result);
|
||||
}
|
||||
|
@ -234,7 +239,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem();
|
||||
$feed = $this->createFeed();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '0', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '0', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -285,7 +290,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem();
|
||||
$feed = $this->createFeed();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '0', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '0', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
|
||||
|
@ -302,7 +307,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem();
|
||||
$feed = $this->createFeed();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -316,7 +321,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem();
|
||||
$feed = $this->createFeed('de-DE', false, 'http://account%40email.com:F9sEU%2ARt%25%3AKFK8HMHT%26@tests/');
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', 'account@email.com', 'F9sEU*Rt%:KFK8HMHT&');
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', false, 'account@email.com', 'F9sEU*Rt%:KFK8HMHT&');
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -330,7 +335,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem('audio/ogg');
|
||||
$feed = $this->createFeed();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -344,7 +349,7 @@ class FeedFetcherTest extends TestCase
|
|||
$item = $this->createItem('video/ogg');
|
||||
$feed = $this->createFeed();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -359,7 +364,7 @@ class FeedFetcherTest extends TestCase
|
|||
$feed = $this->createFeed('de-DE', true);
|
||||
$item = $this->createItem();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, true, '@1553118393', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, true, '@1553118393', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -378,7 +383,7 @@ class FeedFetcherTest extends TestCase
|
|||
|
||||
$item = $this->createItem();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
$result = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
|
||||
$this->assertEquals([$feed, [$item]], $result);
|
||||
}
|
||||
|
@ -392,7 +397,7 @@ class FeedFetcherTest extends TestCase
|
|||
$this->createFeed('he-IL');
|
||||
$this->createItem();
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
$this->assertTrue($items[0]->getRtl());
|
||||
}
|
||||
|
||||
|
@ -418,7 +423,7 @@ class FeedFetcherTest extends TestCase
|
|||
|
||||
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
$this->assertSame($items[0]->getPubDate(), 1522180229);
|
||||
}
|
||||
|
||||
|
@ -444,7 +449,7 @@ class FeedFetcherTest extends TestCase
|
|||
|
||||
|
||||
$this->mockIterator($this->feed_mock, [$this->item_mock]);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', null, null);
|
||||
list($feed, $items) = $this->fetcher->fetch($this->url, false, '@1553118393', false, null, null);
|
||||
$this->assertSame($items[0]->getPubDate(), 1519761029);
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ class YoutubeFetcherTest extends TestCase
|
|||
*/
|
||||
private $feedFetcher;
|
||||
|
||||
public function setUp()
|
||||
public function setUp()
|
||||
{
|
||||
$this->feedFetcher = $this->getMockBuilder(FeedFetcher::class)
|
||||
->disableOriginalConstructor()
|
||||
|
@ -44,26 +44,27 @@ class YoutubeFetcherTest extends TestCase
|
|||
}
|
||||
|
||||
|
||||
public function testCanHandleFails()
|
||||
public function testCanHandleFails()
|
||||
{
|
||||
$url = 'http://youtube.com';
|
||||
$this->assertFalse($this->fetcher->canHandle($url));
|
||||
}
|
||||
|
||||
|
||||
public function testCanHandle()
|
||||
public function testCanHandle()
|
||||
{
|
||||
$url = 'http://youtube.com/test/?test=a&list=b&b=c';
|
||||
$this->assertTrue($this->fetcher->canHandle($url));
|
||||
}
|
||||
|
||||
|
||||
public function testPlaylistUrl()
|
||||
public function testPlaylistUrl()
|
||||
{
|
||||
$url = 'http://youtube.com/something/weird?a=b&list=sobo3&c=1';
|
||||
$transformedUrl = 'http://gdata.youtube.com/feeds/api/playlists/sobo3';
|
||||
$favicon = true;
|
||||
$modified = 3;
|
||||
$fullTextEnabled = false;
|
||||
$user = 5;
|
||||
$password = 5;
|
||||
$feed = new Feed();
|
||||
|
@ -76,13 +77,14 @@ class YoutubeFetcherTest extends TestCase
|
|||
$this->equalTo($transformedUrl),
|
||||
$this->equalTo($favicon),
|
||||
$this->equalTo($modified),
|
||||
$this->equalTo($fullTextEnabled),
|
||||
$this->equalTo($user)
|
||||
)
|
||||
->will($this->returnValue($result));
|
||||
$feed = $this->fetcher->fetch($url, $favicon, $modified, $user, $password);
|
||||
$feed = $this->fetcher->fetch($url, $favicon, $modified, $fullTextEnabled, $user, $password);
|
||||
|
||||
$this->assertEquals($url, $result[0]->getUrl());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -657,7 +657,8 @@ class FeedServiceTest extends TestCase
|
|||
->with(
|
||||
$this->equalTo($feed->getUrl()),
|
||||
$this->equalTo(false),
|
||||
$this->equalTo($feed->getHttpLastModified())
|
||||
$this->equalTo($feed->getHttpLastModified()),
|
||||
$this->equalTo($feed->getFullTextEnabled())
|
||||
)
|
||||
->will($this->throwException($ex));
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче