news/fetcher/feedfetcher.php

208 строки
7.0 KiB
PHP
Исходник Обычный вид История

2012-05-28 22:27:18 +04:00
<?php
/**
* ownCloud - News
*
* This file is licensed under the Affero General Public License version 3 or
* later. See the COPYING file.
*
* @author Alessandro Cosentino <cosenal@gmail.com>
* @author Bernhard Posselt <dev@bernhard-posselt.com>
* @copyright Alessandro Cosentino 2012
* @copyright Bernhard Posselt 2012, 2014
*/
2012-05-28 22:27:18 +04:00
namespace OCA\News\Fetcher;
use \PicoFeed\Parser\MalFormedXmlException;
use \PicoFeed\Reader\Reader;
use \PicoFeed\Reader\SubscriptionNotFoundException;
use \PicoFeed\Reader\UnsupportedFeedFormatException;
use \PicoFeed\Client\InvalidCertificateException;
use \PicoFeed\Client\InvalidUrlException;
use \PicoFeed\Client\MaxRedirectException;
use \PicoFeed\Client\MaxSizeException;
use \PicoFeed\Client\TimeoutException;
use \OCP\IL10N;
use \OCA\News\Db\Item;
use \OCA\News\Db\Feed;
use \OCA\News\Utility\PicoFeedFaviconFactory;
use \OCA\News\Utility\PicoFeedReaderFactory;
2013-04-02 13:09:33 +04:00
/**
 * Generic feed fetcher: downloads a feed through the PicoFeed reader, parses
 * it and converts the result into News Feed and Item entities.
 */
class FeedFetcher implements IFeedFetcher {

    /** @var PicoFeedFaviconFactory builds the favicon finder used by buildFeed() */
    private $faviconFactory;

    /** @var Reader PicoFeed reader used to discover and parse the feed */
    private $reader;

    /** @var IL10N translator for user facing error messages */
    private $l10n;

    /** @var object time source, only its getTime() method is used here */
    private $time;

    /**
     * @param Reader $reader PicoFeed reader used to discover and parse feeds
     * @param PicoFeedFaviconFactory $faviconFactory factory for favicon finders
     * @param IL10N $l10n translator for error messages
     * @param object $time object exposing getTime()
     */
    public function __construct(Reader $reader,
                                PicoFeedFaviconFactory $faviconFactory,
                                IL10N $l10n,
                                $time){
        $this->faviconFactory = $faviconFactory;
        $this->reader = $reader;
        $this->time = $time;
        $this->l10n = $l10n;
    }


    /**
     * This fetcher handles all the remaining urls therefore always returns true
     *
     * @param string $url the url that should be fetched (ignored)
     * @return boolean always true
     */
    public function canHandle($url){
        return true;
    }


    /**
     * Fetch a feed from remote
     *
     * @param string $url remote url of the feed
     * @param boolean $getFavicon if the favicon should also be fetched,
     * defaults to true
     * @param string $lastModified a last modified value from an http header,
     * defaults to null. It is handed to the reader together with the etag so
     * that an unchanged feed can be detected
     * @param string $etag an etag from an http header, defaults to null. It is
     * handed to the reader together with lastModified so that an unchanged
     * feed can be detected
     * @throws FetcherException if it fails
     * @return array an array containing the new feed and its items, first
     * element being the Feed and second element being an array of Items. If
     * the remote resource reports that it was not modified, both elements
     * are null
     */
    public function fetch($url, $getFavicon=true, $lastModified=null,
                          $etag=null) {
        try {
            $resource = $this->reader->discover($url, $lastModified, $etag);

            if (!$resource->isModified()) {
                return [null, null];
            }

            // from here on use the values reported by the fetched resource
            $location = $resource->getUrl();
            $etag = $resource->getEtag();
            $content = $resource->getContent();
            $encoding = $resource->getEncoding();
            $lastModified = $resource->getLastModified();

            $parser = $this->reader->getParser($location, $content, $encoding);
            $parsedFeed = $parser->execute();

            $feed = $this->buildFeed(
                $parsedFeed, $url, $getFavicon, $lastModified, $etag, $location
            );

            $items = [];
            foreach ($parsedFeed->getItems() as $item) {
                $items[] = $this->buildItem($item);
            }

            return [$feed, $items];

        } catch (\Exception $ex) {
            // every failure is reported to the caller as a FetcherException
            throw new FetcherException($this->errorMessage($ex));
        }
    }


    /**
     * Maps an exception to the message that is put into the FetcherException
     *
     * @param \Exception $ex the exception that aborted the fetch
     * @return string a translated message for the known PicoFeed exceptions,
     * the original exception message for everything else
     */
    private function errorMessage(\Exception $ex) {
        if ($ex instanceof MalFormedXmlException) {
            return $this->l10n->t('Feed contains invalid XML');
        }
        if ($ex instanceof SubscriptionNotFoundException) {
            return $this->l10n->t('Could not find a feed');
        }
        if ($ex instanceof UnsupportedFeedFormatException) {
            return $this->l10n->t('Detected feed format is not supported');
        }
        if ($ex instanceof InvalidCertificateException) {
            return $this->l10n->t('SSL Certificate is invalid');
        }
        if ($ex instanceof InvalidUrlException) {
            return $this->l10n->t('Website not found');
        }
        if ($ex instanceof MaxRedirectException) {
            return $this->l10n->t('More redirects than allowed, aborting');
        }
        if ($ex instanceof MaxSizeException) {
            return $this->l10n->t('Bigger than maximum allowed size');
        }
        if ($ex instanceof TimeoutException) {
            return $this->l10n->t('Request timed out');
        }

        return $ex->getMessage();
    }


    /**
     * Decodes HTML entities twice so that double escaped text
     * (e.g. &amp;amp;) ends up as plain characters
     *
     * @param string $string the text to decode, null is treated as ''
     * @return string the decoded text
     */
    private function decodeTwice($string) {
        // cast so that a missing value (null) is not passed to
        // html_entity_decode, which is deprecated on newer PHP versions
        $decoded = html_entity_decode((string) $string, ENT_QUOTES, 'UTF-8');
        $decoded = html_entity_decode($decoded, ENT_QUOTES, 'UTF-8');

        // behold! &apos; is not converted by PHP that's why we need to do it
        // manually (TM)
        return str_replace('&apos;', '\'', $decoded);
    }


    /**
     * Converts a parsed feed entry into an unread Item
     *
     * @param object $parsedItem the entry as returned by the parser
     * @return Item the new item
     */
    protected function buildItem($parsedItem) {
        $item = new Item();
        $item->setUnread();
        $item->setUrl($parsedItem->getUrl());
        $item->setGuid($parsedItem->getId());
        $item->setGuidHash($item->getGuid());
        $item->setPubDate($parsedItem->getDate());
        $item->setLastModified($this->time->getTime());

        // unescape content because angularjs helps against XSS
        $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
        $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));

        // purification is done in the service layer
        $body = (string) $parsedItem->getContent();

        // mb_detect_encoding returns false if no encoding matches; passing
        // that on to mb_convert_encoding fails, so fall back to UTF-8
        $encoding = mb_detect_encoding($body);
        if ($encoding === false) {
            $encoding = 'UTF-8';
        }
        $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding);
        $item->setBody($body);

        // only audio and video enclosures are attached to the item
        $enclosureUrl = $parsedItem->getEnclosureUrl();
        if ($enclosureUrl) {
            $enclosureType = (string) $parsedItem->getEnclosureType();
            $isAudio = stripos($enclosureType, 'audio/') !== false;
            $isVideo = stripos($enclosureType, 'video/') !== false;

            if ($isAudio || $isVideo) {
                $item->setEnclosureMime($enclosureType);
                $item->setEnclosureLink($enclosureUrl);
            }
        }

        return $item;
    }


    /**
     * Converts a parsed feed into a Feed entity
     *
     * @param object $parsedFeed the feed as returned by the parser
     * @param string $url the url used to add the feed
     * @param boolean $getFavicon if true the favicon is looked up as well
     * @param string $modified last modified value reported by the resource
     * @param string $etag etag reported by the resource
     * @param string $location the url where the feed was found
     * @return Feed the new feed
     */
    protected function buildFeed($parsedFeed, $url, $getFavicon, $modified,
                                 $etag, $location) {
        $feed = new Feed();

        // fall back to the feed location if the feed itself provides no link
        $link = $parsedFeed->getUrl();
        if (!$link) {
            $link = $location;
        }

        // unescape content because angularjs helps against XSS
        $title = strip_tags($this->decodeTwice($parsedFeed->getTitle()));
        $feed->setTitle($title);
        $feed->setUrl($url);  // the url used to add the feed
        $feed->setLocation($location);  // the url where the feed was found
        $feed->setLink($link);  // <link> attribute in the feed
        $feed->setLastModified($modified);
        $feed->setEtag($etag);
        $feed->setAdded($this->time->getTime());

        if ($getFavicon) {
            $faviconFetcher = $this->faviconFactory->build();
            $favicon = $faviconFetcher->find($feed->getLink());
            $feed->setFaviconLink($favicon);
        }

        return $feed;
    }

}