2012-05-28 22:27:18 +04:00
|
|
|
<?php
|
2013-03-22 14:18:16 +04:00
|
|
|
|
2012-05-28 22:27:18 +04:00
|
|
|
/**
|
2013-03-22 14:18:16 +04:00
|
|
|
* ownCloud - News
|
2012-05-28 22:27:18 +04:00
|
|
|
*
|
|
|
|
* @author Alessandro Cosentino
|
2013-03-22 14:18:16 +04:00
|
|
|
* @author Bernhard Posselt
|
|
|
|
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
|
|
|
|
* @copyright 2012 Bernhard Posselt nukeawhale@gmail.com
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 3 of the License, or any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
|
2012-07-21 18:45:37 +04:00
|
|
|
*
|
2013-03-22 14:18:16 +04:00
|
|
|
* You should have received a copy of the GNU Affero General Public
|
|
|
|
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
2012-07-21 18:45:37 +04:00
|
|
|
*
|
2012-05-28 22:27:18 +04:00
|
|
|
*/
|
|
|
|
|
2013-03-22 14:18:16 +04:00
|
|
|
namespace OCA\News\Utility;
|
2012-08-11 20:19:37 +04:00
|
|
|
|
2013-04-02 12:41:01 +04:00
|
|
|
use \OCA\AppFramework\Core\API;
|
2013-04-17 20:29:01 +04:00
|
|
|
use \OCA\AppFramework\Utility\FaviconFetcher;
|
2013-04-18 17:56:12 +04:00
|
|
|
use \OCA\AppFramework\Utility\SimplePieAPIFactory;
|
|
|
|
use \OCA\AppFramework\Utility\TimeFactory;
|
2013-04-02 12:41:01 +04:00
|
|
|
|
2013-03-26 21:04:02 +04:00
|
|
|
use \OCA\News\Db\Item;
|
|
|
|
use \OCA\News\Db\Feed;
|
|
|
|
|
2012-07-03 07:39:19 +04:00
|
|
|
|
2013-04-02 13:09:33 +04:00
|
|
|
/**
 * Default feed fetcher: downloads a feed through SimplePie and converts the
 * result into a Feed entity plus a list of Item entities.
 */
class FeedFetcher implements IFeedFetcher {

	/** API instance handed in by the constructor (not used by the methods of this class) */
	private $api;

	/** Value passed to SimplePie's set_cache_location() */
	private $cacheDirectory;

	/** Value passed to SimplePie's set_cache_duration() */
	private $cacheDuration;

	/** FaviconFetcher used to look up the favicon of the feed's link */
	private $faviconFetcher;

	/** SimplePieAPIFactory whose getCore() supplies the SimplePie object */
	private $simplePieFactory;

	/** TimeFactory whose getTime() supplies the timestamps stored on entities */
	private $time;

	/** Object whose purify() method is applied to the item content */
	private $purifier;


	/**
	 * @param API $api stored on the instance
	 * @param SimplePieAPIFactory $simplePieFactory source of the SimplePie object
	 * @param FaviconFetcher $faviconFetcher used to resolve the feed favicon
	 * @param TimeFactory $time source of the current time
	 * @param mixed $cacheDirectory forwarded to SimplePie's set_cache_location()
	 * @param mixed $cacheDuration forwarded to SimplePie's set_cache_duration()
	 * @param mixed $purifier object providing purify() for the item content
	 */
	public function __construct(API $api,
	                            SimplePieAPIFactory $simplePieFactory,
	                            FaviconFetcher $faviconFetcher,
	                            TimeFactory $time,
	                            $cacheDirectory,
	                            $cacheDuration,
	                            $purifier){
		$this->api = $api;
		$this->cacheDirectory = $cacheDirectory;
		$this->cacheDuration = $cacheDuration;
		$this->faviconFetcher = $faviconFetcher;
		$this->simplePieFactory = $simplePieFactory;
		$this->time = $time;
		$this->purifier = $purifier;
	}


	/**
	 * This fetcher handles all the remaining urls therefore always returns true
	 */
	public function canHandle($url){
		return true;
	}


	/**
	 * Fetch a feed from remote
	 * @param string url remote url of the feed
	 * @throws FetcherException if simple pie fails
	 * @return array an array containing the new feed and its items
	 */
	public function fetch($url) {
		$simplePie = $this->simplePieFactory->getCore();
		$simplePie->set_feed_url($url);
		$simplePie->enable_cache(true);
		$simplePie->set_cache_location($this->cacheDirectory);
		$simplePie->set_cache_duration($this->cacheDuration);

		if (!$simplePie->init()) {
			throw new FetcherException('Could not initialize simple pie');
		}

		try {
			// somehow $simplePie turns into a feed after init
			$items = array();
			$feedItems = $simplePie->get_items();
			if ($feedItems) {
				foreach($feedItems as $feedItem) {
					$items[] = $this->buildItem($feedItem);
				}
			}

			$feed = $this->buildFeed($simplePie, $url);

			return array($feed, $items);

		} catch(\Exception $ex){
			// any failure while building the entities is reported as a
			// FetcherException carrying the original message
			throw new FetcherException($ex->getMessage());
		}
	}


	/**
	 * Decodes HTML entities using the flags and charset shared by every
	 * decoded field of this class
	 * @param string $value the text to decode
	 * @return string the decoded text
	 */
	private function decodeEntities($value) {
		return html_entity_decode($value, ENT_COMPAT, 'UTF-8');
	}


	/**
	 * Builds an Item entity from one item of the fetched feed
	 * @param mixed $simplePieItem the item object returned by get_items()
	 * @return Item the populated item
	 */
	protected function buildItem($simplePieItem) {
		$item = new Item();
		$item->setStatus(0);
		$item->setUnread();
		$item->setUrl($this->decodeEntities($simplePieItem->get_permalink()));

		// unescape content because angularjs helps against XSS
		$item->setTitle($this->decodeEntities($simplePieItem->get_title()));

		$guid = $simplePieItem->get_id();
		$item->setGuid($guid);
		$item->setGuidHash(md5($guid));

		// escape XSS
		$body = $this->purifier->purify($simplePieItem->get_content());

		// open links in a new tab; only real anchor tags are rewritten, a
		// plain '<a' prefix match would also mangle tags such as <abbr> or
		// <address>
		$linkedBody = preg_replace('/<a(?=[\s>\/])/', '<a target="_blank"',
			$body);
		if ($linkedBody === null) {
			// preg_replace reports an internal error with null: keep the
			// purified body rather than storing nothing
			$linkedBody = $body;
		}
		$item->setBody($linkedBody);

		$item->setPubDate($simplePieItem->get_date('U'));
		$item->setLastModified($this->time->getTime());

		$author = $simplePieItem->get_author();
		if ($author !== null) {
			$name = $this->decodeEntities($author->get_name());
			if ($name) {
				$item->setAuthor($name);
			} else {
				// fall back to the e-mail address if the author has no name
				$item->setAuthor($this->decodeEntities($author->get_email()));
			}
		}

		// TODO: make it work for video files also
		$enclosure = $simplePieItem->get_enclosure();
		if($enclosure !== null) {
			$enclosureType = $enclosure->get_type();
			if(stripos($enclosureType, "audio/") !== false) {
				$item->setEnclosureMime($enclosureType);
				$item->setEnclosureLink($enclosure->get_link());
			}
		}

		return $item;
	}


	/**
	 * Builds a Feed entity from the initialized SimplePie object
	 * @param mixed $simplePieFeed the SimplePie object after init()
	 * @param string $url the url the feed was fetched from
	 * @return Feed the populated feed
	 */
	protected function buildFeed($simplePieFeed, $url) {
		$feed = new Feed();

		// unescape content because angularjs helps against XSS
		$title = $this->decodeEntities($simplePieFeed->get_title());

		// if there is no title use the url
		if(!$title) {
			$title = $url;
		}

		$feed->setTitle($title);
		$feed->setUrl($url);
		$feed->setLink($simplePieFeed->get_link());
		$feed->setUrlHash(md5($url));
		$feed->setAdded($this->time->getTime());

		// use the favicon from the page first since most feeds use a weird image
		$favicon = $this->faviconFetcher->fetch($feed->getLink());

		if (!$favicon) {
			$favicon = $simplePieFeed->get_image_url();
		}

		$feed->setFaviconLink($favicon);

		return $feed;
	}

}
|