2012-05-28 22:27:18 +04:00
|
|
|
<?php
|
2013-03-22 14:18:16 +04:00
|
|
|
|
2012-05-28 22:27:18 +04:00
|
|
|
/**
|
2013-03-22 14:18:16 +04:00
|
|
|
* ownCloud - News
|
2012-05-28 22:27:18 +04:00
|
|
|
*
|
|
|
|
* @author Alessandro Cosentino
|
2013-03-22 14:18:16 +04:00
|
|
|
* @author Bernhard Posselt
|
|
|
|
* @copyright 2012 Alessandro Cosentino cosenal@gmail.com
|
|
|
|
* @copyright 2012 Bernhard Posselt nukeawhale@gmail.com
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 3 of the License, or any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
|
2012-07-21 18:45:37 +04:00
|
|
|
*
|
2013-03-22 14:18:16 +04:00
|
|
|
* You should have received a copy of the GNU Affero General Public
|
|
|
|
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
|
2012-07-21 18:45:37 +04:00
|
|
|
*
|
2012-05-28 22:27:18 +04:00
|
|
|
*/
|
|
|
|
|
2013-03-22 14:18:16 +04:00
|
|
|
namespace OCA\News\Utility;
|
2012-08-11 20:19:37 +04:00
|
|
|
|
2013-03-26 21:04:02 +04:00
|
|
|
use \OCA\News\Db\Item;
|
|
|
|
use \OCA\News\Db\Feed;
|
|
|
|
|
2012-07-03 07:39:19 +04:00
|
|
|
|
2013-03-22 14:18:16 +04:00
|
|
|
/**
 * Downloads a remote feed with SimplePie and converts the result into a
 * Feed entity plus a list of Item entities. Also tries to find a small
 * favicon for the feed.
 */
class FeedFetcher {

    /**
     * Fetch a feed from remote
     * @param string $url remote url of the feed
     * @throws FetcherException if simple pie fails
     * @return array an array containing the new feed and its items
     */
    public function fetch($url) {
        $simplePie = new \SimplePie_Core();
        $simplePie->set_feed_url( $url );
        $simplePie->enable_cache( false );

        if (!$simplePie->init()) {
            throw new FetcherException('Could not initialize simple pie');
        }

        // temporary try-catch to bypass SimplePie bugs
        try {
            $simplePie->handle_content_type();

            $items = array();
            if ($feedItems = $simplePie->get_items()) {
                foreach($feedItems as $feedItem) {
                    $item = new Item();
                    $item->setStatus(0);
                    $item->setUnread();
                    $item->setUrl( $feedItem->get_permalink() );
                    $item->setTitle( $feedItem->get_title() );
                    $item->setGuid( $feedItem->get_id() );
                    $item->setGuidHash( md5($feedItem->get_id()) );
                    $item->setBody( $feedItem->get_content() );
                    $item->setPubDate( $feedItem->get_date('U') );
                    $item->setLastModified(time());

                    $author = $feedItem->get_author();
                    if ($author !== null) {
                        $item->setAuthor( $author->get_name() );
                    }

                    // TODO: make it work for video files also
                    $enclosure = $feedItem->get_enclosure();
                    if($enclosure !== null) {
                        $enclosureType = $enclosure->get_type();
                        if(stripos($enclosureType, "audio/") !== false) {
                            // the enclosure data has to be stored on the item
                            // that is returned, not on the enclosure itself
                            $item->setEnclosureMime($enclosureType);
                            $item->setEnclosureLink($enclosure->get_link());
                        }
                    }

                    $items[] = $item;
                }
            }

            $feed = new Feed();
            $feed->setTitle( $simplePie->get_title());
            $feed->setUrl($url);
            $feed->setUrlHash(md5($url));
            $feed->setAdded(time());

            // prefer the image announced by the feed, otherwise look for
            // a favicon on the web site the feed is served from
            $favicon = $simplePie->get_image_url();

            if ($favicon !== null && $this->checkFavicon($favicon)) {
                $feed->setFaviconLink($favicon);
            } else {
                $webFavicon = $this->discoverFavicon($url);
                if ($webFavicon !== null) {
                    $feed->setFaviconLink($webFavicon);
                }
            }

            return array($feed, $items);

        } catch(\Exception $ex){
            throw new FetcherException($ex->getMessage());
        }

    }


    /**
     * Check whether the given url points to something usable as a favicon
     * @param string $favicon url of the favicon candidate
     * @return bool true if the file could be fetched, is non-empty and
     * smaller than 50000 bytes, is sniffed as an image and is at most
     * 32x32 pixels, false otherwise
     */
    public function checkFavicon($favicon) {
        if ($favicon === null || $favicon == false) {
            return false;
        }

        $file = new \SimplePie_File($favicon);
        if (!$file->success) {
            return false;
        }

        // size in bytes
        $filesize = strlen($file->body);

        //bigger files are not considered favicons
        if ($filesize > 0 && $filesize < 50000) {
            $sniffer = new \SimplePie_Content_Type_Sniffer($file);
            if (substr($sniffer->get_type(), 0, 6) === 'image/') {
                $imgsize = @getimagesize($favicon);
                //bigger images are not considered favicons
                if ($imgsize && $imgsize['0'] <= 32 && $imgsize['1'] <= 32) {
                    return true;
                }
            }
        }

        return false;
    }


    /**
     * Try to find a favicon for the web site that serves the given url
     * @param string $url url of the feed
     * @return string|null url of a favicon that passed checkFavicon() or
     * null if none was found
     */
    public function discoverFavicon($url) {
        //try webroot favicon
        $favicon = \SimplePie_Misc::absolutize_url('/favicon.ico', $url);

        if ($this->checkFavicon($favicon)) {
            return $favicon;
        }

        //try to extract favicon from web page
        $absoluteUrl = \SimplePie_Misc::absolutize_url('/', $url);
        $page = \OC_Util::getUrlContent($absoluteUrl);

        if ( FALSE !== $page ) {
            preg_match ( '/<[^>]*link[^>]*(rel=["\']icon["\']|rel=["\']shortcut icon["\']) .*href=["\']([^>]*)["\'].*>/iU', $page, $match );
            if (isset($match[2])) {
                // the specified uri might be an url, an absolute or a relative path
                // we have to turn it into an url to be able to display it out of context
                $favicon = htmlspecialchars_decode ( $match[2] );

                // paths carry no scheme: resolve them against the page
                // they were found on so that they become a full url
                if (!parse_url($favicon, PHP_URL_SCHEME)) {
                    $favicon = \SimplePie_Misc::absolutize_url($favicon, $absoluteUrl);
                }

                if ($this->checkFavicon($favicon)) {
                    return $favicon;
                }
            }
        }

        return null;
    }

}
|