зеркало из https://github.com/nextcloud/news.git
update picofeed
This commit is contained in:
Родитель
c335e5f349
Коммит
6bc0c9a660
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -56,12 +56,12 @@
|
|||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "a2cc36244278afbfb9578037b1700ca3cd5a87d7"
|
||||
"reference": "cd72fe816d799a05dd0533f92270efc111306342"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/a2cc36244278afbfb9578037b1700ca3cd5a87d7",
|
||||
"reference": "a2cc36244278afbfb9578037b1700ca3cd5a87d7",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/cd72fe816d799a05dd0533f92270efc111306342",
|
||||
"reference": "cd72fe816d799a05dd0533f92270efc111306342",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
|
@ -95,7 +95,7 @@
|
|||
],
|
||||
"description": "Modern library to handle RSS/Atom feeds",
|
||||
"homepage": "https://github.com/fguillot/picoFeed",
|
||||
"time": "2015-09-12 11:05:28"
|
||||
"time": "2015-10-16 00:28:29"
|
||||
},
|
||||
{
|
||||
"name": "pear/net_url2",
|
||||
|
|
|
@ -162,12 +162,12 @@
|
|||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "a2cc36244278afbfb9578037b1700ca3cd5a87d7"
|
||||
"reference": "cd72fe816d799a05dd0533f92270efc111306342"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/a2cc36244278afbfb9578037b1700ca3cd5a87d7",
|
||||
"reference": "a2cc36244278afbfb9578037b1700ca3cd5a87d7",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/cd72fe816d799a05dd0533f92270efc111306342",
|
||||
"reference": "cd72fe816d799a05dd0533f92270efc111306342",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
|
@ -181,7 +181,7 @@
|
|||
"suggest": {
|
||||
"ext-curl": "PicoFeed will use cURL if present"
|
||||
},
|
||||
"time": "2015-09-12 11:05:28",
|
||||
"time": "2015-10-16 00:28:29",
|
||||
"bin": [
|
||||
"picofeed"
|
||||
],
|
||||
|
|
|
@ -88,9 +88,10 @@ class Html
|
|||
*/
|
||||
public function __construct($html, $website)
|
||||
{
|
||||
$this->config = new Config;
|
||||
$this->input = XmlParser::HtmlToXml($html);
|
||||
$this->output = '';
|
||||
$this->tag = new Tag;
|
||||
$this->tag = new Tag($this->config);
|
||||
$this->website = $website;
|
||||
$this->attribute = new Attribute(new Url($website));
|
||||
}
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
namespace PicoFeed\Filter;
|
||||
|
||||
use DOMXpath;
|
||||
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
/**
|
||||
* Tag Filter class
|
||||
|
@ -13,6 +15,14 @@ use PicoFeed\Parser\XmlParser;
|
|||
*/
|
||||
class Tag
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* Tags blacklist (Xpath expressions)
|
||||
*
|
||||
|
@ -71,6 +81,11 @@ class Tag
|
|||
'q',
|
||||
);
|
||||
|
||||
public function __construct(Config $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the tag is allowed and is not a pixel tracker
|
||||
*
|
||||
|
@ -130,7 +145,10 @@ class Tag
|
|||
*/
|
||||
public function isAllowedTag($tag)
|
||||
{
|
||||
return in_array($tag, $this->tag_whitelist);
|
||||
return in_array($tag, array_merge(
|
||||
$this->tag_whitelist,
|
||||
array_keys($this->config->getFilterWhitelistedTags(array()))
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -4,10 +4,16 @@ return array(
|
|||
'%.*%' => array(
|
||||
'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
|
||||
'body' => array(
|
||||
'//div[@class="entry"]',
|
||||
'//div[@data-js="gallerySlides"]',
|
||||
'//article',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[@id="linker_widget"]',
|
||||
'//*[@class="credit"]',
|
||||
'//div[@data-js="slideCount"]',
|
||||
'//span[@class="visually-hidden"]',
|
||||
'//*[@data-slide-number="_endslate"]',
|
||||
'//*[@id="related"]',
|
||||
'//*[contains(@class, "bio")]',
|
||||
'//*[contains(@class, "entry-footer")]',
|
||||
'//*[contains(@class, "mobify_backtotop_link")]',
|
||||
|
@ -15,7 +21,11 @@ return array(
|
|||
'//*[contains(@class, "gallery-thumbnail")]',
|
||||
'//img[contains(@src, "1x1")]',
|
||||
'//a[contains(@href, "creativecommons")]',
|
||||
'//a[@href="#start-of-content"]',
|
||||
'//ul[@id="article-tags"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
|
||||
'body' => array(
|
||||
'//figure[@class="article-content"]',
|
||||
'//div[@class="article-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h1',
|
||||
'//h3',
|
||||
'//ul',
|
||||
'//a[@target="_self"]',
|
||||
'//div[@data-embed-type="Brightcove"]',
|
||||
'//div[@class="QuoteContainer"]'
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
|
||||
'body' => array(
|
||||
'//div[@class="story-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//p[@class="kindofstory"]',
|
||||
'//cite[@class="byline"]',
|
||||
'//div[contains(@class,"related-topics")]',
|
||||
'//links',
|
||||
'//sharebar',
|
||||
'//related-topics',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://hosted.ap.org/dynamic/stories/A/AS_CHINA_GAO_ZHISHENG?SITE=AP&SECTION=HOME&TEMPLATE=DEFAULT',
|
||||
'body' => array(
|
||||
'//img[@class="ap-smallphoto-img"]',
|
||||
'//span[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,17 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://arstechnica.com/tech-policy/2015/09/judge-warners-2m-happy-birthday-copyright-is-bogus/',
|
||||
'body' => array(
|
||||
'//section[@id="article-guts"]',
|
||||
'//div[@class="superscroll-content show"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//figcaption',
|
||||
'//aside',
|
||||
'//div[@class="article-expander"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bangkokpost.com/news/politics/704204/new-us-ambassador-arrives-in-bangkok',
|
||||
'body' => array(
|
||||
'//div[@class="articleContents"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h2',
|
||||
'//h4',
|
||||
'//div[@class="text-size"]',
|
||||
'//div[@class="relate-story"]',
|
||||
'//div[@class="text-ads"]',
|
||||
'//script',
|
||||
'//ul',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://bgr.com/2015/09/27/iphone-6s-waterproof-testing/',
|
||||
'body' => array(
|
||||
'//img[contains(@class,"img")]',
|
||||
'//div[@class="text-column"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//strong',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html',
|
||||
'body' => array(
|
||||
'//p[@class="media__caption"]',
|
||||
'//figure/div/a/img',
|
||||
'//p[@class="content__segment"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.buenosairesherald.com/article/199344/manzur-named-next-governor-of-tucum%C3%A1n',
|
||||
'body' => array(
|
||||
'//div[@class="img_despliege"]',
|
||||
'//div[@id="nota_despliegue"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//span[@id="fecha"]',
|
||||
'//h1',
|
||||
'//div[@class="autor"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm',
|
||||
'body' => array(
|
||||
'//p[@itemprop="description"]',
|
||||
'//div[@itemprop="articleBody"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//a[@class="clickToEnlarge"]',
|
||||
'//div[@section="topSharebar"]',
|
||||
'//div[contains(@class,"related")]',
|
||||
'//div[contains(@class,"ad-")]',
|
||||
'//div[@section="shortcodeGallery"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.csmonitor.com/USA/Politics/2015/0925/John-Boehner-steps-down-Self-sacrificing-but-will-it-lead-to-better-government',
|
||||
'body' => array(
|
||||
'//figure[@id="image-top-1"]',
|
||||
'//div[@id="story-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//img[@title="hide caption"]',
|
||||
'//*[contains(@class,"promo_link")]',
|
||||
'//div[@id="story-embed-column"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.dailytech.com/Apples+First+Fixes+to+iOS+9+Land+w+iOS++901+Release/article37495.htm',
|
||||
'body' => array(
|
||||
'//div[@class="NewsBodyImage"]',
|
||||
'//span[@id="lblSummary"]',
|
||||
'//span[@id="lblBody"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,17 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blogs.discovermagazine.com/the-extremo-files/2015/09/11/have-scientists-found-the-worlds-deepest-fish/',
|
||||
'body' => array(
|
||||
'//div[@class="entry"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h1',
|
||||
'//div[@class="meta"]',
|
||||
'//div[@class="shareIcons"]',
|
||||
'//div[@class="navigation"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://espn.go.com/nfl/story/_/id/13388208/jason-whitlock-chip-kelly-controversy',
|
||||
'body' => array(
|
||||
'//p',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.geek.com/news/the-11-best-ways-to-eat-eggs-1634076/',
|
||||
'body' => array(
|
||||
'//div[@class="articleinfo"]/figure',
|
||||
'//div[@class="articleinfo"]/article',
|
||||
'//span[@class="by"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//span[@class="red"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.japantimes.co.jp/news/2015/09/27/world/social-issues-world/pope-meets-sex-abuse-victims-philadelphia-promises-accountability/',
|
||||
'body' => array(
|
||||
'//article[@role="main"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//header',
|
||||
'//div[contains(@class, "meta")]',
|
||||
'//div[@class="clearfix"]',
|
||||
'//div[@class="OUTBRAIN"]',
|
||||
'//ul[@id="content_footer_menu"]',
|
||||
'//div[@class="article_footer_ad"]',
|
||||
'//div[@id="disqus_thread"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.japantoday.com/category/politics/view/japan-u-s-to-sign-new-base-environment-pact',
|
||||
'body' => array(
|
||||
'//div[@id="article_container"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h2',
|
||||
'//div[@id="article_info"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,27 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html',
|
||||
'body' => array(
|
||||
'//div[@id="article"]',
|
||||
'//div[@id="mainContent"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="storyTimestamp"]',
|
||||
'//img[@class="floatLeft"]',
|
||||
'//div[@class="overlineUpper"]',
|
||||
'//div[@class="updated"]',
|
||||
'//div[@class="columnist_link"]',
|
||||
'//div[@class="side_container_01"]',
|
||||
'//div[@class="credit"]',
|
||||
'//h1',
|
||||
'//h2',
|
||||
'//h4',
|
||||
'//ul',
|
||||
'//div[contains(@class, "footer-pkg")]',
|
||||
'//img[contains(@src,"analytics")]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.koreaherald.com/view.php?ud=20150926000018',
|
||||
'body' => array(
|
||||
'//div[@class="content_view"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,14 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.linux.org/threads/lua-the-scripting-interpreter.8352/',
|
||||
'body' => array(
|
||||
'//div[@class="messageContent"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//aside',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,17 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.linuxinsider.com/story/82526.html?rss=1',
|
||||
'body' => array(
|
||||
'//div[@id="story-graphic-xlarge"]',
|
||||
'//div[@id="story-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//div[@class="story-advertisement"]',
|
||||
'//iframe',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,15 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.nba.com/2015/news/09/25/knicks-jackson-to-spend-more-time-around-coaching-staff.ap/index.html?rss=true',
|
||||
'body' => array(
|
||||
'//section[@id="nbaArticleContent"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@id="nbaArticleSocialWrapper_bot"]',
|
||||
'//h5',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.networkworld.com/article/2986764/smartphones/samsung-tried-to-troll-apple-fans-waiting-in-line-for-the-iphone-6s.html#tk.rss_all',
|
||||
'body' => array(
|
||||
'//figure/img',
|
||||
'//section[@class="deck"]',
|
||||
'//div[@itemprop="articleBody"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//aside',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://osnews.com/story/28863/Google_said_to_be_under_US_antitrust_scrutiny_over_Android',
|
||||
'body' => array(
|
||||
'//div[@class="newscontent1"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://recode.net/2015/09/26/big-tech-rolls-out-red-carpet-for-indian-prime-minister-lobbies-behind-closed-doors/',
|
||||
'body' => array(
|
||||
'//img[contains(@class,"attachment-large")]',
|
||||
'//div[contains(@class,"postarea")]',
|
||||
'//li[contains(@class,"author")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//div[contains(@class,"sharedaddy")]',
|
||||
'//div[@class="post-send-off"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.securityfocus.com/news/11569?ref=rss',
|
||||
'body' => array(
|
||||
'//div[@class="expanded"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.soundandvision.com/content/james-guthrie-mixing-roger-waters-and-pink-floyd-51',
|
||||
'body' => array(
|
||||
'//div[@id="left"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="meta"]',
|
||||
'//div[@class="ratingsbox"]',
|
||||
'//h1',
|
||||
'//h2',
|
||||
'//addthis',
|
||||
'//comment-links',
|
||||
'//div[@class="book-navigation"]',
|
||||
'//div[@class="comment-links"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.stereophile.com/content/2015-rocky-mountain-audio-fest-starts-friday',
|
||||
'body' => array(
|
||||
'//div[@class="content clear-block"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blog.the-ebook-reader.com/2015/09/25/kobo-glo-hd-and-kobo-touch-2-0-covers-and-cases-roundup/',
|
||||
'body' => array(
|
||||
'//div[@class="entry"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.theatlantic.com/politics/archive/2015/09/what-does-it-mean-to-lament-the-poor-inside-panem/407317/',
|
||||
'body' => array(
|
||||
'//picture[@class="img"]',
|
||||
'//figure/figcaption/span',
|
||||
'//div/p[@itemprop="description"]',
|
||||
'//div[@class="article-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//aside[@class="callout"]',
|
||||
'//span[@class="credit"]',
|
||||
'//figcaption[@class="credit"]',
|
||||
'//aside[contains(@class,"partner-box")]',
|
||||
'//div[contains(@class,"ad")]',
|
||||
'//a[contains(@class,"social-icon")]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.thehindu.com/sci-tech/science/why-is-the-shape-of-cells-in-a-honeycomb-always-hexagonal/article7692306.ece?utm_source=RSS_Feed&utm_medium=RSS&utm_campaign=RSS_Syndication',
|
||||
'body' => array(
|
||||
'//div/img[@class="main-image"]',
|
||||
'//div[@class="photo-caption"]',
|
||||
'//div[@class="articleLead"]',
|
||||
'//p',
|
||||
'//span[@class="upper"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@id="articleKeywords"]',
|
||||
'//div[@class="photo-source"]'
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,18 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.themoscowtimes.com/business/article/535500.html',
|
||||
'body' => array(
|
||||
'//div[@class="article_main_img"]',
|
||||
'//div[@class="article_text"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="articlebottom"]',
|
||||
'//p/b',
|
||||
'//p/a[contains(@href, "/article.php?id=")]',
|
||||
'//div[@class="disqus_wrap"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.theonion.com/article/wild-eyed-jim-harbaugh-informs-players-they-must-k-51397?utm_medium=RSS&utm_campaign=feeds',
|
||||
'body' => array(
|
||||
'//div[@class="content-masthead"]/figure/div/noscript/img',
|
||||
'//div[@class="content-text"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.thestandard.com.hk/breaking_news_detail.asp?id=67156',
|
||||
'body' => array(
|
||||
'//span[@class="bodyCopy"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//br',
|
||||
'//map[@name="gif_bar"]',
|
||||
'//img[contains(@usemap,"gif_bar")]',
|
||||
'//a',
|
||||
'//span[@class="bodyHeadline"]',
|
||||
'//i',
|
||||
'//b',
|
||||
'//table'
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.upi.com/Top_News/US/2015/09/26/Tech-giants-Hollywood-stars-among-guests-at-state-dinner-for-Chinas-Xi-Jinping/4541443281006/',
|
||||
'body' => array(
|
||||
'//div[@class="img"]',
|
||||
'//div[@class="st_text_c"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@align="center"]',
|
||||
'//div[@class="ad_slot"]',
|
||||
'//div[@class="ipara"]',
|
||||
'//div[@class="st_embed"]',
|
||||
'//div[contains(@style,"font-size")]',
|
||||
'//ul',
|
||||
'//style[@type="text/css"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
|
@ -4,16 +4,24 @@ return array(
|
|||
'%.*%' => array(
|
||||
'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
|
||||
'body' => array(
|
||||
'//div[@class="story-body"]',
|
||||
'//div[@class="story-body__inner"] | //div[@class="article"]',
|
||||
'//div[@class="indPost"]'
|
||||
),
|
||||
'strip' => array(
|
||||
'//form',
|
||||
'//div[@id="headline"]',
|
||||
'//*[@class="warning"]',
|
||||
'//span[@class="off-screen"]',
|
||||
'//span[@class="story-image-copyright"]',
|
||||
'//div[@class="ad_wrapper"]',
|
||||
'//div[@id="article-sidebar"]',
|
||||
'//div[@class="data-table-outer"]',
|
||||
'//*[@class="story-date"]',
|
||||
'//*[@class="story-header"]',
|
||||
'//figure[contains(@class,"has-caption")]',
|
||||
'//*[@class="story-related"]',
|
||||
'//*[contains(@class, "byline")]',
|
||||
'//p[contains(@class, "media-message")]',
|
||||
'//*[contains(@class, "story-feature")]',
|
||||
'//*[@id="video-carousel-container"]',
|
||||
'//*[@id="also-related-links"]',
|
||||
|
@ -21,4 +29,4 @@ return array(
|
|||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
|
|
@ -9,7 +9,9 @@ return array(
|
|||
'strip' => array(
|
||||
'//*[@class="bucket img"]',
|
||||
'//*[@class="creditwrap"]',
|
||||
'//*[@class="credit"]',
|
||||
'//*[@class="captionwrap"]',
|
||||
'//*[@class="toggle-caption"]',
|
||||
'//*[contains(@class, "enlargebtn")]',
|
||||
),
|
||||
)
|
||||
|
|
|
@ -2,14 +2,14 @@
|
|||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html',
|
||||
'test_url' => 'http://www.numerama.com/sciences/125959-recherches-ladn-recompensees-nobel-de-chimie.html',
|
||||
'body' => array(
|
||||
'//div[@class="col_left"]//div[@class="content"]',
|
||||
'//article',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="news_social"]',
|
||||
'//div[@id="newssuiv"]',
|
||||
'//footer',
|
||||
'//section[@class="related-article"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://zdnet.com.feedsportal.com/c/35462/f/675637/s/4a33c93e/sc/11/l/0L0Szdnet0N0Carticle0Cchina0Eus0Eagree0Eon0Ecybercrime0Ecooperation0Eamid0Econtinued0Etension0C0Tftag0FRSSbaffb68/story01.htm',
|
||||
'body' => array(
|
||||
'//p[@class="summary"]',
|
||||
'//div[contains(@class,"storyBody")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//*[contains(@class,"ad-")]',
|
||||
'//p/span',
|
||||
'//script',
|
||||
'//p[@class="summary"]',
|
||||
'//div[contains(@class,"relatedContent")]',
|
||||
'//div[contains(@class,"loader")]',
|
||||
'//p[@class="photoDetails"]',
|
||||
'//div[@class="thumbnailSlider"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
Загрузка…
Ссылка в новой задаче