This commit is contained in:
Bernhard Posselt 2014-11-09 11:44:09 +01:00
Родитель e5d18a22c3
Коммит b9e330109b
37 изменённых файлов: 400 добавлений и 177 удалений

Просмотреть файл

@ -32,6 +32,6 @@
"require": {
"pear/net_url2": "~2.1",
"ezyang/htmlpurifier": "~4.6",
"fguillot/picofeed": "0.1.0-dev-dev"
"fguillot/picofeed": "~0.1.0"
}
}

16
composer.lock сгенерированный
Просмотреть файл

@ -4,7 +4,7 @@
"Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"hash": "a50b15fc49d316cb5b2db3e9e7ea78b3",
"hash": "3cbce8173a1c93656be80baf11715718",
"packages": [
{
"name": "ezyang/htmlpurifier",
@ -53,16 +53,16 @@
},
{
"name": "fguillot/picofeed",
"version": "dev-0.1.0-dev",
"version": "v0.1.0",
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "e7e32522b487256c3164eeece30203313b09456a"
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/e7e32522b487256c3164eeece30203313b09456a",
"reference": "e7e32522b487256c3164eeece30203313b09456a",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d36a878e912a04c15f49da97e8d627280ae6918c",
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c",
"shasum": ""
},
"require": {
@ -86,7 +86,7 @@
],
"description": "Modern library to write or read feeds (RSS/Atom)",
"homepage": "http://fguillot.github.io/picoFeed",
"time": "2014-11-05 01:21:29"
"time": "2014-11-08 14:43:27"
},
{
"name": "pear/net_url2",
@ -156,9 +156,7 @@
"packages-dev": [],
"aliases": [],
"minimum-stability": "stable",
"stability-flags": {
"fguillot/picofeed": 20
},
"stability-flags": [],
"prefer-stable": false,
"platform": [],
"platform-dev": []

2
vendor/autoload.php поставляемый
Просмотреть файл

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f::getLoader();
return ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870::getLoader();

10
vendor/composer/autoload_real.php поставляемый
Просмотреть файл

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer
class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
class ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870
{
private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
return self::$loader;
}
spl_autoload_register(array('ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870', 'loadClassLoader'));
$includePaths = require __DIR__ . '/include_paths.php';
array_push($includePaths, get_include_path());
@ -46,14 +46,14 @@ class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequire4750e3a2a6327c742e19653287d1e34f($file);
composerRequire1a5ae2c0139f33f17ed4df8077538870($file);
}
return $loader;
}
}
function composerRequire4750e3a2a6327c742e19653287d1e34f($file)
function composerRequire1a5ae2c0139f33f17ed4df8077538870($file)
{
require $file;
}

14
vendor/composer/installed.json поставляемый
Просмотреть файл

@ -114,25 +114,25 @@
},
{
"name": "fguillot/picofeed",
"version": "dev-0.1.0-dev",
"version_normalized": "dev-0.1.0-dev",
"version": "v0.1.0",
"version_normalized": "0.1.0.0",
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "e7e32522b487256c3164eeece30203313b09456a"
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/e7e32522b487256c3164eeece30203313b09456a",
"reference": "e7e32522b487256c3164eeece30203313b09456a",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d36a878e912a04c15f49da97e8d627280ae6918c",
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2014-11-05 01:21:29",
"time": "2014-11-08 14:43:27",
"type": "library",
"installation-source": "dist",
"installation-source": "source",
"autoload": {
"psr-0": {
"PicoFeed": "lib/"

8
vendor/fguillot/picofeed/README.markdown поставляемый
Просмотреть файл

@ -42,6 +42,13 @@ Authors
- [Bernhard Posselt](https://github.com/Raydiation)
- [David Pennington](https://github.com/Xeoncross)
Real world usage
----------------
- [AnythingNew](http://anythingnew.co)
- [Miniflux](http://miniflux.net)
- [Owncloud News](https://github.com/owncloud/news)
Documentation
-------------
@ -53,5 +60,6 @@ Documentation
- [OPML file importation](docs/opml-import.markdown)
- [OPML file exportation](docs/opml-export.markdown)
- [Web scraping](docs/grabber.markdown)
- [Exceptions](docs/exceptions.markdown)
- [Debugging](docs/debugging.markdown)
- [Configuration](docs/config.markdown)

10
vendor/fguillot/picofeed/docs/config.markdown поставляемый
Просмотреть файл

@ -8,8 +8,8 @@ To change the default parameters, you have to use the Config class.
Create a new instance and pass it to the Reader object like that:
```php
use PicoFeed\Reader;
use PicoFeed\Config;
use PicoFeed\Reader\Reader;
use PicoFeed\Config\Config;
$config = new Config;
$config->setClientUserAgent('My custom RSS Reader')
@ -80,7 +80,7 @@ $config->setProxyHostname('proxy.example.org');
- Argument value: port number (integer)
```php
$config->getProxyPort(8118);
$config->setProxyPort(8118);
```
### Proxy username
@ -132,7 +132,7 @@ Parser
### Hash algorithm used for item id generation
- Method name: `setParserHashAlgo()`
- Default value: `crc32b`
- Default value: `sha256`
- Argument value: any value returned by the function `hash_algos()` (string)
- See: http://php.net/hash_algos
@ -147,7 +147,7 @@ $config->setParserHashAlgo('sha1');
- Argument value: boolean
```php
$config->setContentFiltering();
$config->setContentFiltering(false);
```
### Timezone

Просмотреть файл

@ -1,13 +1,18 @@
Debugging
=========
Get log messages
----------------
Logging
-------
PicoFeed logs the execution flow in memory, so if a feed doesn't work correctly it's easy to see what is wrong.
### Reading messages
```php
print_r(PicoFeed\Logging::getMessages());
use PicoFeed\Logging\Logger;
// All messages are stored inside an Array
print_r(Logger::getMessages());
```
You will get an output like this:
@ -36,11 +41,46 @@ Array
)
```
Remove messages
---------------
### Remove messages
All messages are stored in memory, if you need to clear them just call the method `Logging::deleteMessages()`:
All messages are stored in memory, if you need to clear them just call the method `Logger::deleteMessages()`:
```php
PicoFeed\Logging::deleteMessages();
Logger::deleteMessages();
```
Command line utility
====================
PicoFeed provides a basic command line tool to debug feeds quickly.
The tool is located in the project's root directory.
### Usage
```bash
$ ./picofeed
Usage:
./picofeed feed <feed-url> # Parse a feed a dump the ouput on stdout
./picofeed debug <feed-url> # Display all logging messages for a feed
./picofeed item <feed-url> <item-id> # Fetch only one item
./picofeed nofilter <feed-url> <item-id> # Fetch an item but with no content filtering
```
### Example
```bash
$ ./picofeed debug https://linuxfr.org/
Exception thrown ===> "Invalid SSL certificate"
Array
(
[0] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Fetch URL: https://linuxfr.org/
[1] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Etag provided:
[2] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Last-Modified provided:
[3] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL total time: 1.850634
[4] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL dns lookup time: 0.00093
[5] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL connect time: 0.115213
[6] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL speed download: 0
[7] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL effective url: https://linuxfr.org/
[8] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL error: SSL certificate problem: Invalid certificate chain
)
```

28
vendor/fguillot/picofeed/docs/exceptions.markdown поставляемый Normal file
Просмотреть файл

@ -0,0 +1,28 @@
Exceptions
==========
All exceptions inherit from the standard `Exception` class.
### Library Exceptions
- `PicoFeed\PicoFeedException`: Base class exception for the library
### Client Exceptions
- `PicoFeed\Client\ClientException`: Base exception class for the Client class
- `PicoFeed\Client\InvalidCertificateException`: Invalid SSL certificate
- `PicoFeed\Client\InvalidUrlException`: Malformed URL, page not found (404), unable to establish a connection
- `PicoFeed\Client\MaxRedirectException`: Maximum of HTTP redirections reached
- `PicoFeed\Client\MaxSizeException`: The response size exceeds the maximum allowed
- `PicoFeed\Client\TimeoutException`: Connection timeout
### Parser Exceptions
- `PicoFeed\Parser\ParserException`: Base exception class for the Parser class
- `PicoFeed\Parser\MalformedXmlException`: XML Parser error
### Reader Exceptions
- `PicoFeed\Reader\ReaderException`: Base exception class for the Reader
- `PicoFeed\Reader\SubscriptionNotFoundException`: Unable to find a feed for the given website
- `PicoFeed\Reader\UnsupportedFeedFormatException`: Unable to detect the feed format

Просмотреть файл

@ -5,15 +5,13 @@ Find and download the favicon
-----------------------------
```php
use PicoFeed\Favicon;
use PicoFeed\Client\Favicon;
$favicon = new Favicon;
// The icon link is https://bits.wikimedia.org/favicon/wikipedia.ico
$icon_link = $favicon->find('https://en.wikipedia.org/');
$icon_content = $favicon->getContent();
```
PicoFeed will first try to find the favicon from the meta tags and fall back to the `favicon.ico` located in the website's root if nothing is found.
@ -27,14 +25,12 @@ Check if a favicon link exists
------------------------------
```php
use PicoFeed\Favicon;
use PicoFeed\Client\Favicon;
$favicon = new Favicon;
// Return true if the file exists
var_dump($favicon->exists('http://php.net/favicon.ico'));
```
Use personalized HTTP settings
@ -43,14 +39,12 @@ Use personalized HTTP settings
Like other classes, the Favicon class supports the Config object as a constructor argument:
```php
use PicoFeed\Config;
use PicoFeed\Favicon;
use PicoFeed\Config\Config;
use PicoFeed\Client\Favicon;
$config = new Config;
$config->setClientUserAgent('My RSS Reader');
$favicon = new Favicon($config);
$favicon->find('https://github.com');
```

Просмотреть файл

@ -1,11 +1,13 @@
Feed creation
=============
PicoFeed can also generate Atom and RSS feeds.
Generate RSS 2.0 feed
----------------------
```php
use PicoFeed\Writers\Rss20;
use PicoFeed\Syndication\Rss20;
$writer = new Rss20();
$writer->title = 'My site';
@ -48,7 +50,7 @@ Generate Atom feed
------------------
```php
use PicoFeed\Writers\Atom;
use PicoFeed\Syndication\Atom;
$writer = new Atom();
$writer->title = 'My site';

Просмотреть файл

@ -13,7 +13,7 @@ try {
$reader = new Reader;
// Return a resource
$resource = $reader->download('https://linuxfr.org/news.atom');
$resource = $reader->download('http://linuxfr.org/news.atom');
// Return the right parser instance according to the feed format
$parser = $reader->getParser(
@ -59,16 +59,6 @@ Item::author = Syvolc
Item::enclosure_url =
Item::enclosure_type =
Item::content = 18307 bytes
----
Item::id = d0ebddc90bfc3f109f9be00a3bb0b4a770af7a647cdc88454fe15d79168e0dea
Item::title = Fuzix OS, parce que les petites choses sont belles
Item::url = http://linuxfr.org/news/fuzix-os-parce-que-les-petites-choses-sont-belles
Item::date = 1415112167
Item::language = en-US
Item::author = Thomas DEBESSE
Item::enclosure_url =
Item::enclosure_type =
Item::content = 6104 bytes
....
```
@ -134,7 +124,54 @@ catch (PicoFeedException $e) {
HTTP caching
------------
TODO
PicoFeed supports HTTP caching to avoid unnecessary processing.
1. After the first download, save in your database the values of the Etag and LastModified HTTP headers
2. For the next requests, provide those values to the `download()` method and check if the feed was modified or not
Here is an example:
```php
try {
// Fetch from your database the previous values of the Etag and LastModified headers
$etag = '...';
$last_modified = '...';
$reader = new Reader;
// Provide those values to the download method
$resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag);
// Return true if the remote content has changed
if ($resource->isModified()) {
$parser = $reader->getParser(
$resource->getUrl(),
$resource->getContent(),
$resource->getEncoding()
);
$feed = $parser->execute();
// Save your feed in your database
// ...
// Store the Etag and the LastModified headers in your database for the next requests
$etag = $resource->getEtag();
$last_modified = $resource->getLastModified();
// ...
}
else {
echo 'Not modified, nothing to do!';
}
}
catch (PicoFeedException $e) {
// Do something...
}
```
Feed and item properties
@ -162,3 +199,24 @@ $feed->items[0]->getEnclosureUrl(); // Enclosure url
$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...)
$feed->items[0]->getContent(); // Item content (filtered or raw)
```
RTL language detection
----------------------
There is a utility method to determine if a language code is Right-To-Left or not:
```php
// Return true if RTL
Parser::isLanguageRTL($item->getLanguage());
```
Known RTL languages are:
- Arabic (ar-**)
- Farsi (fa-**)
- Urdu (ur-**)
- Pashtu (ps-**)
- Syriac (syr-**)
- Divehi (dv-**)
- Hebrew (he-**)
- Yiddish (yi-**)

Просмотреть файл

@ -6,33 +6,48 @@ The web scraper is useful for feeds that display only a summary of articles, the
How the content grabber works?
------------------------------
1. Try with rules first (xpath patterns) for the domain name (see `PicoFeed\Rules\`)
1. Try with rules first (XPath queries) for the domain name (see `PicoFeed\Rules\`)
2. Try to find the text content by using common attributes for class and id
3. Finally, if nothing is found, the feed content is displayed
**The best results are obtained with Xpath rules file.**
**The best results are obtained with XPath rules file.**
How to use the content scraper?
-------------------------------
Before parsing all items, just call the method `$parser->enableContentGrabber()`:
```php
use PicoFeed\Reader;
use PicoFeed\Reader\Reader;
use PicoFeed\PicoFeedException;
$reader = new Reader;
$reader->download('http://www.egscomics.com/rss.php');
try {
$parser = $reader->getParser();
$reader = new Reader;
if ($parser !== false) {
// Return a resource
$resource = $reader->download('http://www.egscomics.com/rss.php');
$parser->enableContentGrabber(); // <= Enable the content grabber
// Return the right parser instance according to the feed format
$parser = $reader->getParser(
$resource->getUrl(),
$resource->getContent(),
$resource->getEncoding()
);
// Enable content grabber before parsing items
$parser->enableContentGrabber();
// Return a Feed object
$feed = $parser->execute();
// ...
}
catch (PicoFeedException $e) {
// Do Something...
}
```
When the content scraper is enabled, everything will be slower.
For each item a new HTTP request is made and the HTML downloaded is parsed with XML/Xpath.
**For each item a new HTTP request is made** and the HTML downloaded is parsed with XML/XPath.
Configuration
-------------

Просмотреть файл

@ -4,12 +4,14 @@ Installation
Versions
--------
- Development version: branch master
- Development version: master
- Available versions:
- v0.1.0 (stable)
- v0.0.2
- v0.0.1
Note: The public API has changed between 0.0.x and 0.1.0
Installation with Composer
--------------------------
@ -35,7 +37,7 @@ And download the code:
composer install # or update
```
Usage example with the Composer autoloading:
Usage example with the Composer autoloader:
```php
<?php
@ -47,7 +49,7 @@ use PicoFeed\Reader\Reader;
try {
$reader = new Reader;
$resource = $reader->download('https://linuxfr.org/news.atom');
$resource = $reader->download('http://linuxfr.org/news.atom');
$parser = $reader->getParser(
$resource->getUrl(),

Просмотреть файл

@ -5,7 +5,7 @@ Example with no categories
--------------------------
```php
use PicoFeed\Export;
use PicoFeed\Serialization\Export;
$feeds = array(
array(
@ -26,7 +26,7 @@ Example with categories
-----------------------
```php
use PicoFeed\Export;
use PicoFeed\Serialization\Export;
$feeds = array(
'my category' => array(

Просмотреть файл

@ -4,7 +4,7 @@ Import OPML file
Importing a list of subscriptions is pretty straightforward:
```php
use PicoFeed\Import;
use PicoFeed\Serialization\Import;
$opml = file_get_contents('mySubscriptions.opml');
$import = new Import($opml);

45
vendor/fguillot/picofeed/example.php поставляемый Normal file
Просмотреть файл

@ -0,0 +1,45 @@
<?php

// Example script: download a feed only when it has changed since the last
// run, using the Etag / Last-Modified values kept from the previous download.

require 'vendor/autoload.php';

use PicoFeed\PicoFeedException;
use PicoFeed\Reader\Reader;

try {

    // Placeholders: load the values stored after the previous download
    $etag = '...';
    $last_modified = '...';

    $reader = new Reader;

    // Note the argument order: the Last-Modified value comes before the Etag
    $resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag);

    if (! $resource->isModified()) {
        echo 'Not modified, nothing to do!';
    }
    else {

        // Build the parser from the downloaded resource
        $parser = $reader->getParser(
            $resource->getUrl(),
            $resource->getContent(),
            $resource->getEncoding()
        );

        $feed = $parser->execute();

        // Persist $feed in your storage here
        // ...

        // Keep the fresh header values for the next run
        $etag = $resource->getEtag();
        $last_modified = $resource->getLastModified();

        // ...
    }
}
catch (PicoFeedException $e) {
    // Do something...
}

Просмотреть файл

@ -3,7 +3,7 @@
namespace PicoFeed\Client;
use LogicException;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
/**
* Client class
@ -166,9 +166,9 @@ abstract class Client
$this->url = $url;
}
Logging::setMessage(get_called_class().' Fetch URL: '.$this->url);
Logging::setMessage(get_called_class().' Etag provided: '.$this->etag);
Logging::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
Logger::setMessage(get_called_class().' Fetch URL: '.$this->url);
Logger::setMessage(get_called_class().' Etag provided: '.$this->etag);
Logger::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
$response = $this->doRequest();
@ -204,7 +204,7 @@ abstract class Client
}
if ($this->is_modified === false) {
Logging::setMessage(get_called_class().' Resource not modified');
Logger::setMessage(get_called_class().' Resource not modified');
}
}
@ -297,10 +297,10 @@ abstract class Client
}
}
Logging::setMessage(get_called_class().' HTTP status code: '.$status);
Logger::setMessage(get_called_class().' HTTP status code: '.$status);
foreach ($headers as $name => $value) {
Logging::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
}
return array($status, $headers);

Просмотреть файл

@ -2,7 +2,7 @@
namespace PicoFeed\Client;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
/**
* cURL HTTP client
@ -129,18 +129,18 @@ class Curl extends Client
{
if ($this->proxy_hostname) {
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP');
curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);
if ($this->proxy_username) {
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
}
else {
Logging::setMessage(get_called_class().' Proxy credentials: No');
Logger::setMessage(get_called_class().' Proxy credentials: No');
}
}
@ -185,16 +185,16 @@ class Curl extends Client
$ch = $this->prepareContext();
curl_exec($ch);
Logging::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
Logging::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
Logging::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
Logging::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
Logging::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
Logger::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
$curl_errno = curl_errno($ch);
if ($curl_errno) {
Logging::setMessage(get_called_class().' cURL error: '.curl_error($ch));
Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch));
curl_close($ch);
$this->handleError($curl_errno);
@ -246,11 +246,12 @@ class Curl extends Client
*
* @access private
* @param string $location Redirected URL
* @return boolean|array
* @return array
*/
private function handleRedirection($location)
{
$nb_redirects = 0;
$result = array();
$this->url = $location;
$this->body = '';
$this->body_length = 0;
@ -262,7 +263,7 @@ class Curl extends Client
$nb_redirects++;
if ($nb_redirects >= $this->max_redirects) {
return false;
throw new MaxRedirectException('Maximum number of redirections reached');
}
$result = $this->doRequest(false);
@ -275,11 +276,11 @@ class Curl extends Client
$this->headers_counter = 0;
}
else {
return $result;
break;
}
}
return false;
return $result;
}
/**

Просмотреть файл

@ -5,7 +5,7 @@ namespace PicoFeed\Client;
use DOMXpath;
use PicoFeed\Config\Config;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\Parser\XmlParser;
/**
@ -67,7 +67,7 @@ class Favicon
{
try {
Logging::setMessage(get_called_class().' Download => '.$url);
Logger::setMessage(get_called_class().' Download => '.$url);
$client = Client::getInstance();
$client->setConfig($this->config);

Просмотреть файл

@ -5,7 +5,7 @@ namespace PicoFeed\Client;
use DOMXPath;
use PicoFeed\Encoding\Encoding;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\Filter\Filter;
use PicoFeed\Parser\XmlParser;
@ -147,7 +147,7 @@ class Grabber
* Set config object
*
* @access public
* @param \PicoFeed\Config $config Config instance
* @param \PicoFeed\Config\Config $config Config instance
* @return \PicoFeed\Grabber
*/
public function setConfig($config)
@ -188,30 +188,30 @@ class Grabber
{
if ($this->html) {
Logging::setMessage(get_called_class().' Fix encoding');
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
Logger::setMessage(get_called_class().' Fix encoding');
Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
$this->html = Filter::stripHeadTags($this->html);
$this->html = Encoding::convert($this->html, $this->encoding);
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
Logger::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
$rules = $this->getRules();
if (is_array($rules)) {
Logging::setMessage(get_called_class().' Parse content with rules');
Logger::setMessage(get_called_class().' Parse content with rules');
$this->parseContentWithRules($rules);
}
else {
Logging::setMessage(get_called_class().' Parse content with candidates');
Logger::setMessage(get_called_class().' Parse content with candidates');
$this->parseContentWithCandidates();
}
}
else {
Logging::setMessage(get_called_class().' No content fetched');
Logger::setMessage(get_called_class().' No content fetched');
}
Logging::setMessage(get_called_class().' Content length: '.strlen($this->content).' bytes');
Logging::setMessage(get_called_class().' Grabber done');
Logger::setMessage(get_called_class().' Content length: '.strlen($this->content).' bytes');
Logger::setMessage(get_called_class().' Grabber done');
return $this->content !== '';
}
@ -260,14 +260,12 @@ class Grabber
$files[] = substr($hostname, 0, $pos);
}
// Logging::setMessage(var_export($files, true));
foreach ($files as $file) {
$filename = __DIR__.'/../Rules/'.$file.'.php';
if (file_exists($filename)) {
Logging::setMessage(get_called_class().' Load rule: '.$file);
Logger::setMessage(get_called_class().' Load rule: '.$file);
return include $filename;
}
}
@ -283,7 +281,7 @@ class Grabber
*/
public function parseContentWithRules(array $rules)
{
// Logging::setMessage($this->html);
// Logger::setMessage($this->html);
$dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html);
$xpath = new DOMXPath($dom);
@ -329,13 +327,13 @@ class Grabber
// Try to lookup in each tag
foreach ($this->candidatesAttributes as $candidate) {
Logging::setMessage(get_called_class().' Try this candidate: "'.$candidate.'"');
Logger::setMessage(get_called_class().' Try this candidate: "'.$candidate.'"');
$nodes = $xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]');
if ($nodes !== false && $nodes->length > 0) {
$this->content = $dom->saveXML($nodes->item(0));
Logging::setMessage(get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
Logger::setMessage(get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
break;
}
}
@ -347,16 +345,16 @@ class Grabber
if ($nodes !== false && $nodes->length > 0) {
$this->content = $dom->saveXML($nodes->item(0));
Logging::setMessage(get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
Logger::setMessage(get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
}
}
if (strlen($this->content) < 50) {
Logging::setMessage(get_called_class().' No enought content fetched, get the full body');
Logger::setMessage(get_called_class().' No enought content fetched, get the full body');
$this->content = $dom->saveXML($dom->firstChild);
}
Logging::setMessage(get_called_class().' Strip garbage');
Logger::setMessage(get_called_class().' Strip garbage');
$this->stripGarbage();
}
@ -378,7 +376,7 @@ class Grabber
$nodes = $xpath->query('//'.$tag);
if ($nodes !== false && $nodes->length > 0) {
Logging::setMessage(get_called_class().' Strip tag: "'.$tag.'"');
Logger::setMessage(get_called_class().' Strip tag: "'.$tag.'"');
foreach ($nodes as $node) {
$node->parentNode->removeChild($node);
}
@ -390,7 +388,7 @@ class Grabber
$nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]');
if ($nodes !== false && $nodes->length > 0) {
Logging::setMessage(get_called_class().' Strip attribute: "'.$attribute.'"');
Logger::setMessage(get_called_class().' Strip attribute: "'.$attribute.'"');
foreach ($nodes as $node) {
$node->parentNode->removeChild($node);
}

Просмотреть файл

@ -2,7 +2,7 @@
namespace PicoFeed\Client;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
/**
* Stream context HTTP client
@ -63,16 +63,16 @@ class Stream extends Client
if ($this->proxy_hostname) {
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
$context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
$context['http']['request_fulluri'] = true;
if ($this->proxy_username) {
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
}
else {
Logging::setMessage(get_called_class().' Proxy credentials: No');
Logger::setMessage(get_called_class().' Proxy credentials: No');
}
}

Просмотреть файл

@ -132,6 +132,11 @@ class Html
return $this->output;
}
/**
* Called after XML parsing
*
* @access public
*/
public function postFilter()
{
$this->output = $this->tag->removeEmptyTags($this->output);

Просмотреть файл

@ -11,7 +11,7 @@ use DateTimeZone;
* @author Frederic Guillot
* @package Logging
*/
class Logging
class Logger
{
/**
* List of messages

Просмотреть файл

@ -3,7 +3,6 @@
namespace PicoFeed\Parser;
use SimpleXMLElement;
use PicoFeed\Logging\Logging;
use PicoFeed\Filter\Filter;
use PicoFeed\Client\Url;

Просмотреть файл

@ -8,7 +8,7 @@ use DateTimeZone;
use PicoFeed\Encoding\Encoding;
use PicoFeed\Filter\Filter;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\Client\Url;
use PicoFeed\Client\Grabber;
@ -109,7 +109,7 @@ abstract class Parser
$this->content = Filter::stripXmlTag($content);
// Encode everything in UTF-8
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
// Workarounds
@ -124,13 +124,13 @@ abstract class Parser
*/
public function execute()
{
Logging::setMessage(get_called_class().': begin parsing');
Logger::setMessage(get_called_class().': begin parsing');
$xml = XmlParser::getSimpleXml($this->content);
if ($xml === false) {
Logging::setMessage(get_called_class().': XML parsing error');
Logging::setMessage(XmlParser::getErrors());
Logger::setMessage(get_called_class().': XML parsing error');
Logger::setMessage(XmlParser::getErrors());
throw new MalformedXmlException('XML parsing error');
}
@ -172,7 +172,7 @@ abstract class Parser
$feed->items[] = $item;
}
Logging::setMessage(get_called_class().PHP_EOL.$feed);
Logger::setMessage(get_called_class().PHP_EOL.$feed);
return $feed;
}
@ -243,7 +243,7 @@ abstract class Parser
$item->content = $filter->execute();
}
else {
Logging::setMessage(get_called_class().': Content filtering disabled');
Logger::setMessage(get_called_class().': Content filtering disabled');
}
}

Просмотреть файл

@ -3,7 +3,6 @@
namespace PicoFeed\Parser;
use SimpleXMLElement;
use PicoFeed\Logging\Logging;
use PicoFeed\Filter\Filter;
use PicoFeed\Client\Url;

Просмотреть файл

@ -7,7 +7,7 @@ use DOMXPath;
use PicoFeed\Config\Config;
use PicoFeed\Client\Client;
use PicoFeed\Client\Url;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\Filter\Filter;
use PicoFeed\Parser\XmlParser;
@ -50,7 +50,7 @@ class Reader
public function __construct(Config $config = null)
{
$this->config = $config ?: new Config;
Logging::setTimezone($this->config->getTimezone());
Logger::setTimezone($this->config->getTimezone());
}
/**
@ -111,7 +111,7 @@ class Reader
*/
public function find($url, $html)
{
Logging::setMessage(get_called_class().': Try to discover subscriptions');
Logger::setMessage(get_called_class().': Try to discover subscriptions');
$dom = XmlParser::getHtmlDocument($html);
$xpath = new DOMXPath($dom);
@ -140,7 +140,7 @@ class Reader
}
}
Logging::setMessage(get_called_class().': '.implode(', ', $links));
Logger::setMessage(get_called_class().': '.implode(', ', $links));
return $links;
}
@ -183,7 +183,7 @@ class Reader
{
$first_tag = Filter::getFirstTag($content);
Logging::setMessage(get_called_class().': DetectFormat(): '.$first_tag);
Logger::setMessage(get_called_class().': DetectFormat(): '.$first_tag);
foreach ($this->formats as $parser => $needles) {

10
vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php поставляемый Normal file
Просмотреть файл

@ -0,0 +1,10 @@
<?php
// Rule definition for degroupnews.com (the site is taken from this file's name).
// The file returns a plain array with three keys: 'test_url', 'body' and 'strip'.
// NOTE(review): the consumer of this array is not visible here; the roles described
// below are inferred from the key names and should be confirmed against the rule loader.
return array(
// Sample article URL on the site.
'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
// XPath expression(s) - presumably selecting the node that holds the article body.
'body' => array(
'//div[@class="contenu"]',
),
// XPath expression(s) - presumably nodes to drop from the selected body.
// Matches any div whose class attribute contains the substring "a2a".
'strip' => array(
'//div[contains(@class, "a2a")]'
),
);

9
vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php поставляемый Normal file
Просмотреть файл

@ -0,0 +1,9 @@
<?php
// Rule definition for sitepoint.com (the site is taken from this file's name).
// The file returns a plain array with three keys: 'test_url', 'body' and 'strip'.
// NOTE(review): the consumer of this array is not visible here; the roles described
// below are inferred from the key names and should be confirmed against the rule loader.
return array(
// Sample article URL on the site.
'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/',
// XPath expression(s) - presumably selecting the node that holds the article body.
// The predicate is an exact match on the class attribute value "article_body".
'body' => array(
'//section[@class="article_body"]',
),
// No strip expressions are defined for this site; the key is kept with an empty list.
'strip' => array(
),
);

11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php поставляемый Normal file
Просмотреть файл

@ -0,0 +1,11 @@
<?php
// Rule definition for undeadly.org (the site is taken from this file's name).
// The file returns a plain array with three keys: 'test_url', 'body' and 'strip'.
// NOTE(review): the consumer of this array is not visible here; the roles described
// below are inferred from the key names and should be confirmed against the rule loader.
return array(
// Sample article URL on the site.
'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155',
// XPath expression(s) - presumably selecting the node that holds the article body.
// This is an absolute, positional path (third table under body, then the second
// nested table), so it depends on the page keeping exactly this table layout.
// NOTE(review): the first table step goes through a tbody element while the nested
// one does not - confirm the path matches the DOM produced by the HTML parser in use.
'body' => array(
'/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]'
),
// XPath expression(s) - presumably nodes to drop from the selected body:
// every style element and every font element.
'strip' => array(
'//style',
'//font',
),
);

Просмотреть файл

@ -5,7 +5,7 @@ namespace PicoFeed\Serialization;
use SimpleXmlElement;
use StdClass;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\Parser\XmlParser;
/**
@ -51,17 +51,17 @@ class Import
*/
public function execute()
{
Logging::setMessage(get_called_class().': start importation');
Logger::setMessage(get_called_class().': start importation');
$xml = XmlParser::getSimpleXml(trim($this->content));
if ($xml === false || $xml->getName() !== 'opml' || ! isset($xml->body)) {
Logging::setMessage(get_called_class().': OPML tag not found or malformed XML document');
Logger::setMessage(get_called_class().': OPML tag not found or malformed XML document');
return false;
}
$this->parseEntries($xml->body);
Logging::setMessage(get_called_class().': '.count($this->items).' subscriptions found');
Logger::setMessage(get_called_class().': '.count($this->items).' subscriptions found');
return $this->items;
}

Просмотреть файл

@ -70,7 +70,7 @@ class Rss20 extends Writer
// <description/>
$description = $this->dom->createElement('description');
$description->appendChild($this->dom->createTextNode(isset($this->description) ? $this->description : $this->title));
$description->appendChild($this->dom->createTextNode($this->description ?: $this->title));
$channel->appendChild($description);
// <pubDate/>

Просмотреть файл

@ -9,7 +9,6 @@ use RuntimeException;
*
* @author Frederic Guillot
* @package Syndication
* @property string $description Feed description
*/
abstract class Writer
{
@ -61,6 +60,14 @@ abstract class Writer
*/
public $title = '';
/**
* Feed description
*
* @access public
* @var string
*/
public $description = '';
/**
* Feed modification date (timestamp)
*

55
vendor/fguillot/picofeed/picofeed поставляемый
Просмотреть файл

@ -1,32 +1,35 @@
#!/usr/bin/env php
<?php
require_once './vendor/autoload.php';
require_once 'vendor/autoload.php';
use PicoFeed\Reader\Reader;
use PicoFeed\Logging\Logging;
use PicoFeed\Logging\Logger;
use PicoFeed\PicoFeedException;
function get_feed($url, $disable_filtering = false)
{
$reader = new Reader;
$reader->download($url);
try {
$parser = $reader->getParser();
$reader = new Reader;
$resource = $reader->discover($url);
if ($disable_filtering) {
$parser->disableContentFiltering();
}
$parser = $reader->getParser(
$resource->getUrl(),
$resource->getContent(),
$resource->getEncoding()
);
if ($parser !== false) {
$feed = $parser->execute();
if ($feed !== false) {
return $feed;
if ($disable_filtering) {
$parser->disableContentFiltering();
}
}
return false;
return $parser->execute();
}
catch (PicoFeedException $e) {
echo 'Exception thrown ===> "'.$e->getMessage().'"'.PHP_EOL;
return false;
}
}
function get_item($feed, $item_id)
@ -45,42 +48,34 @@ function get_item($feed, $item_id)
function dump_feed($url)
{
$feed = get_feed($url);
if ($feed === false) {
die("Unable to fetch the feed\n");
}
echo $feed;
}
function debug_feed($url)
{
get_feed($url);
print_r(Logging::getMessages());
print_r(Logger::getMessages());
}
function dump_item($url, $item_id)
{
$feed = get_feed($url);
if ($feed === false) {
die("Unable to fetch the feed\n");
if ($feed !== false) {
get_item($feed, $item_id);
}
get_item($feed, $item_id);
}
function nofilter_item($url, $item_id)
{
$feed = get_feed($url, true);
if ($feed === false) {
die("Unable to fetch the feed\n");
if ($feed !== false) {
get_item($feed, $item_id);
}
get_item($feed, $item_id);
}
// Parse command line arguments
if ($argc === 4) {
switch ($argv[1]) {
case 'item':

Просмотреть файл

@ -1,10 +1,9 @@
<?php
namespace PicoFeed\Client;
use PHPUnit_Framework_TestCase;
use PicoFeed\Reader\Reader;
use PicoFeed\Logging\Logging;
class GrabberTest extends PHPUnit_Framework_TestCase
{

Просмотреть файл

@ -1,9 +1,9 @@
<?php
namespace PicoFeed\Client;
use PHPUnit_Framework_TestCase;
class UrlTest extends PHPUnit_Framework_TestCase
{
public function testHasScheme()