зеркало из https://github.com/nextcloud/news.git
update picofeed to 0.1.0
This commit is contained in:
Родитель
e5d18a22c3
Коммит
b9e330109b
|
@ -32,6 +32,6 @@
|
|||
"require": {
|
||||
"pear/net_url2": "~2.1",
|
||||
"ezyang/htmlpurifier": "~4.6",
|
||||
"fguillot/picofeed": "0.1.0-dev-dev"
|
||||
"fguillot/picofeed": "~0.1.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
"Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"hash": "a50b15fc49d316cb5b2db3e9e7ea78b3",
|
||||
"hash": "3cbce8173a1c93656be80baf11715718",
|
||||
"packages": [
|
||||
{
|
||||
"name": "ezyang/htmlpurifier",
|
||||
|
@ -53,16 +53,16 @@
|
|||
},
|
||||
{
|
||||
"name": "fguillot/picofeed",
|
||||
"version": "dev-0.1.0-dev",
|
||||
"version": "v0.1.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "e7e32522b487256c3164eeece30203313b09456a"
|
||||
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/e7e32522b487256c3164eeece30203313b09456a",
|
||||
"reference": "e7e32522b487256c3164eeece30203313b09456a",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d36a878e912a04c15f49da97e8d627280ae6918c",
|
||||
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
|
@ -86,7 +86,7 @@
|
|||
],
|
||||
"description": "Modern library to write or read feeds (RSS/Atom)",
|
||||
"homepage": "http://fguillot.github.io/picoFeed",
|
||||
"time": "2014-11-05 01:21:29"
|
||||
"time": "2014-11-08 14:43:27"
|
||||
},
|
||||
{
|
||||
"name": "pear/net_url2",
|
||||
|
@ -156,9 +156,7 @@
|
|||
"packages-dev": [],
|
||||
"aliases": [],
|
||||
"minimum-stability": "stable",
|
||||
"stability-flags": {
|
||||
"fguillot/picofeed": 20
|
||||
},
|
||||
"stability-flags": [],
|
||||
"prefer-stable": false,
|
||||
"platform": [],
|
||||
"platform-dev": []
|
||||
|
|
|
@ -4,4 +4,4 @@
|
|||
|
||||
require_once __DIR__ . '/composer' . '/autoload_real.php';
|
||||
|
||||
return ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f::getLoader();
|
||||
return ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870::getLoader();
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
// autoload_real.php @generated by Composer
|
||||
|
||||
class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
|
||||
class ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870
|
||||
{
|
||||
private static $loader;
|
||||
|
||||
|
@ -19,9 +19,9 @@ class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
|
|||
return self::$loader;
|
||||
}
|
||||
|
||||
spl_autoload_register(array('ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f', 'loadClassLoader'), true, true);
|
||||
spl_autoload_register(array('ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870', 'loadClassLoader'), true, true);
|
||||
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f', 'loadClassLoader'));
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit1a5ae2c0139f33f17ed4df8077538870', 'loadClassLoader'));
|
||||
|
||||
$includePaths = require __DIR__ . '/include_paths.php';
|
||||
array_push($includePaths, get_include_path());
|
||||
|
@ -46,14 +46,14 @@ class ComposerAutoloaderInit4750e3a2a6327c742e19653287d1e34f
|
|||
|
||||
$includeFiles = require __DIR__ . '/autoload_files.php';
|
||||
foreach ($includeFiles as $file) {
|
||||
composerRequire4750e3a2a6327c742e19653287d1e34f($file);
|
||||
composerRequire1a5ae2c0139f33f17ed4df8077538870($file);
|
||||
}
|
||||
|
||||
return $loader;
|
||||
}
|
||||
}
|
||||
|
||||
function composerRequire4750e3a2a6327c742e19653287d1e34f($file)
|
||||
function composerRequire1a5ae2c0139f33f17ed4df8077538870($file)
|
||||
{
|
||||
require $file;
|
||||
}
|
||||
|
|
|
@ -114,25 +114,25 @@
|
|||
},
|
||||
{
|
||||
"name": "fguillot/picofeed",
|
||||
"version": "dev-0.1.0-dev",
|
||||
"version_normalized": "dev-0.1.0-dev",
|
||||
"version": "v0.1.0",
|
||||
"version_normalized": "0.1.0.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "e7e32522b487256c3164eeece30203313b09456a"
|
||||
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/e7e32522b487256c3164eeece30203313b09456a",
|
||||
"reference": "e7e32522b487256c3164eeece30203313b09456a",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d36a878e912a04c15f49da97e8d627280ae6918c",
|
||||
"reference": "d36a878e912a04c15f49da97e8d627280ae6918c",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"time": "2014-11-05 01:21:29",
|
||||
"time": "2014-11-08 14:43:27",
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"installation-source": "source",
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"PicoFeed": "lib/"
|
||||
|
|
|
@ -42,6 +42,13 @@ Authors
|
|||
- [Bernhard Posselt](https://github.com/Raydiation)
|
||||
- [David Pennington](https://github.com/Xeoncross)
|
||||
|
||||
Real world usage
|
||||
----------------
|
||||
|
||||
- [AnythingNew](http://anythingnew.co)
|
||||
- [Miniflux](http://miniflux.net)
|
||||
- [Owncloud News](https://github.com/owncloud/news)
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
|
@ -53,5 +60,6 @@ Documentation
|
|||
- [OPML file importation](docs/opml-import.markdown)
|
||||
- [OPML file exportation](docs/opml-export.markdown)
|
||||
- [Web scraping](docs/grabber.markdown)
|
||||
- [Exceptions](docs/exceptions.markdown)
|
||||
- [Debugging](docs/debugging.markdown)
|
||||
- [Configuration](docs/config.markdown)
|
||||
|
|
|
@ -8,8 +8,8 @@ To change the default parameters, you have to use the Config class.
|
|||
Create a new instance and pass it to the Reader object like this:
|
||||
|
||||
```php
|
||||
use PicoFeed\Reader;
|
||||
use PicoFeed\Config;
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
$config = new Config;
|
||||
$config->setClientUserAgent('My custom RSS Reader')
|
||||
|
@ -80,7 +80,7 @@ $config->setProxyHostname('proxy.example.org');
|
|||
- Argument value: port number (integer)
|
||||
|
||||
```php
|
||||
$config->getProxyPort(8118);
|
||||
$config->setProxyPort(8118);
|
||||
```
|
||||
|
||||
### Proxy username
|
||||
|
@ -132,7 +132,7 @@ Parser
|
|||
### Hash algorithm used for item id generation
|
||||
|
||||
- Method name: `setParserHashAlgo()`
|
||||
- Default value: `crc32b`
|
||||
- Default value: `sha256`
|
||||
- Argument value: any value returned by the function `hash_algos()` (string)
|
||||
- See: http://php.net/hash_algos
|
||||
|
||||
|
@ -147,7 +147,7 @@ $config->setParserHashAlgo('sha1');
|
|||
- Argument value: boolean
|
||||
|
||||
```php
|
||||
$config->setContentFiltering();
|
||||
$config->setContentFiltering(false);
|
||||
```
|
||||
|
||||
### Timezone
|
||||
|
|
|
@ -1,13 +1,18 @@
|
|||
Debugging
|
||||
=========
|
||||
|
||||
Get log messages
|
||||
----------------
|
||||
Logging
|
||||
-------
|
||||
|
||||
PicoFeed logs the execution flow in memory, so if a feed doesn't work correctly it's easy to see what is wrong.
|
||||
|
||||
### Reading messages
|
||||
|
||||
```php
|
||||
print_r(PicoFeed\Logging::getMessages());
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
// All messages are stored inside an Array
|
||||
print_r(Logger::getMessages());
|
||||
```
|
||||
|
||||
You will get an output like this:
|
||||
|
@ -36,11 +41,46 @@ Array
|
|||
)
|
||||
```
|
||||
|
||||
Remove messages
|
||||
---------------
|
||||
### Remove messages
|
||||
|
||||
All messages are stored in memory, if you need to clear them just call the method `Logging::deleteMessages()`:
|
||||
All messages are stored in memory, if you need to clear them just call the method `Logger::deleteMessages()`:
|
||||
|
||||
```php
|
||||
PicoFeed\Logging::deleteMessages();
|
||||
Logger::deleteMessages();
|
||||
```
|
||||
|
||||
Command line utility
|
||||
====================
|
||||
|
||||
PicoFeed provides a basic command line tool to debug feeds quickly.
|
||||
The tool is located in the project's root directory.
|
||||
|
||||
### Usage
|
||||
|
||||
```bash
|
||||
$ ./picofeed
|
||||
Usage:
|
||||
./picofeed feed <feed-url> # Parse a feed a dump the ouput on stdout
|
||||
./picofeed debug <feed-url> # Display all logging messages for a feed
|
||||
./picofeed item <feed-url> <item-id> # Fetch only one item
|
||||
./picofeed nofilter <feed-url> <item-id> # Fetch an item but with no content filtering
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
```bash
|
||||
$ ./picofeed debug https://linuxfr.org/
|
||||
Exception thrown ===> "Invalid SSL certificate"
|
||||
Array
|
||||
(
|
||||
[0] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Fetch URL: https://linuxfr.org/
|
||||
[1] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Etag provided:
|
||||
[2] => [2014-11-08 14:04:14] PicoFeed\Client\Curl Last-Modified provided:
|
||||
[3] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL total time: 1.850634
|
||||
[4] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL dns lookup time: 0.00093
|
||||
[5] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL connect time: 0.115213
|
||||
[6] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL speed download: 0
|
||||
[7] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL effective url: https://linuxfr.org/
|
||||
[8] => [2014-11-08 14:04:16] PicoFeed\Client\Curl cURL error: SSL certificate problem: Invalid certificate chain
|
||||
)
|
||||
```
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
Exceptions
|
||||
==========
|
||||
|
||||
All exceptions inherit from the standard `Exception` class.
|
||||
|
||||
### Library Exceptions
|
||||
|
||||
- `PicoFeed\PicoFeedException`: Base class exception for the library
|
||||
|
||||
### Client Exceptions
|
||||
|
||||
- `PicoFeed\Client\ClientException`: Base exception class for the Client class
|
||||
- `PicoFeed\Client\InvalidCertificateException`: Invalid SSL certificate
|
||||
- `PicoFeed\Client\InvalidUrlException`: Malformed URL, page not found (404), unable to establish a connection
|
||||
- `PicoFeed\Client\MaxRedirectException`: Maximum of HTTP redirections reached
|
||||
- `PicoFeed\Client\MaxSizeException`: The response size exceeds the maximum allowed
|
||||
- `PicoFeed\Client\TimeoutException`: Connection timeout
|
||||
|
||||
### Parser Exceptions
|
||||
|
||||
- `PicoFeed\Parser\ParserException`: Base exception class for the Parser class
|
||||
- `PicoFeed\Parser\MalformedXmlException`: XML Parser error
|
||||
|
||||
### Reader Exceptions
|
||||
|
||||
- `PicoFeed\Reader\ReaderException`: Base exception class for the Reader
|
||||
- `PicoFeed\Reader\SubscriptionNotFoundException`: Unable to find a feed for the given website
|
||||
- `PicoFeed\Reader\UnsupportedFeedFormatException`: Unable to detect the feed format
|
|
@ -5,15 +5,13 @@ Find and download the favicon
|
|||
-----------------------------
|
||||
|
||||
```php
|
||||
|
||||
use PicoFeed\Favicon;
|
||||
use PicoFeed\Client\Favicon;
|
||||
|
||||
$favicon = new Favicon;
|
||||
|
||||
// The icon link is https://bits.wikimedia.org/favicon/wikipedia.ico
|
||||
$icon_link = $favicon->find('https://en.wikipedia.org/');
|
||||
$icon_content = $favicon->getContent();
|
||||
|
||||
```
|
||||
|
||||
PicoFeed will first try to find the favicon from the meta tags and fall back to the `favicon.ico` located in the website's root if nothing is found.
|
||||
|
@ -27,14 +25,12 @@ Check if a favicon link exists
|
|||
------------------------------
|
||||
|
||||
```php
|
||||
|
||||
use PicoFeed\Favicon;
|
||||
use PicoFeed\Client\Favicon;
|
||||
|
||||
$favicon = new Favicon;
|
||||
|
||||
// Return true if the file exists
|
||||
var_dump($favicon->exists('http://php.net/favicon.ico'));
|
||||
|
||||
```
|
||||
|
||||
Use personalized HTTP settings
|
||||
|
@ -43,14 +39,12 @@ Use personalized HTTP settings
|
|||
Like other classes, the Favicon class supports the Config object as a constructor argument:
|
||||
|
||||
```php
|
||||
|
||||
use PicoFeed\Config;
|
||||
use PicoFeed\Favicon;
|
||||
use PicoFeed\Config\Config;
|
||||
use PicoFeed\Client\Favicon;
|
||||
|
||||
$config = new Config;
|
||||
$config->setClientUserAgent('My RSS Reader');
|
||||
|
||||
$favicon = new Favicon($config);
|
||||
$favicon->find('https://github.com');
|
||||
|
||||
```
|
|
@ -1,11 +1,13 @@
|
|||
Feed creation
|
||||
=============
|
||||
|
||||
PicoFeed can also generate Atom and RSS feeds.
|
||||
|
||||
Generate RSS 2.0 feed
|
||||
----------------------
|
||||
|
||||
```php
|
||||
use PicoFeed\Writers\Rss20;
|
||||
use PicoFeed\Syndication\Rss20;
|
||||
|
||||
$writer = new Rss20();
|
||||
$writer->title = 'My site';
|
||||
|
@ -48,7 +50,7 @@ Generate Atom feed
|
|||
------------------
|
||||
|
||||
```php
|
||||
use PicoFeed\Writers\Atom;
|
||||
use PicoFeed\Syndication\Atom;
|
||||
|
||||
$writer = new Atom();
|
||||
$writer->title = 'My site';
|
||||
|
|
|
@ -13,7 +13,7 @@ try {
|
|||
$reader = new Reader;
|
||||
|
||||
// Return a resource
|
||||
$resource = $reader->download('https://linuxfr.org/news.atom');
|
||||
$resource = $reader->download('http://linuxfr.org/news.atom');
|
||||
|
||||
// Return the right parser instance according to the feed format
|
||||
$parser = $reader->getParser(
|
||||
|
@ -59,16 +59,6 @@ Item::author = Syvolc
|
|||
Item::enclosure_url =
|
||||
Item::enclosure_type =
|
||||
Item::content = 18307 bytes
|
||||
----
|
||||
Item::id = d0ebddc90bfc3f109f9be00a3bb0b4a770af7a647cdc88454fe15d79168e0dea
|
||||
Item::title = Fuzix OS, parce que les petites choses sont belles
|
||||
Item::url = http://linuxfr.org/news/fuzix-os-parce-que-les-petites-choses-sont-belles
|
||||
Item::date = 1415112167
|
||||
Item::language = en-US
|
||||
Item::author = Thomas DEBESSE
|
||||
Item::enclosure_url =
|
||||
Item::enclosure_type =
|
||||
Item::content = 6104 bytes
|
||||
....
|
||||
```
|
||||
|
||||
|
@ -134,7 +124,54 @@ catch (PicoFeedException $e) {
|
|||
HTTP caching
|
||||
------------
|
||||
|
||||
TODO
|
||||
PicoFeed supports HTTP caching to avoid unnecessary processing.
|
||||
|
||||
1. After the first download, save in your database the values of the Etag and LastModified HTTP headers
|
||||
2. For the next requests, provide those values to the `download()` method and check if the feed was modified or not
|
||||
|
||||
Here is an example:
|
||||
|
||||
```php
|
||||
try {
|
||||
|
||||
// Fetch from your database the previous values of the Etag and LastModified headers
|
||||
$etag = '...';
|
||||
$last_modified = '...';
|
||||
|
||||
$reader = new Reader;
|
||||
|
||||
// Provide those values to the download method
|
||||
$resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag);
|
||||
|
||||
// Return true if the remote content has changed
|
||||
if ($resource->isModified()) {
|
||||
|
||||
$parser = $reader->getParser(
|
||||
$resource->getUrl(),
|
||||
$resource->getContent(),
|
||||
$resource->getEncoding()
|
||||
);
|
||||
|
||||
$feed = $parser->execute();
|
||||
|
||||
// Save your feed in your database
|
||||
// ...
|
||||
|
||||
// Store the Etag and the LastModified headers in your database for the next requests
|
||||
$etag = $resource->getEtag();
|
||||
$last_modified = $resource->getLastModified();
|
||||
|
||||
// ...
|
||||
}
|
||||
else {
|
||||
|
||||
echo 'Not modified, nothing to do!';
|
||||
}
|
||||
}
|
||||
catch (PicoFeedException $e) {
|
||||
// Do something...
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
Feed and item properties
|
||||
|
@ -162,3 +199,24 @@ $feed->items[0]->getEnclosureUrl(); // Enclosure url
|
|||
$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...)
|
||||
$feed->items[0]->getContent(); // Item content (filtered or raw)
|
||||
```
|
||||
|
||||
RTL language detection
|
||||
----------------------
|
||||
|
||||
There is a utility method to determine if a language code is Right-To-Left or not:
|
||||
|
||||
```php
|
||||
// Return true if RTL
|
||||
Parser::isLanguageRTL($item->getLanguage());
|
||||
```
|
||||
|
||||
Known RTL languages are:
|
||||
|
||||
- Arabic (ar-**)
|
||||
- Farsi (fa-**)
|
||||
- Urdu (ur-**)
|
||||
- Pashtu (ps-**)
|
||||
- Syriac (syr-**)
|
||||
- Divehi (dv-**)
|
||||
- Hebrew (he-**)
|
||||
- Yiddish (yi-**)
|
||||
|
|
|
@ -6,33 +6,48 @@ The web scraper is useful for feeds that display only a summary of articles, the
|
|||
How does the content grabber work?
|
||||
------------------------------
|
||||
|
||||
1. Try with rules first (xpath patterns) for the domain name (see `PicoFeed\Rules\`)
|
||||
1. Try with rules first (XPath queries) for the domain name (see `PicoFeed\Rules\`)
|
||||
2. Try to find the text content by using common attributes for class and id
|
||||
3. Finally, if nothing is found, the feed content is displayed
|
||||
|
||||
**The best results are obtained with Xpath rules file.**
|
||||
**The best results are obtained with XPath rules file.**
|
||||
|
||||
How to use the content scraper?
|
||||
-------------------------------
|
||||
|
||||
Before parsing all items, just call the method `$parser->enableContentGrabber()`:
|
||||
|
||||
```php
|
||||
use PicoFeed\Reader;
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
$reader = new Reader;
|
||||
$reader->download('http://www.egscomics.com/rss.php');
|
||||
try {
|
||||
|
||||
$parser = $reader->getParser();
|
||||
$reader = new Reader;
|
||||
|
||||
if ($parser !== false) {
|
||||
// Return a resource
|
||||
$resource = $reader->download('http://www.egscomics.com/rss.php');
|
||||
|
||||
$parser->enableContentGrabber(); // <= Enable the content grabber
|
||||
// Return the right parser instance according to the feed format
|
||||
$parser = $reader->getParser(
|
||||
$resource->getUrl(),
|
||||
$resource->getContent(),
|
||||
$resource->getEncoding()
|
||||
);
|
||||
|
||||
// Enable content grabber before parsing items
|
||||
$parser->enableContentGrabber();
|
||||
|
||||
// Return a Feed object
|
||||
$feed = $parser->execute();
|
||||
// ...
|
||||
}
|
||||
catch (PicoFeedException $e) {
|
||||
// Do Something...
|
||||
}
|
||||
```
|
||||
|
||||
When the content scraper is enabled, everything will be slower.
|
||||
For each item a new HTTP request is made and the HTML downloaded is parsed with XML/Xpath.
|
||||
**For each item a new HTTP request is made** and the HTML downloaded is parsed with XML/XPath.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
|
|
@ -4,12 +4,14 @@ Installation
|
|||
Versions
|
||||
--------
|
||||
|
||||
- Development version: branch master
|
||||
- Development version: master
|
||||
- Available versions:
|
||||
- v0.1.0 (stable)
|
||||
- v0.0.2
|
||||
- v0.0.1
|
||||
|
||||
Note: The public API has changed between 0.0.x and 0.1.0
|
||||
|
||||
Installation with Composer
|
||||
--------------------------
|
||||
|
||||
|
@ -35,7 +37,7 @@ And download the code:
|
|||
composer install # or update
|
||||
```
|
||||
|
||||
Usage example with the Composer autoloading:
|
||||
Usage example with the Composer autoloader:
|
||||
|
||||
```php
|
||||
<?php
|
||||
|
@ -47,7 +49,7 @@ use PicoFeed\Reader\Reader;
|
|||
try {
|
||||
|
||||
$reader = new Reader;
|
||||
$resource = $reader->download('https://linuxfr.org/news.atom');
|
||||
$resource = $reader->download('http://linuxfr.org/news.atom');
|
||||
|
||||
$parser = $reader->getParser(
|
||||
$resource->getUrl(),
|
||||
|
|
|
@ -5,7 +5,7 @@ Example with no categories
|
|||
--------------------------
|
||||
|
||||
```php
|
||||
use PicoFeed\Export;
|
||||
use PicoFeed\Serialization\Export;
|
||||
|
||||
$feeds = array(
|
||||
array(
|
||||
|
@ -26,7 +26,7 @@ Example with categories
|
|||
-----------------------
|
||||
|
||||
```php
|
||||
use PicoFeed\Export;
|
||||
use PicoFeed\Serialization\Export;
|
||||
|
||||
$feeds = array(
|
||||
'my category' => array(
|
||||
|
|
|
@ -4,7 +4,7 @@ Import OPML file
|
|||
Importing a list of subscriptions is pretty straightforward:
|
||||
|
||||
```php
|
||||
use PicoFeed\Import;
|
||||
use PicoFeed\Serialization\Import;
|
||||
|
||||
$opml = file_get_contents('mySubscriptions.opml');
|
||||
$import = new Import($opml);
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
|
||||
require 'vendor/autoload.php';
|
||||
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
try {
|
||||
|
||||
// Fetch from your database the previous values of the Etag and LastModified headers
|
||||
$etag = '...';
|
||||
$last_modified = '...';
|
||||
|
||||
$reader = new Reader;
|
||||
|
||||
// Provide those values to the download method
|
||||
$resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag);
|
||||
|
||||
if ($resource->isModified()) {
|
||||
|
||||
$parser = $reader->getParser(
|
||||
$resource->getUrl(),
|
||||
$resource->getContent(),
|
||||
$resource->getEncoding()
|
||||
);
|
||||
|
||||
$feed = $parser->execute();
|
||||
|
||||
// Save your feed in your database
|
||||
// ...
|
||||
|
||||
// Store the Etag and the LastModified headers in your database
|
||||
$etag = $resource->getEtag();
|
||||
$last_modified = $resource->getLastModified();
|
||||
|
||||
// ...
|
||||
}
|
||||
else {
|
||||
|
||||
echo 'Not modified, nothing to do!';
|
||||
}
|
||||
}
|
||||
catch (PicoFeedException $e) {
|
||||
// Do something...
|
||||
}
|
|
@ -3,7 +3,7 @@
|
|||
namespace PicoFeed\Client;
|
||||
|
||||
use LogicException;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Client class
|
||||
|
@ -166,9 +166,9 @@ abstract class Client
|
|||
$this->url = $url;
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().' Fetch URL: '.$this->url);
|
||||
Logging::setMessage(get_called_class().' Etag provided: '.$this->etag);
|
||||
Logging::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
|
||||
Logger::setMessage(get_called_class().' Fetch URL: '.$this->url);
|
||||
Logger::setMessage(get_called_class().' Etag provided: '.$this->etag);
|
||||
Logger::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
|
||||
|
||||
$response = $this->doRequest();
|
||||
|
||||
|
@ -204,7 +204,7 @@ abstract class Client
|
|||
}
|
||||
|
||||
if ($this->is_modified === false) {
|
||||
Logging::setMessage(get_called_class().' Resource not modified');
|
||||
Logger::setMessage(get_called_class().' Resource not modified');
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,10 +297,10 @@ abstract class Client
|
|||
}
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().' HTTP status code: '.$status);
|
||||
Logger::setMessage(get_called_class().' HTTP status code: '.$status);
|
||||
|
||||
foreach ($headers as $name => $value) {
|
||||
Logging::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
|
||||
Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
|
||||
}
|
||||
|
||||
return array($status, $headers);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* cURL HTTP client
|
||||
|
@ -129,18 +129,18 @@ class Curl extends Client
|
|||
{
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
|
||||
curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP');
|
||||
curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);
|
||||
|
||||
if ($this->proxy_username) {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: No');
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,16 +185,16 @@ class Curl extends Client
|
|||
$ch = $this->prepareContext();
|
||||
curl_exec($ch);
|
||||
|
||||
Logging::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
|
||||
Logging::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
|
||||
Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
|
||||
Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
|
||||
Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
|
||||
Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
|
||||
Logger::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
|
||||
|
||||
$curl_errno = curl_errno($ch);
|
||||
|
||||
if ($curl_errno) {
|
||||
Logging::setMessage(get_called_class().' cURL error: '.curl_error($ch));
|
||||
Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch));
|
||||
curl_close($ch);
|
||||
|
||||
$this->handleError($curl_errno);
|
||||
|
@ -246,11 +246,12 @@ class Curl extends Client
|
|||
*
|
||||
* @access private
|
||||
* @param string $location Redirected URL
|
||||
* @return boolean|array
|
||||
* @return array
|
||||
*/
|
||||
private function handleRedirection($location)
|
||||
{
|
||||
$nb_redirects = 0;
|
||||
$result = array();
|
||||
$this->url = $location;
|
||||
$this->body = '';
|
||||
$this->body_length = 0;
|
||||
|
@ -262,7 +263,7 @@ class Curl extends Client
|
|||
$nb_redirects++;
|
||||
|
||||
if ($nb_redirects >= $this->max_redirects) {
|
||||
return false;
|
||||
throw new MaxRedirectException('Maximum number of redirections reached');
|
||||
}
|
||||
|
||||
$result = $this->doRequest(false);
|
||||
|
@ -275,11 +276,11 @@ class Curl extends Client
|
|||
$this->headers_counter = 0;
|
||||
}
|
||||
else {
|
||||
return $result;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -5,7 +5,7 @@ namespace PicoFeed\Client;
|
|||
use DOMXpath;
|
||||
|
||||
use PicoFeed\Config\Config;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
|
@ -67,7 +67,7 @@ class Favicon
|
|||
{
|
||||
try {
|
||||
|
||||
Logging::setMessage(get_called_class().' Download => '.$url);
|
||||
Logger::setMessage(get_called_class().' Download => '.$url);
|
||||
|
||||
$client = Client::getInstance();
|
||||
$client->setConfig($this->config);
|
||||
|
|
|
@ -5,7 +5,7 @@ namespace PicoFeed\Client;
|
|||
use DOMXPath;
|
||||
|
||||
use PicoFeed\Encoding\Encoding;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
|
@ -147,7 +147,7 @@ class Grabber
|
|||
* Set config object
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config $config Config instance
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
* @return \PicoFeed\Grabber
|
||||
*/
|
||||
public function setConfig($config)
|
||||
|
@ -188,30 +188,30 @@ class Grabber
|
|||
{
|
||||
if ($this->html) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Fix encoding');
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||
Logger::setMessage(get_called_class().' Fix encoding');
|
||||
Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||
|
||||
$this->html = Filter::stripHeadTags($this->html);
|
||||
$this->html = Encoding::convert($this->html, $this->encoding);
|
||||
|
||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||
Logger::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||
$rules = $this->getRules();
|
||||
|
||||
if (is_array($rules)) {
|
||||
Logging::setMessage(get_called_class().' Parse content with rules');
|
||||
Logger::setMessage(get_called_class().' Parse content with rules');
|
||||
$this->parseContentWithRules($rules);
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Parse content with candidates');
|
||||
Logger::setMessage(get_called_class().' Parse content with candidates');
|
||||
$this->parseContentWithCandidates();
|
||||
}
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' No content fetched');
|
||||
Logger::setMessage(get_called_class().' No content fetched');
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->content).' bytes');
|
||||
Logging::setMessage(get_called_class().' Grabber done');
|
||||
Logger::setMessage(get_called_class().' Content length: '.strlen($this->content).' bytes');
|
||||
Logger::setMessage(get_called_class().' Grabber done');
|
||||
|
||||
return $this->content !== '';
|
||||
}
|
||||
|
@ -260,14 +260,12 @@ class Grabber
|
|||
$files[] = substr($hostname, 0, $pos);
|
||||
}
|
||||
|
||||
// Logging::setMessage(var_export($files, true));
|
||||
|
||||
foreach ($files as $file) {
|
||||
|
||||
$filename = __DIR__.'/../Rules/'.$file.'.php';
|
||||
|
||||
if (file_exists($filename)) {
|
||||
Logging::setMessage(get_called_class().' Load rule: '.$file);
|
||||
Logger::setMessage(get_called_class().' Load rule: '.$file);
|
||||
return include $filename;
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +281,7 @@ class Grabber
|
|||
*/
|
||||
public function parseContentWithRules(array $rules)
|
||||
{
|
||||
// Logging::setMessage($this->html);
|
||||
// Logger::setMessage($this->html);
|
||||
$dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html);
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
||||
|
@ -329,13 +327,13 @@ class Grabber
|
|||
// Try to lookup in each tag
|
||||
foreach ($this->candidatesAttributes as $candidate) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Try this candidate: "'.$candidate.'"');
|
||||
Logger::setMessage(get_called_class().' Try this candidate: "'.$candidate.'"');
|
||||
|
||||
$nodes = $xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]');
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
$this->content = $dom->saveXML($nodes->item(0));
|
||||
Logging::setMessage(get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
|
||||
Logger::setMessage(get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -347,16 +345,16 @@ class Grabber
|
|||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
$this->content = $dom->saveXML($nodes->item(0));
|
||||
Logging::setMessage(get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
|
||||
Logger::setMessage(get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen($this->content) < 50) {
|
||||
Logging::setMessage(get_called_class().' No enought content fetched, get the full body');
|
||||
Logger::setMessage(get_called_class().' No enought content fetched, get the full body');
|
||||
$this->content = $dom->saveXML($dom->firstChild);
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().' Strip garbage');
|
||||
Logger::setMessage(get_called_class().' Strip garbage');
|
||||
$this->stripGarbage();
|
||||
}
|
||||
|
||||
|
@ -378,7 +376,7 @@ class Grabber
|
|||
$nodes = $xpath->query('//'.$tag);
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
Logging::setMessage(get_called_class().' Strip tag: "'.$tag.'"');
|
||||
Logger::setMessage(get_called_class().' Strip tag: "'.$tag.'"');
|
||||
foreach ($nodes as $node) {
|
||||
$node->parentNode->removeChild($node);
|
||||
}
|
||||
|
@ -390,7 +388,7 @@ class Grabber
|
|||
$nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]');
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
Logging::setMessage(get_called_class().' Strip attribute: "'.$attribute.'"');
|
||||
Logger::setMessage(get_called_class().' Strip attribute: "'.$attribute.'"');
|
||||
foreach ($nodes as $node) {
|
||||
$node->parentNode->removeChild($node);
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Stream context HTTP client
|
||||
|
@ -63,16 +63,16 @@ class Stream extends Client
|
|||
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
$context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
|
||||
$context['http']['request_fulluri'] = true;
|
||||
|
||||
if ($this->proxy_username) {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: No');
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -132,6 +132,11 @@ class Html
|
|||
return $this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after XML parsing
|
||||
*
|
||||
* @access public
|
||||
*/
|
||||
public function postFilter()
|
||||
{
|
||||
$this->output = $this->tag->removeEmptyTags($this->output);
|
||||
|
|
|
@ -11,7 +11,7 @@ use DateTimeZone;
|
|||
* @author Frederic Guillot
|
||||
* @package Logging
|
||||
*/
|
||||
class Logging
|
||||
class Logger
|
||||
{
|
||||
/**
|
||||
* List of messages
|
|
@ -3,7 +3,6 @@
|
|||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ use DateTimeZone;
|
|||
|
||||
use PicoFeed\Encoding\Encoding;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Client\Grabber;
|
||||
|
||||
|
@ -109,7 +109,7 @@ abstract class Parser
|
|||
$this->content = Filter::stripXmlTag($content);
|
||||
|
||||
// Encode everything in UTF-8
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
|
||||
|
||||
// Workarounds
|
||||
|
@ -124,13 +124,13 @@ abstract class Parser
|
|||
*/
|
||||
public function execute()
|
||||
{
|
||||
Logging::setMessage(get_called_class().': begin parsing');
|
||||
Logger::setMessage(get_called_class().': begin parsing');
|
||||
|
||||
$xml = XmlParser::getSimpleXml($this->content);
|
||||
|
||||
if ($xml === false) {
|
||||
Logging::setMessage(get_called_class().': XML parsing error');
|
||||
Logging::setMessage(XmlParser::getErrors());
|
||||
Logger::setMessage(get_called_class().': XML parsing error');
|
||||
Logger::setMessage(XmlParser::getErrors());
|
||||
throw new MalformedXmlException('XML parsing error');
|
||||
}
|
||||
|
||||
|
@ -172,7 +172,7 @@ abstract class Parser
|
|||
$feed->items[] = $item;
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().PHP_EOL.$feed);
|
||||
Logger::setMessage(get_called_class().PHP_EOL.$feed);
|
||||
|
||||
return $feed;
|
||||
}
|
||||
|
@ -243,7 +243,7 @@ abstract class Parser
|
|||
$item->content = $filter->execute();
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().': Content filtering disabled');
|
||||
Logger::setMessage(get_called_class().': Content filtering disabled');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
namespace PicoFeed\Parser;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ use DOMXPath;
|
|||
use PicoFeed\Config\Config;
|
||||
use PicoFeed\Client\Client;
|
||||
use PicoFeed\Client\Url;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
|
@ -50,7 +50,7 @@ class Reader
|
|||
public function __construct(Config $config = null)
|
||||
{
|
||||
$this->config = $config ?: new Config;
|
||||
Logging::setTimezone($this->config->getTimezone());
|
||||
Logger::setTimezone($this->config->getTimezone());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -111,7 +111,7 @@ class Reader
|
|||
*/
|
||||
public function find($url, $html)
|
||||
{
|
||||
Logging::setMessage(get_called_class().': Try to discover subscriptions');
|
||||
Logger::setMessage(get_called_class().': Try to discover subscriptions');
|
||||
|
||||
$dom = XmlParser::getHtmlDocument($html);
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
@ -140,7 +140,7 @@ class Reader
|
|||
}
|
||||
}
|
||||
|
||||
Logging::setMessage(get_called_class().': '.implode(', ', $links));
|
||||
Logger::setMessage(get_called_class().': '.implode(', ', $links));
|
||||
|
||||
return $links;
|
||||
}
|
||||
|
@ -183,7 +183,7 @@ class Reader
|
|||
{
|
||||
$first_tag = Filter::getFirstTag($content);
|
||||
|
||||
Logging::setMessage(get_called_class().': DetectFormat(): '.$first_tag);
|
||||
Logger::setMessage(get_called_class().': DetectFormat(): '.$first_tag);
|
||||
|
||||
foreach ($this->formats as $parser => $needles) {
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
|
||||
'body' => array(
|
||||
'//div[@class="contenu"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[contains(@class, "a2a")]'
|
||||
),
|
||||
);
|
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/',
|
||||
'body' => array(
|
||||
'//section[@class="article_body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
);
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155',
|
||||
'body' => array(
|
||||
'/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]'
|
||||
),
|
||||
'strip' => array(
|
||||
'//style',
|
||||
'//font',
|
||||
),
|
||||
);
|
|
@ -5,7 +5,7 @@ namespace PicoFeed\Serialization;
|
|||
use SimpleXmlElement;
|
||||
use StdClass;
|
||||
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
|
@ -51,17 +51,17 @@ class Import
|
|||
*/
|
||||
public function execute()
|
||||
{
|
||||
Logging::setMessage(get_called_class().': start importation');
|
||||
Logger::setMessage(get_called_class().': start importation');
|
||||
|
||||
$xml = XmlParser::getSimpleXml(trim($this->content));
|
||||
|
||||
if ($xml === false || $xml->getName() !== 'opml' || ! isset($xml->body)) {
|
||||
Logging::setMessage(get_called_class().': OPML tag not found or malformed XML document');
|
||||
Logger::setMessage(get_called_class().': OPML tag not found or malformed XML document');
|
||||
return false;
|
||||
}
|
||||
|
||||
$this->parseEntries($xml->body);
|
||||
Logging::setMessage(get_called_class().': '.count($this->items).' subscriptions found');
|
||||
Logger::setMessage(get_called_class().': '.count($this->items).' subscriptions found');
|
||||
|
||||
return $this->items;
|
||||
}
|
||||
|
|
|
@ -70,7 +70,7 @@ class Rss20 extends Writer
|
|||
|
||||
// <description/>
|
||||
$description = $this->dom->createElement('description');
|
||||
$description->appendChild($this->dom->createTextNode(isset($this->description) ? $this->description : $this->title));
|
||||
$description->appendChild($this->dom->createTextNode($this->description ?: $this->title));
|
||||
$channel->appendChild($description);
|
||||
|
||||
// <pubDate/>
|
||||
|
|
|
@ -9,7 +9,6 @@ use RuntimeException;
|
|||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Syndication
|
||||
* @property string $description Feed description
|
||||
*/
|
||||
abstract class Writer
|
||||
{
|
||||
|
@ -61,6 +60,14 @@ abstract class Writer
|
|||
*/
|
||||
public $title = '';
|
||||
|
||||
/**
|
||||
* Feed description
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $description = '';
|
||||
|
||||
/**
|
||||
* Feed modification date (timestamp)
|
||||
*
|
||||
|
|
|
@ -1,32 +1,35 @@
|
|||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
require_once './vendor/autoload.php';
|
||||
require_once 'vendor/autoload.php';
|
||||
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\Logging\Logging;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
function get_feed($url, $disable_filtering = false)
|
||||
{
|
||||
$reader = new Reader;
|
||||
$reader->download($url);
|
||||
try {
|
||||
|
||||
$parser = $reader->getParser();
|
||||
$reader = new Reader;
|
||||
$resource = $reader->discover($url);
|
||||
|
||||
if ($disable_filtering) {
|
||||
$parser->disableContentFiltering();
|
||||
}
|
||||
$parser = $reader->getParser(
|
||||
$resource->getUrl(),
|
||||
$resource->getContent(),
|
||||
$resource->getEncoding()
|
||||
);
|
||||
|
||||
if ($parser !== false) {
|
||||
|
||||
$feed = $parser->execute();
|
||||
|
||||
if ($feed !== false) {
|
||||
return $feed;
|
||||
if ($disable_filtering) {
|
||||
$parser->disableContentFiltering();
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return $parser->execute();
|
||||
}
|
||||
catch (PicoFeedException $e) {
|
||||
echo 'Exception thrown ===> "'.$e->getMessage().'"'.PHP_EOL;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function get_item($feed, $item_id)
|
||||
|
@ -45,42 +48,34 @@ function get_item($feed, $item_id)
|
|||
function dump_feed($url)
|
||||
{
|
||||
$feed = get_feed($url);
|
||||
|
||||
if ($feed === false) {
|
||||
die("Unable to fetch the feed\n");
|
||||
}
|
||||
|
||||
echo $feed;
|
||||
}
|
||||
|
||||
function debug_feed($url)
|
||||
{
|
||||
get_feed($url);
|
||||
print_r(Logging::getMessages());
|
||||
print_r(Logger::getMessages());
|
||||
}
|
||||
|
||||
function dump_item($url, $item_id)
|
||||
{
|
||||
$feed = get_feed($url);
|
||||
|
||||
if ($feed === false) {
|
||||
die("Unable to fetch the feed\n");
|
||||
if ($feed !== false) {
|
||||
get_item($feed, $item_id);
|
||||
}
|
||||
|
||||
get_item($feed, $item_id);
|
||||
}
|
||||
|
||||
function nofilter_item($url, $item_id)
|
||||
{
|
||||
$feed = get_feed($url, true);
|
||||
|
||||
if ($feed === false) {
|
||||
die("Unable to fetch the feed\n");
|
||||
if ($feed !== false) {
|
||||
get_item($feed, $item_id);
|
||||
}
|
||||
|
||||
get_item($feed, $item_id);
|
||||
}
|
||||
|
||||
// Parse command line arguments
|
||||
if ($argc === 4) {
|
||||
switch ($argv[1]) {
|
||||
case 'item':
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PHPUnit_Framework_TestCase;
|
||||
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\Logging\Logging;
|
||||
|
||||
class GrabberTest extends PHPUnit_Framework_TestCase
|
||||
{
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
<?php
|
||||
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
use PHPUnit_Framework_TestCase;
|
||||
|
||||
|
||||
class UrlTest extends PHPUnit_Framework_TestCase
|
||||
{
|
||||
public function testHasScheme()
|
||||
|
|
Загрузка…
Ссылка в новой задаче