This commit is contained in:
Bernhard Posselt 2015-02-24 09:31:49 +01:00
Родитель 4ca9f79b1c
Коммит 3195dfe402
7 изменённых файлов: 82 добавлений и 58 удалений

8
composer.lock сгенерированный
Просмотреть файл

@ -57,12 +57,12 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "6f0ac9238dcb0899c8322933e7f4598890ecf744"
"reference": "aa83e0c66525251cb6c6acab3babbc9e1879527b"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6f0ac9238dcb0899c8322933e7f4598890ecf744",
"reference": "6f0ac9238dcb0899c8322933e7f4598890ecf744",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/aa83e0c66525251cb6c6acab3babbc9e1879527b",
"reference": "aa83e0c66525251cb6c6acab3babbc9e1879527b",
"shasum": ""
},
"require": {
@ -91,7 +91,7 @@
],
"description": "Modern library to write or read feeds (RSS/Atom)",
"homepage": "http://fguillot.github.io/picoFeed",
"time": "2015-02-18 02:47:12"
"time": "2015-02-24 03:00:15"
},
{
"name": "pear/net_url2",

2
vendor/autoload.php поставляемый
Просмотреть файл

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9::getLoader();
return ComposerAutoloaderInitfee7a50d745041658cc0946610dc5951::getLoader();

10
vendor/composer/autoload_real.php поставляемый
Просмотреть файл

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer
class ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9
class ComposerAutoloaderInitfee7a50d745041658cc0946610dc5951
{
private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9
return self::$loader;
}
spl_autoload_register(array('ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInitfee7a50d745041658cc0946610dc5951', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInitfee7a50d745041658cc0946610dc5951', 'loadClassLoader'));
$includePaths = require __DIR__ . '/include_paths.php';
array_push($includePaths, get_include_path());
@ -46,14 +46,14 @@ class ComposerAutoloaderInit373583df083e6f26edc2b67c5b6380e9
$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequire373583df083e6f26edc2b67c5b6380e9($file);
composerRequirefee7a50d745041658cc0946610dc5951($file);
}
return $loader;
}
}
function composerRequire373583df083e6f26edc2b67c5b6380e9($file)
function composerRequirefee7a50d745041658cc0946610dc5951($file)
{
require $file;
}

8
vendor/composer/installed.json поставляемый
Просмотреть файл

@ -119,12 +119,12 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "6f0ac9238dcb0899c8322933e7f4598890ecf744"
"reference": "aa83e0c66525251cb6c6acab3babbc9e1879527b"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6f0ac9238dcb0899c8322933e7f4598890ecf744",
"reference": "6f0ac9238dcb0899c8322933e7f4598890ecf744",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/aa83e0c66525251cb6c6acab3babbc9e1879527b",
"reference": "aa83e0c66525251cb6c6acab3babbc9e1879527b",
"shasum": ""
},
"require": {
@ -135,7 +135,7 @@
"ext-xml": "*",
"php": ">=5.3.0"
},
"time": "2015-02-18 02:47:12",
"time": "2015-02-24 03:00:15",
"type": "library",
"installation-source": "dist",
"autoload": {

Просмотреть файл

@ -121,6 +121,8 @@ class Html
*/
public function execute()
{
$this->preFilter();
$parser = xml_parser_create();
xml_set_object($parser, $this);
@ -135,6 +137,16 @@ class Html
return $this->output;
}
/**
 * Hook executed before the XML parser runs
 *
 * Passes the raw input through the tag filter so that blacklisted
 * tags are removed, then stores the result back as the input.
 *
 * @access public
 */
public function preFilter()
{
    $cleaned = $this->tag->removeBlacklistedTags($this->input);
    $this->input = $cleaned;
}
/**
* Called after XML parsing
*

Просмотреть файл

@ -2,6 +2,9 @@
namespace PicoFeed\Filter;
use DOMXpath;
use PicoFeed\Parser\XmlParser;
/**
* Tag Filter class
*
@ -10,6 +13,17 @@ namespace PicoFeed\Filter;
*/
class Tag
{
/**
* Tags blacklist (Xpath expressions)
*
* @access private
* @var array
*/
private $tag_blacklist = array(
'//script',
'//style',
);
/**
* Tags whitelist
*
@ -104,7 +118,7 @@ class Tag
*/
public function isSelfClosingTag($tag)
{
return in_array($tag, array('br', 'img'));
return $tag === 'br' || $tag === 'img';
}
/**
@ -134,6 +148,28 @@ class Tag
$attributes['height'] == 1 && $attributes['width'] == 1;
}
/**
 * Remove blacklisted tags
 *
 * Every node matched by one of the Xpath expressions listed in the
 * tag blacklist is detached from the document, together with its content.
 *
 * @access public
 * @param  string  $data  Input data
 * @return string         Serialized document without the blacklisted nodes
 */
public function removeBlacklistedTags($data)
{
    $dom = XmlParser::getDomDocument($data);
    $xpath = new DOMXpath($dom);

    // All blacklist entries are combined into a single Xpath union expression
    $nodes = $xpath->query(implode(' | ', $this->tag_blacklist));

    // DOMXPath::query() returns false when the expression cannot be evaluated;
    // in that case there is nothing to remove and the document is left as is
    if ($nodes !== false) {
        foreach ($nodes as $node) {
            $node->parentNode->removeChild($node);
        }
    }

    return $dom->saveXML();
}
/**
* Remove empty tags
*

Просмотреть файл

@ -1,9 +1,9 @@
<?php
namespace PicoFeed\Filter;
use PHPUnit_Framework_TestCase;
class HtmlFilterTest extends PHPUnit_Framework_TestCase
{
public function testExecute()
@ -31,6 +31,24 @@ class HtmlFilterTest extends PHPUnit_Framework_TestCase
$this->assertEquals($expected, $f->execute());
}
/**
 * The filtered output is expected to contain neither the script
 * elements nor their content
 */
public function testClearScriptAttributes()
{
    $input = '<div><script>this is the content</script><script>blubb content</script><p>something</p></div><p>hi</p>';
    $filter = new Html($input, 'http://blabla');
    $output = $filter->execute();

    $this->assertEquals('<p>something</p><p>hi</p>', $output);
}
/**
 * The filtered output is expected to contain neither the style
 * elements nor their content
 */
public function testClearStyleAttributes()
{
    $input = '<div><style>this is the content</style><style>blubb content</style><p>something</p></div><p>hi</p>';
    $filter = new Html($input, 'http://blabla');
    $output = $filter->execute();

    $this->assertEquals('<p>something</p><p>hi</p>', $output);
}
public function testEmptyTags()
{
$data = <<<EOD
@ -124,46 +142,4 @@ x-amz-id-2: DDjqfqz2ZJufzqRAcj1mh+9XvSogrPohKHwXlo8IlkzH67G6w4wnjn9HYgbs4uI0
$f = new Html('<table><tr></tr></table>', 'http://blabla');
$this->assertEquals('', $f->execute());
}
/*
public function testFilter()
{
$input = <<<EOD
<div xmlns="http://www.w3.org/1999/xhtml"><article>
<figure>
<img src="/2014/08/06/4694-pluie" alt="Flaque de pluie"/>
<figcaption>La Saussaye, France, 6 août 2014</figcaption>
</figure>
<div lang="en" class="extrait">
<blockquote cite="urn:isbn:978-0-8248-3742-6">
<p>Spring had truly arrived. Countless streams suddenly materialized all over the roads, fields, grasslands, and thickets; flowing as if the melting snow's waters were spilling over. </p>
</blockquote>
<p class="source"><span class="auteur">Takiji Kobayashi</span>, <cite class="titre">Yasuko</cite>.</p>
</div>
<p>La pluie abonde. La forêt humide resplendit. L'eau monte, l'eau déborde. Il reste pourtant notre humanité. Toute entière, resplendissante.</p>
</article>
</div>
EOD;
$expected = <<<EOD
<figure>
<img src="http://www.la-grange.net/2014/08/06/4694-pluie" alt="Flaque de pluie"/>
<figcaption>La Saussaye, France, 6 août 2014</figcaption>
</figure>
<blockquote>
<p>Spring had truly arrived. Countless streams suddenly materialized all over the roads, fields, grasslands, and thickets; flowing as if the melting snow&#039;s waters were spilling over. </p>
</blockquote>
<p>Takiji Kobayashi, <cite>Yasuko</cite>.</p>
<p>La pluie abonde. La forêt humide resplendit. L&#039;eau monte, l&#039;eau déborde. Il reste pourtant notre humanité. Toute entière, resplendissante.</p>
EOD;
$f = new Html($input, 'http://www.la-grange.net/');
$this->assertEquals($expected, $f->execute());
}*/
}