diff options
author | jenkins-bot <jenkins-bot@gerrit.wikimedia.org> | 2021-12-23 12:44:22 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@wikimedia.org> | 2021-12-23 12:44:22 +0000 |
commit | d974510c56c7d9abe96a5a3ca0a7cf34237896ee (patch) | |
tree | 539a3abe0f887242ecd9e1d7c30f085778361a5d | |
parent | 95fe8522bee580847200f928068dcac327b53518 (diff) | |
parent | 8e06927190922cffa27af1fab845de44765c442a (diff) |
Merge "Make Sanitizer::stripAllTags() strip css and js tag contents"
-rw-r--r-- | includes/parser/RemexStripTagHandler.php | 33 | ||||
-rw-r--r-- | tests/phpunit/unit/includes/parser/SanitizerUnitTest.php | 3 |
2 files changed, 35 insertions, 1 deletion
diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php index 1a1fefaeab96..ca7f290b39d8 100644 --- a/includes/parser/RemexStripTagHandler.php +++ b/includes/parser/RemexStripTagHandler.php @@ -7,6 +7,7 @@ use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler; * @internal */ class RemexStripTagHandler extends NullTokenHandler { + private $insideNonVisibleTag = false; private $text = ''; public function getResult() { @@ -14,10 +15,15 @@ class RemexStripTagHandler extends NullTokenHandler { } public function characters( $text, $start, $length, $sourceStart, $sourceLength ) { - $this->text .= substr( $text, $start, $length ); + if ( !$this->insideNonVisibleTag ) { + $this->text .= substr( $text, $start, $length ); + } } public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) { + if ( $this->isNonVisibleTag( $name ) ) { + $this->insideNonVisibleTag = true; + } // Inject whitespace for typical block-level tags to // prevent merging unrelated<br>words. if ( $this->isBlockLevelTag( $name ) ) { @@ -26,6 +32,9 @@ class RemexStripTagHandler extends NullTokenHandler { } public function endTag( $name, $sourceStart, $sourceLength ) { + if ( $this->isNonVisibleTag( $name ) ) { + $this->insideNonVisibleTag = false; + } // Inject whitespace for typical block-level tags to // prevent merging unrelated<br>words. if ( $this->isBlockLevelTag( $name ) ) { @@ -93,4 +102,26 @@ class RemexStripTagHandler extends NullTokenHandler { $key = strtolower( trim( $tagName ) ); return isset( self::BLOCK_LEVEL_TAGS[$key] ); } + + private const NON_VISIBLE_TAGS = [ + 'style' => true, + 'script' => true, + ]; + + /** + * Detect block tags which by default are non-visible items. + * Of course css can make anything non-visible, + * but this is still better than nothing. + * + * We use this primarily to hide TemplateStyles + * from output in notifications/emails etc. 
+ * + * @param string $tagName HTML tag name + * @return bool True when tag is a html element which should be filtered out + */ + private function isNonVisibleTag( $tagName ) { + $key = strtolower( trim( $tagName ) ); + return isset( self::NON_VISIBLE_TAGS[$key] ); + } + } diff --git a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php index 82c73f76343f..daf3d22aaff9 100644 --- a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php +++ b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php @@ -246,6 +246,9 @@ class SanitizerUnitTest extends MediaWikiUnitTestCase { [ '1<span class="<?php">2</span>3', '123' ], [ '1<span class="<?">2</span>3', '123' ], [ '<th>1</th><td>2</td>', '1 2' ], + [ '<style>.hello { display: block; }</style>', '' ], + [ 'Foo<style>p { color: red; }</style>Bar', 'FooBar' ], + [ '<script>var test = true;</script>', '' ], ]; } |