summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDerk-Jan Hartman <hartman.wiki@gmail.com>2021-12-22 00:22:46 +0100
committerTheDJ <hartman.wiki@gmail.com>2021-12-22 23:26:17 +0000
commit8e06927190922cffa27af1fab845de44765c442a (patch)
treef1a836b8185f1ad2150826bdecac3d9e698f8fa2
parent22ece356e49cb6bd1a884a4d4a59ec0f00f5dcc5 (diff)
Make Sanitizer::stripAllTags() strip css and js tag contents
We use Sanitizer::stripAllTags() primarily to remove formatting from HTML so that we can use the resulting text in places like notifications, emails, search result blurbs, etc. It is very unlikely that we want the raw contents of CSS and/or JS tags anywhere in those places, so let's suppress that content to make the output more readable, as TemplateStyles are showing up in more and more places. Bug: T228856 Change-Id: I7930361068ddcf3a6c2fdebd0177d142f025b64f
-rw-r--r--includes/parser/RemexStripTagHandler.php33
-rw-r--r--tests/phpunit/unit/includes/parser/SanitizerUnitTest.php3
2 files changed, 35 insertions, 1 deletions
diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php
index 1a1fefaeab96..ca7f290b39d8 100644
--- a/includes/parser/RemexStripTagHandler.php
+++ b/includes/parser/RemexStripTagHandler.php
@@ -7,6 +7,7 @@ use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
* @internal
*/
class RemexStripTagHandler extends NullTokenHandler {
+ private $insideNonVisibleTag = false;
private $text = '';
public function getResult() {
@@ -14,10 +15,15 @@ class RemexStripTagHandler extends NullTokenHandler {
}
public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
- $this->text .= substr( $text, $start, $length );
+ if ( !$this->insideNonVisibleTag ) {
+ $this->text .= substr( $text, $start, $length );
+ }
}
public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
+ if ( $this->isNonVisibleTag( $name ) ) {
+ $this->insideNonVisibleTag = true;
+ }
// Inject whitespace for typical block-level tags to
// prevent merging unrelated<br>words.
if ( $this->isBlockLevelTag( $name ) ) {
@@ -26,6 +32,9 @@ class RemexStripTagHandler extends NullTokenHandler {
}
public function endTag( $name, $sourceStart, $sourceLength ) {
+ if ( $this->isNonVisibleTag( $name ) ) {
+ $this->insideNonVisibleTag = false;
+ }
// Inject whitespace for typical block-level tags to
// prevent merging unrelated<br>words.
if ( $this->isBlockLevelTag( $name ) ) {
@@ -93,4 +102,26 @@ class RemexStripTagHandler extends NullTokenHandler {
$key = strtolower( trim( $tagName ) );
return isset( self::BLOCK_LEVEL_TAGS[$key] );
}
+
+ private const NON_VISIBLE_TAGS = [
+ 'style' => true,
+ 'script' => true,
+ ];
+
+ /**
+ * Detect block tags which by default are non-visible items.
+ * Of course css can make anything non-visible,
+ * but this is still better than nothing.
+ *
+ * We use this primarily to hide TemplateStyles
+ * from output in notifications/emails etc.
+ *
+ * @param string $tagName HTML tag name
+ * @return bool True when tag is a html element which should be filtered out
+ */
+ private function isNonVisibleTag( $tagName ) {
+ $key = strtolower( trim( $tagName ) );
+ return isset( self::NON_VISIBLE_TAGS[$key] );
+ }
+
}
diff --git a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
index 82c73f76343f..daf3d22aaff9 100644
--- a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
+++ b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
@@ -246,6 +246,9 @@ class SanitizerUnitTest extends MediaWikiUnitTestCase {
[ '1<span class="<?php">2</span>3', '123' ],
[ '1<span class="<?">2</span>3', '123' ],
[ '<th>1</th><td>2</td>', '1 2' ],
+ [ '<style>.hello { display: block; }</style>', '' ],
+ [ 'Foo<style>p { color: red; }</style>Bar', 'FooBar' ],
+ [ '<script>var test = true;</script>', '' ],
];
}