summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>2021-12-23 12:44:22 +0000
committerGerrit Code Review <gerrit@wikimedia.org>2021-12-23 12:44:22 +0000
commitd974510c56c7d9abe96a5a3ca0a7cf34237896ee (patch)
tree539a3abe0f887242ecd9e1d7c30f085778361a5d
parent95fe8522bee580847200f928068dcac327b53518 (diff)
parent8e06927190922cffa27af1fab845de44765c442a (diff)
Merge "Make Sanitizer::stripAllTags() strip css and js tag contents"
-rw-r--r--includes/parser/RemexStripTagHandler.php33
-rw-r--r--tests/phpunit/unit/includes/parser/SanitizerUnitTest.php3
2 files changed, 35 insertions, 1 deletions
diff --git a/includes/parser/RemexStripTagHandler.php b/includes/parser/RemexStripTagHandler.php
index 1a1fefaeab96..ca7f290b39d8 100644
--- a/includes/parser/RemexStripTagHandler.php
+++ b/includes/parser/RemexStripTagHandler.php
@@ -7,6 +7,7 @@ use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
* @internal
*/
class RemexStripTagHandler extends NullTokenHandler {
+ private $insideNonVisibleTag = false;
private $text = '';
public function getResult() {
@@ -14,10 +15,15 @@ class RemexStripTagHandler extends NullTokenHandler {
}
public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
- $this->text .= substr( $text, $start, $length );
+ if ( !$this->insideNonVisibleTag ) {
+ $this->text .= substr( $text, $start, $length );
+ }
}
public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
+ if ( $this->isNonVisibleTag( $name ) ) {
+ $this->insideNonVisibleTag = true;
+ }
// Inject whitespace for typical block-level tags to
// prevent merging unrelated<br>words.
if ( $this->isBlockLevelTag( $name ) ) {
@@ -26,6 +32,9 @@ class RemexStripTagHandler extends NullTokenHandler {
}
public function endTag( $name, $sourceStart, $sourceLength ) {
+ if ( $this->isNonVisibleTag( $name ) ) {
+ $this->insideNonVisibleTag = false;
+ }
// Inject whitespace for typical block-level tags to
// prevent merging unrelated<br>words.
if ( $this->isBlockLevelTag( $name ) ) {
@@ -93,4 +102,26 @@ class RemexStripTagHandler extends NullTokenHandler {
$key = strtolower( trim( $tagName ) );
return isset( self::BLOCK_LEVEL_TAGS[$key] );
}
+
+ private const NON_VISIBLE_TAGS = [
+ 'style' => true,
+ 'script' => true,
+ ];
+
+ /**
+ * Detect tags whose contents are non-visible by default.
+ * Of course, CSS can make anything non-visible,
+ * but this is still better than nothing.
+ *
+ * We use this primarily to hide TemplateStyles
+ * from output in notifications/emails etc.
+ *
+ * @param string $tagName HTML tag name
+ * @return bool True when the tag is an HTML element which should be filtered out
+ */
+ private function isNonVisibleTag( $tagName ) {
+ $key = strtolower( trim( $tagName ) );
+ return isset( self::NON_VISIBLE_TAGS[$key] );
+ }
+
}
diff --git a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
index 82c73f76343f..daf3d22aaff9 100644
--- a/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
+++ b/tests/phpunit/unit/includes/parser/SanitizerUnitTest.php
@@ -246,6 +246,9 @@ class SanitizerUnitTest extends MediaWikiUnitTestCase {
[ '1<span class="<?php">2</span>3', '123' ],
[ '1<span class="<?">2</span>3', '123' ],
[ '<th>1</th><td>2</td>', '1 2' ],
+ [ '<style>.hello { display: block; }</style>', '' ],
+ [ 'Foo<style>p { color: red; }</style>Bar', 'FooBar' ],
+ [ '<script>var test = true;</script>', '' ],
];
}