/**
 * Hashtag::the_content: replace hashtags in the post content with links.
 *
 * The content is split into chunks with wp_html_split() and a stack of the
 * currently open tags is maintained while walking the chunks. Text chunks
 * are only passed to the hashtag regex while no protected element
 * (pre, code, textarea, style, a) is open; markup chunks are copied verbatim.
 *
 * @param string $the_content The post content.
 *
 * @return string The filtered post content.
 */
public static function the_content( $the_content ) {
	// Text inside these elements must never be rewritten.
	$protected_tags     = array(
		'pre',
		'code',
		'textarea',
		'style',
		'a',
	);
	$tag_stack          = array();
	$in_protected_tag   = false;
	$content_with_links = '';

	foreach ( wp_html_split( $the_content ) as $chunk ) {
		// HTML comments are copied as-is so hashtags inside them are not linked.
		// NOTE(review): assumes wp_html_split() returns a whole comment as one chunk - confirm.
		if ( 0 === strpos( $chunk, '<!--' ) ) {
			$content_with_links .= $chunk;
			continue;
		}

		// Tag names may contain digits (h1-h6), so allow them after the first letter;
		// otherwise such tags would be treated as text and their attributes rewritten.
		if ( preg_match( '#^<(/)?([a-z][a-z0-9-]*)\b[^>]*>$#i', $chunk, $m ) ) {
			$tag = strtolower( $m[2] );

			if ( '/' === $m[1] ) {
				// Closing tag: unwind to the innermost (last) matching open tag.
				// Unwinding to the first match would also discard enclosing tags,
				// e.g. the <a> in <div><a><div></div>#tag</a>.
				$positions = array_keys( $tag_stack, $tag, true );
				if ( $positions ) {
					$tag_stack = array_slice( $tag_stack, 0, end( $positions ) );
				}
			} else {
				// Opening tag, add it to the stack.
				$tag_stack[] = $tag;
			}

			// The protected state can only change on a start or end tag, so it is
			// recomputed here and kept as a real boolean.
			$in_protected_tag = (bool) array_intersect( $tag_stack, $protected_tags );

			// Never inspect tags.
			$content_with_links .= $chunk;
			continue;
		}

		if ( $in_protected_tag ) {
			// Don't inspect a chunk inside a protected tag.
			$content_with_links .= $chunk;
			continue;
		}

		// Only reachable when there is no protected tag in the stack.
		$linked = \preg_replace_callback( '/' . ACTIVITYPUB_HASHTAGS_REGEXP . '/i', array( '\Activitypub\Hashtag', 'replace_with_links' ), $chunk );

		// preg_replace_callback() returns null on a PCRE error; keep the original text instead of dropping it.
		$content_with_links .= null === $linked ? $chunk : $linked;
	}

	return $content_with_links;
}
/**
 * Mention::the_content: replace @-mentions in the post content with links.
 *
 * The content is split into chunks with wp_html_split() and a stack of the
 * currently open tags is maintained while walking the chunks. Text chunks
 * are only passed to the mention regex while no protected element
 * (pre, code, textarea, style, a) is open; markup chunks are copied verbatim.
 *
 * @param string $the_content The post content.
 *
 * @return string The filtered post content.
 */
public static function the_content( $the_content ) {
	// Text inside these elements must never be rewritten.
	$protected_tags     = array(
		'pre',
		'code',
		'textarea',
		'style',
		'a',
	);
	$tag_stack          = array();
	$in_protected_tag   = false;
	$content_with_links = '';

	foreach ( wp_html_split( $the_content ) as $chunk ) {
		// HTML comments are copied as-is so mentions inside them are not linked.
		// NOTE(review): assumes wp_html_split() returns a whole comment as one chunk - confirm.
		if ( 0 === strpos( $chunk, '<!--' ) ) {
			$content_with_links .= $chunk;
			continue;
		}

		// Tag names may contain digits (h1-h6), so allow them after the first letter;
		// otherwise such tags would be treated as text and a mention inside an
		// attribute value would be rewritten.
		if ( preg_match( '#^<(/)?([a-z][a-z0-9-]*)\b[^>]*>$#i', $chunk, $m ) ) {
			$tag = strtolower( $m[2] );

			if ( '/' === $m[1] ) {
				// Closing tag: unwind to the innermost (last) matching open tag.
				// Unwinding to the first match would also discard enclosing tags,
				// e.g. the <a> in <div><a><div></div>@user@host</a>.
				$positions = array_keys( $tag_stack, $tag, true );
				if ( $positions ) {
					$tag_stack = array_slice( $tag_stack, 0, end( $positions ) );
				}
			} else {
				// Opening tag, add it to the stack.
				$tag_stack[] = $tag;
			}

			// The protected state can only change on a start or end tag, so it is
			// recomputed here and kept as a real boolean.
			$in_protected_tag = (bool) array_intersect( $tag_stack, $protected_tags );

			// Never inspect tags.
			$content_with_links .= $chunk;
			continue;
		}

		if ( $in_protected_tag ) {
			// Don't inspect a chunk inside a protected tag.
			$content_with_links .= $chunk;
			continue;
		}

		// Only reachable when there is no protected tag in the stack.
		$linked = \preg_replace_callback( '/@' . ACTIVITYPUB_USERNAME_REGEXP . '/', array( self::class, 'replace_with_links' ), $chunk );

		// preg_replace_callback() returns null on a PCRE error; keep the original text instead of dropping it.
		$content_with_links .= null === $linked ? $chunk : $linked;
	}

	return $content_with_links;
}
hallo #object test
', '
hallo test
' ), array( '
hallo #object
', '
hallo
' ), - array( '
#object
', '
#object
' ), + array( '
#object
', '
' ), array( '#object', '#object' ), + array( '', '' ), array( '
object', '
object' ), array( $code, $code ), array( $style, $style ), diff --git a/tests/test-class-activitypub-mention.php b/tests/test-class-activitypub-mention.php index dce023c..0739161 100644 --- a/tests/test-class-activitypub-mention.php +++ b/tests/test-class-activitypub-mention.php @@ -33,6 +33,7 @@ ENDPRE; array( 'hallo @pfefferle@notiz.blog test', 'hallo @pfefferle@notiz.blog test' ), array( 'hallo @pfefferle@notiz.blog test', 'hallo @pfefferle@notiz.blog test' ), array( 'hallo https://notiz.blog/@pfefferle/ test', 'hallo https://notiz.blog/@pfefferle/ test' ), + array( '', '' ), array( $code, $code ), array( $pre, $pre ), );