diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php
index 721b222b77..d2ba4bb579 100644
--- a/wp-includes/formatting.php
+++ b/wp-includes/formatting.php
@@ -1387,9 +1387,17 @@ function antispambot($emailaddy, $mailto=0) {
*/
function _make_url_clickable_cb($matches) {
$url = $matches[2];
- $suffix = '';
- /** Include parentheses in the URL only if paired **/
+ if ( ')' == $matches[3] && strpos( $url, '(' ) ) {
+ // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.
+ // Then we can let the parenthesis balancer do its thing below.
+ $url .= $matches[3];
+ $suffix = '';
+ } else {
+ $suffix = $matches[3];
+ }
+
+ // Include parentheses in the URL only if paired
while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) {
$suffix = strrchr( $url, ')' ) . $suffix;
$url = substr( $url, 0, strrpos( $url, ')' ) );
@@ -1458,20 +1466,102 @@ function _make_email_clickable_cb($matches) {
* @param string $ret Content to convert URIs.
* @return string Content with converted URIs.
*/
-function make_clickable($ret) {
- $ret = ' ' . $ret;
- // in testing, using arrays here was found to be faster
- $save = @ini_set('pcre.recursion_limit', 10000);
- $retval = preg_replace_callback('#(?])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret);
- if (null !== $retval )
- $ret = $retval;
- @ini_set('pcre.recursion_limit', $save);
+function make_clickable( $ret ) {
+ // Long strings might contain expensive edge cases ...
+ if ( 10000 < strlen( $ret ) ) {
+ $r = '';
+ // ... break it up
+ foreach ( _split_str_by_whitespace( $ret, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses
+ if ( 2101 < strlen( $chunk ) ) {
+ $r .= $chunk; // Too big, no whitespace: bail.
+ } else {
+ $r .= make_clickable( $chunk );
+ }
+ }
+ return $r;
+ }
+
+ $ret = " $ret "; // Pad with whitespace to simplify the regexes
+
+ $url_clickable = '~
+ ([\\s(<.,;:!?]) # 1: Leading whitespace, or punctuation
+ ( # 2: URL
+ [\\w]{1,20}+:// # Scheme and hier-part prefix
+ (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long
+ [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+ # Non-punctuation URL character
+ (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
+ [\'.,;:!?)] # Punctuation URL character
+ [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
+ )*
+ )
+ (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing)
+ ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
+ // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
+
+ $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
+
$ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret);
$ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
- // this one is not in an array because we need it to run last, for cleanup of accidental links within links
+
+ // Cleanup of accidental links within links
$ret = preg_replace("#(]+?>|>))]+?>([^>]+?)#i", "$1$3", $ret);
- $ret = trim($ret);
- return $ret;
+ return substr( $ret, 1, -1 ); // Remove our whitespace padding.
+}
+
+/**
+ * Breaks a string into chunks by splitting at whitespace characters.
+ * The length of each returned chunk is as close to the specified length goal as possible,
+ * with the caveat that each chunk includes its trailing delimiter.
+ * Chunks longer than the goal are guaranteed to not have any inner whitespace.
+ *
+ * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
+ *
+ * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters)
+ *
+ *
+ * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234 890 123456789 1234567890a 45678 1 3 5 7 90 ", 10 ) ==
+ * array (
+ * 0 => '1234 67890 ', // 11 characters: Perfect split
+ * 1 => '1234 ', // 5 characters: '1234 67890a' was too long
+ * 2 => '67890a cd ', // 10 characters: '67890a cd 1234' was too long
+ * 3 => '1234 890 ', // 11 characters: Perfect split
+ * 4 => '123456789 ', // 10 characters: '123456789 1234567890a' was too long
+ * 5 => '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
+ * 6 => ' 45678 ', // 11 characters: Perfect split
+ * 7 => '1 3 5 7 9', // 9 characters: End of $string
+ * );
+ *
+ *
+ * @param string $string The string to split
+ * @param int $goal The desired chunk length.
+ *
+ * @return array Numeric array of chunks.
+ */
+function _split_str_by_whitespace( $string, $goal ) {
+ $chunks = array();
+
+ $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );
+
+ while ( $goal < strlen( $string_nullspace ) ) {
+ $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );
+
+ if ( false === $pos ) {
+ $pos = strpos( $string_nullspace, "\000", $goal + 1 );
+ if ( false === $pos ) {
+ break;
+ }
+ }
+
+ $chunks[] = substr( $string, 0, $pos + 1 );
+ $string = substr( $string, $pos + 1 );
+ $string_nullspace = substr( $string_nullspace, $pos + 1 );
+ }
+
+ if ( $string ) {
+ $chunks[] = $string;
+ }
+
+ return $chunks;
}
/**