Update autolinker regular expression in Text\BBCode
author: Hypolite Petovan <hypolite@mrpetovan.com>
Sat, 9 Mar 2019 04:39:21 +0000 (23:39 -0500)
committer: Hypolite Petovan <hypolite@mrpetovan.com>
Sat, 9 Mar 2019 04:58:20 +0000 (23:58 -0500)
- Thanks to https://daringfireball.net/2010/07/improved_regex_for_matching_urls

src/Content/Text/BBCode.php

index e49b14f..85466b5 100644 (file)
@@ -1268,11 +1268,29 @@ class BBCode extends BaseObject
 
                // if the HTML is used to generate plain text, then don't do this search, but replace all URL of that kind to text
                if (!$for_plaintext) {
-                       // Autolink feature (thanks to http://code.seebz.net/p/autolink-php/)
-                       // Currently disabled, since the function is too greedy
-                       // $autolink_regex = "`([^\]\=\"']|^)(https?\://[^\s<]+[^\s<\.\)])`ism";
-                       $autolink_regex = "/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism";
-                       $text = preg_replace($autolink_regex, '$1[url]$2[/url]', $text);
+                       // Autolink feature (thanks to https://daringfireball.net/2010/07/improved_regex_for_matching_urls)
+                       $autolink_regex = '@(?xi)
+(?<![=\'\]"/])          # Not preceded by =, \', ], ", /
+\b
+(                       # Capture 1: entire matched URL
+  https?://                 # http or https protocol
+  (?:
+    www\d{0,3}[.]           # "www.", "www1.", "www2." … "www999."
+    |                           #   or
+    [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash
+  )
+  (?:                       # One or more:
+    [^\s()<>]+                  # Run of non-space, non-()<>
+    |                           #   or
+    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
+  )+
+  (?:                       # End with:
+    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
+    |                               #   or
+    [^\s`!()\[\]{};:\'".,<>?«»“”‘’]        # not a space or one of these punct chars
+  )
+)@';
+                       $text = preg_replace($autolink_regex, '[url]$1[/url]', $text);
                        if ($simple_html == 7) {
                                $text = preg_replace_callback("/\[url\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);
                                $text = preg_replace_callback("/\[url\=([$URLSearchString]*)\]([$URLSearchString]*)\[\/url\]/ism", 'self::convertUrlForOStatusCallback', $text);