Merge pull request #5732 from annando/fix-picture-posts
[friendica.git/.git] / src / Content / Text / BBCode.php
index 5b0aa2c..2dc13ae 100644 (file)
@@ -1,5 +1,4 @@
 <?php
-
 /**
  * @file src/Content/Text/BBCode.php
  */
@@ -16,7 +15,6 @@ use Friendica\Core\Addon;
 use Friendica\Core\Cache;
 use Friendica\Core\Config;
 use Friendica\Core\L10n;
-use Friendica\Core\PConfig;
 use Friendica\Core\Protocol;
 use Friendica\Core\System;
 use Friendica\Model\Contact;
@@ -26,10 +24,9 @@ use Friendica\Object\Image;
 use Friendica\Util\Map;
 use Friendica\Util\Network;
 use Friendica\Util\ParseUrl;
+use Friendica\Util\Proxy as ProxyUtils;
 use League\HTMLToMarkdown\HtmlConverter;
 
-require_once "mod/proxy.php";
-
 class BBCode extends BaseObject
 {
        /**
@@ -89,7 +86,7 @@ class BBCode extends BaseObject
                                        $post["url"] = $matches[1];
                                        $post["title"] = $matches[2];
                                }
-                               if (($post["url"] == "") && (in_array($post["type"], ["link", "video"]))
+                               if (empty($post["url"]) && (in_array($post["type"], ["link", "video"]))
                                        && preg_match("/\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", $attacheddata, $matches)) {
                                        $post["url"] = $matches[1];
                                }
@@ -243,6 +240,9 @@ class BBCode extends BaseObject
                        $body = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '[img]$3[/img]', $body);
 
                        $URLSearchString = "^\[\]";
+
+                       $body = preg_replace("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism", '[img]$1[/img]', $body);
+
                        if (preg_match_all("(\[url=([$URLSearchString]*)\]\s*\[img\]([$URLSearchString]*)\[\/img\]\s*\[\/url\])ism", $body, $pictures, PREG_SET_ORDER)) {
                                if ((count($pictures) == 1) && !$has_title) {
                                        // Checking, if the link goes to a picture
@@ -340,159 +340,30 @@ class BBCode extends BaseObject
        }
 
        /**
-        * @brief Convert a message into plaintext for connectors to other networks
+        * @brief Converts a BBCode text into plaintext
         *
-        * @param array $b The message array that is about to be posted
-        * @param int $limit The maximum number of characters when posting to that network
-        * @param bool $includedlinks Has an attached link to be included into the message?
-        * @param int $htmlmode This triggers the behaviour of the bbcode conversion
-        * @param string $target_network Name of the network where the post should go to.
+        * @param bool $keep_urls Whether to keep URLs in the resulting plaintext
         *
-        * @return string The converted message
+        * @return string
         */
-       public static function toPlaintext($b, $limit = 0, $includedlinks = false, $htmlmode = 2, $target_network = "")
+       public static function toPlaintext($text, $keep_urls = true)
        {
-               // Remove the hash tags
-               $URLSearchString = "^\[\]";
-               $body = preg_replace("/([#@])\[url\=([$URLSearchString]*)\](.*?)\[\/url\]/ism", '$1$3', $b["body"]);
-
-               // Add an URL element if the text contains a raw link
-               $body = preg_replace("/([^\]\='".'"'."]|^)(https?\:\/\/[a-zA-Z0-9\:\/\-\?\&\;\.\=\_\~\#\%\$\!\+\,]+)/ism", '$1[url]$2[/url]', $body);
-
-               // Remove the abstract
-               $body = self::stripAbstract($body);
-
-               // At first look at data that is attached via "type-..." stuff
-               // This will hopefully replaced with a dedicated bbcode later
-               //$post = self::getAttachedData($b["body"]);
-               $post = self::getAttachedData($body, $b);
-
-               if (($b["title"] != "") && ($post["text"] != "")) {
-                       $post["text"] = trim($b["title"]."\n\n".$post["text"]);
-               } elseif ($b["title"] != "") {
-                       $post["text"] = trim($b["title"]);
-               }
-
-               $abstract = "";
-
-               // Fetch the abstract from the given target network
-               if ($target_network != "") {
-                       $default_abstract = self::getAbstract($b["body"]);
-                       $abstract = self::getAbstract($b["body"], $target_network);
-
-                       // If we post to a network with no limit we only fetch
-                       // an abstract exactly for this network
-                       if (($limit == 0) && ($abstract == $default_abstract)) {
-                               $abstract = "";
-                       }
-               } else {// Try to guess the correct target network
-                       switch ($htmlmode) {
-                               case 8:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_TWITTER);
-                                       break;
-                               case 7:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_STATUSNET);
-                                       break;
-                               case 6:
-                                       $abstract = self::getAbstract($b["body"], NETWORK_APPNET);
-                                       break;
-                               default: // We don't know the exact target.
-                                       // We fetch an abstract since there is a posting limit.
-                                       if ($limit > 0) {
-                                               $abstract = self::getAbstract($b["body"]);
-                                       }
-                       }
-               }
-
-               if ($abstract != "") {
-                       $post["text"] = $abstract;
-
-                       if ($post["type"] == "text") {
-                               $post["type"] = "link";
-                               $post["url"] = $b["plink"];
-                       }
-               }
-
-               $html = self::convert($post["text"].$post["after"], false, $htmlmode);
-               $msg = HTML::toPlaintext($html, 0, true);
-               $msg = trim(html_entity_decode($msg, ENT_QUOTES, 'UTF-8'));
-
-               $link = "";
-               if ($includedlinks) {
-                       if ($post["type"] == "link") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "text") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "video") {
-                               $link = $post["url"];
-                       } elseif ($post["type"] == "photo") {
-                               $link = $post["image"];
-                       }
-
-                       if (($msg == "") && isset($post["title"])) {
-                               $msg = trim($post["title"]);
-                       }
-
-                       if (($msg == "") && isset($post["description"])) {
-                               $msg = trim($post["description"]);
-                       }
-
-                       // If the link is already contained in the post, then it neeedn't to be added again
-                       // But: if the link is beyond the limit, then it has to be added.
-                       if (($link != "") && strstr($msg, $link)) {
-                               $pos = strpos($msg, $link);
-
-                               // Will the text be shortened in the link?
-                               // Or is the link the last item in the post?
-                               if (($limit > 0) && ($pos < $limit) && (($pos + 23 > $limit) || ($pos + strlen($link) == strlen($msg)))) {
-                                       $msg = trim(str_replace($link, "", $msg));
-                               } elseif (($limit == 0) || ($pos < $limit)) {
-                                       // The limit has to be increased since it will be shortened - but not now
-                                       // Only do it with Twitter (htmlmode = 8)
-                                       if (($limit > 0) && (strlen($link) > 23) && ($htmlmode == 8)) {
-                                               $limit = $limit - 23 + strlen($link);
-                                       }
-
-                                       $link = "";
-
-                                       if ($post["type"] == "text") {
-                                               unset($post["url"]);
-                                       }
-                               }
-                       }
+               $naked_text = preg_replace('/\[(.+?)\]/','', $text);
+               if (!$keep_urls) {
+                       $naked_text = preg_replace('#https?\://[^\s<]+[^\s\.\)]#i', '', $naked_text);
                }
 
-               if ($limit > 0) {
-                       // Reduce multiple spaces
-                       // When posted to a network with limited space, we try to gain space where possible
-                       while (strpos($msg, "  ") !== false) {
-                               $msg = str_replace("  ", " ", $msg);
-                       }
-
-                       // Twitter is using its own limiter, so we always assume that shortened links will have this length
-                       if (iconv_strlen($link, "UTF-8") > 0) {
-                               $limit = $limit - 23;
-                       }
+               return $naked_text;
+       }
 
-                       if (iconv_strlen($msg, "UTF-8") > $limit) {
-                               if (($post["type"] == "text") && isset($post["url"])) {
-                                       $post["url"] = $b["plink"];
-                               } elseif (!isset($post["url"])) {
-                                       $limit = $limit - 23;
-                                       $post["url"] = $b["plink"];
-                               // Which purpose has this line? It is now uncommented, but left as a reminder
-                               //} elseif (strpos($b["body"], "[share") !== false) {
-                               //      $post["url"] = $b["plink"];
-                               } elseif (PConfig::get($b["uid"], "system", "no_intelligent_shortening")) {
-                                       $post["url"] = $b["plink"];
-                               }
-                               $msg = Plaintext::shorten($msg, $limit);
-                       }
+       private static function proxyUrl($image, $simplehtml = false)
+       {
+               // Only send proxied pictures to API and for internal display
+               if (in_array($simplehtml, [false, 2])) {
+                       return ProxyUtils::proxifyUrl($image);
+               } else {
+                       return $image;
                }
-
-               $post["text"] = trim($msg);
-
-               return($post);
        }
 
        public static function scaleExternalImages($srctext, $include_link = true, $scale_replace = false)
@@ -575,7 +446,7 @@ class BBCode extends BaseObject
         */
        public static function limitBodySize($body)
        {
-               $maxlen = get_max_import_size();
+               $maxlen = Config::get('config', 'max_import_size', 0);
 
                // If the length of the body, including the embedded images, is smaller
                // than the maximum, then don't waste time looking for the images
@@ -666,16 +537,18 @@ class BBCode extends BaseObject
        private static function convertAttachment($return, $simplehtml = false, $tryoembed = true)
        {
                $data = self::getAttachmentData($return);
-               if (!$data) {
+               if (empty($data) || empty($data["url"])) {
                        return $return;
                }
 
                if (isset($data["title"])) {
                        $data["title"] = strip_tags($data["title"]);
                        $data["title"] = str_replace(["http://", "https://"], "", $data["title"]);
+               } else {
+                       $data["title"] = null;
                }
 
-               if (((strpos($data["text"], "[img=") !== false) || (strpos($data["text"], "[img]") !== false) || Config::get('system', 'always_show_preview')) && ($data["image"] != "")) {
+               if (((strpos($data["text"], "[img=") !== false) || (strpos($data["text"], "[img]") !== false) || Config::get('system', 'always_show_preview')) && !empty($data["image"])) {
                        $data["preview"] = $data["image"];
                        $data["image"] = "";
                }
@@ -693,28 +566,32 @@ class BBCode extends BaseObject
                                        throw new Exception('OEmbed is disabled for this attachment.');
                                }
                        } catch (Exception $e) {
+                               $data["title"] = defaults($data, 'title', $data['url']);
+
                                if ($simplehtml != 4) {
                                        $return = sprintf('<div class="type-%s">', $data["type"]);
                                }
 
-                               if ($data["image"] != "") {
-                                       $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br />', $data["url"], proxy_url($data["image"]), $data["title"]);
-                               } elseif ($data["preview"] != "") {
-                                       $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br />', $data["url"], proxy_url($data["preview"]), $data["title"]);
-                               }
-
-                               if (($data["type"] == "photo") && ($data["url"] != "") && ($data["image"] != "")) {
-                                       $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data["url"], proxy_url($data["image"]), $data["title"]);
-                               } else {
-                                       $return .= sprintf('<h4><a href="%s">%s</a></h4>', $data['url'], $data['title']);
+                               if (!empty($data['title']) && !empty($data['url'])) {
+                                       if (!empty($data["image"]) && empty($data["text"]) && ($data["type"] == "photo")) {
+                                               $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a>', $data["url"], self::proxyUrl($data["image"], $simplehtml), $data["title"]);
+                                       } else {
+                                               if (!empty($data["image"])) {
+                                                       $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-image" /></a><br />', $data["url"], self::proxyUrl($data["image"], $simplehtml), $data["title"]);
+                                               } elseif (!empty($data["preview"])) {
+                                                       $return .= sprintf('<a href="%s" target="_blank"><img src="%s" alt="" title="%s" class="attachment-preview" /></a><br />', $data["url"], self::proxyUrl($data["preview"], $simplehtml), $data["title"]);
+                                               }
+                                               $return .= sprintf('<h4><a href="%s">%s</a></h4>', $data['url'], $data['title']);
+                                       }
                                }
 
-                               if ($data["description"] != "" && $data["description"] != $data["title"]) {
+                               if (!empty($data["description"]) && $data["description"] != $data["title"]) {
                                        // Sanitize the HTML by converting it to BBCode
                                        $bbcode = HTML::toBBCode($data["description"]);
                                        $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
                                }
-                               if ($data["type"] == "link") {
+
+                               if (!empty($data['url'])) {
                                        $return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['url'], parse_url($data['url'], PHP_URL_HOST));
                                }
 
@@ -724,7 +601,7 @@ class BBCode extends BaseObject
                        }
                }
 
-               return trim($data["text"] . ' ' . $return . ' ' . $data["after"]);
+               return trim(defaults($data, 'text', '') . ' ' . $return . ' ' . defaults($data, 'after', ''));
        }
 
        public static function removeShareInformation($Text, $plaintext = false, $nolink = false)
@@ -734,10 +611,10 @@ class BBCode extends BaseObject
                if (!$data) {
                        return $Text;
                } elseif ($nolink) {
-                       return $data["text"] . $data["after"];
+                       return $data["text"] . defaults($data, 'after', '');
                }
 
-               $title = htmlentities($data["title"], ENT_QUOTES, 'UTF-8', false);
+               $title = htmlentities(defaults($data, 'title', ''), ENT_QUOTES, 'UTF-8', false);
                $text = htmlentities($data["text"], ENT_QUOTES, 'UTF-8', false);
                if ($plaintext || (($title != "") && strstr($text, $title))) {
                        $data["title"] = $data["url"];
@@ -751,15 +628,15 @@ class BBCode extends BaseObject
                }
 
                // If the link already is included in the post, don't add it again
-               if (($data["url"] != "") && strpos($data["text"], $data["url"])) {
+               if (!empty($data["url"]) && (strpos($data["text"], $data["url"]) !== false)) {
                        return $data["text"] . $data["after"];
                }
 
                $text = $data["text"];
 
-               if (($data["url"] != "") && ($data["title"] != "")) {
+               if (!empty($data["url"]) && !empty($data["title"])) {
                        $text .= "\n[url=" . $data["url"] . "]" . $data["title"] . "[/url]";
-               } elseif (($data["url"] != "")) {
+               } elseif (!empty($data["url"])) {
                        $text .= "\n[url]" . $data["url"] . "[/url]";
                }
 
@@ -975,7 +852,7 @@ class BBCode extends BaseObject
                        // it loops over the array starting from the first element and going sequentially
                        // to the last element
                        $newbody = str_replace('[$#saved_image' . $cnt . '#$]',
-                               '<img src="' . proxy_url($image) . '" alt="' . L10n::t('Image/photo') . '" />', $newbody);
+                               '<img src="' . self::proxyUrl($image) . '" alt="' . L10n::t('Image/photo') . '" />', $newbody);
                        $cnt++;
                }
 
@@ -1128,16 +1005,13 @@ class BBCode extends BaseObject
                        case 5:
                                $text = $preshare . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ": <br />" . $share[3];
                                break;
-                       case 6: // app.net
-                               $text = $preshare . "&gt;&gt; @" . $userid_compact . ": <br />" . $share[3];
-                               break;
                        case 7: // statusnet/GNU Social
                                $text = $preshare . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . " @" . $userid_compact . ": " . $share[3];
                                break;
                        case 8: // twitter
                                $text = $preshare . "RT @" . $userid_compact . ": " . $share[3];
                                break;
-                       case 9: // Google+/Facebook
+                       case 9: // Google+
                                $text = $preshare . html_entity_decode("&#x2672; ", ENT_QUOTES, 'UTF-8') . ' ' . $userid_compact . ": <br />" . $share[3];
 
                                if ($link != "") {
@@ -1157,7 +1031,7 @@ class BBCode extends BaseObject
                                } else {
                                        $text = trim($share[1]) . "\n";
 
-                                       $avatar = proxy_url($avatar, false, PROXY_SIZE_THUMB);
+                                       $avatar = ProxyUtils::proxifyUrl($avatar, false, ProxyUtils::SIZE_THUMB);
 
                                        $tpl = get_markup_template('shared_content.tpl');
                                        $text .= replace_macros($tpl, [
@@ -1314,11 +1188,11 @@ class BBCode extends BaseObject
         * Simple HTML values meaning:
         * - 0: Friendica display
         * - 1: Unused
-        * - 2: Used for Facebook, Google+, Windows Phone push, Friendica API
+        * - 2: Used for Google+, Windows Phone push, Friendica API
         * - 3: Used before converting to Markdown in bb2diaspora.php
         * - 4: Used for WordPress, Libertree (before Markdown), pump.io and tumblr
         * - 5: Unused
-        * - 6: Used for Appnet
+        * - 6: Unused
         * - 7: Used for dfrn, OStatus
         * - 8: Used for WP backlink text setting
         *
@@ -1506,6 +1380,13 @@ class BBCode extends BaseObject
                        }, $text
                );
 
+               $text = preg_replace_callback(
+                       "&\[url=/people\?q\=(.*)\](.*)\[\/url\]&Usi",
+                       function ($match) {
+                               return "[url=" . System::baseUrl() . "/search?search=%40" . $match[1] . "]" . $match[2] . "[/url]";
+                       }, $text
+               );
+
                // Server independent link to posts and comments
                // See issue: https://github.com/diaspora/diaspora_federation/issues/75
                $expression = "=diaspora://.*?/post/([0-9A-Za-z\-_@.:]{15,254}[0-9A-Za-z])=ism";
@@ -1580,7 +1461,7 @@ class BBCode extends BaseObject
                $text = preg_replace("(\[u\](.*?)\[\/u\])ism", '<u>$1</u>', $text);
 
                // Check for strike-through text
-               $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<strike>$1</strike>', $text);
+               $text = preg_replace("(\[s\](.*?)\[\/s\])ism", '<s>$1</s>', $text);
 
                // Check for over-line text
                $text = preg_replace("(\[o\](.*?)\[\/o\])ism", '<span class="overline">$1</span>', $text);
@@ -1707,12 +1588,12 @@ class BBCode extends BaseObject
                // [img=widthxheight]image source[/img]
                $text = preg_replace_callback(
                        "/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism",
-                       function ($matches) {
+                       function ($matches) use ($simple_html) {
                                if (strpos($matches[3], "data:image/") === 0) {
                                        return $matches[0];
                                }
 
-                               $matches[3] = proxy_url($matches[3]);
+                               $matches[3] = self::proxyUrl($matches[3], $simple_html);
                                return "[img=" . $matches[1] . "x" . $matches[2] . "]" . $matches[3] . "[/img]";
                        },
                        $text
@@ -1721,16 +1602,24 @@ class BBCode extends BaseObject
                $text = preg_replace("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", '<img src="$3" style="width: $1px;" >', $text);
                $text = preg_replace("/\[zmg\=([0-9]*)x([0-9]*)\](.*?)\[\/zmg\]/ism", '<img class="zrl" src="$3" style="width: $1px;" >', $text);
 
+               $text = preg_replace_callback("/\[img\=([$URLSearchString]*)\](.*?)\[\/img\]/ism",
+                       function ($matches) use ($simple_html) {
+                               $matches[1] = self::proxyUrl($matches[1], $simple_html);
+                               $matches[2] = htmlspecialchars($matches[2], ENT_COMPAT);
+                               return '<img src="' . $matches[1] . '" alt="' . $matches[2] . '">';
+                       },
+                       $text);
+
                // Images
                // [img]pathtoimage[/img]
                $text = preg_replace_callback(
                        "/\[img\](.*?)\[\/img\]/ism",
-                       function ($matches) {
+                       function ($matches) use ($simple_html) {
                                if (strpos($matches[1], "data:image/") === 0) {
                                        return $matches[0];
                                }
 
-                               $matches[1] = proxy_url($matches[1]);
+                               $matches[1] = self::proxyUrl($matches[1], $simple_html);
                                return "[img]" . $matches[1] . "[/img]";
                        },
                        $text
@@ -1751,15 +1640,15 @@ class BBCode extends BaseObject
 
                // Try to Oembed
                if ($try_oembed) {
-                       $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4))\[\/video\]/ism", '<video src="$1" controls="controls" width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
-                       $text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3))\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $text);
+                       $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism", '<video src="$1" controls="controls" width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
+                       $text = preg_replace("/\[audio\](.*?\.(ogg|ogv|oga|ogm|webm|mp4|mp3).*?)\[\/audio\]/ism", '<audio src="$1" controls="controls"><a href="$1">$1</a></audio>', $text);
 
                        $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text);
                        $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text);
                } else {
-                       $text = preg_replace("/\[video\](.*?)\[\/video\]/",
+                       $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
                                                '<a href="$1" target="_blank">$1</a>', $text);
-                       $text = preg_replace("/\[audio\](.*?)\[\/audio\]/",
+                       $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism",
                                                '<a href="$1" target="_blank">$1</a>', $text);
                }
 
@@ -1852,10 +1741,12 @@ class BBCode extends BaseObject
                $text = preg_replace_callback("/\[nobb\](.*?)\[\/nobb\]/ism", 'self::unescapeNoparseCallback', $text);
                $text = preg_replace_callback("/\[pre\](.*?)\[\/pre\]/ism", 'self::unescapeNoparseCallback', $text);
 
-
+               /// @todo What is the meaning of these lines?
                $text = preg_replace('/\[\&amp\;([#a-z0-9]+)\;\]/', '&$1;', $text);
                $text = preg_replace('/\&\#039\;/', '\'', $text);
-               $text = preg_replace('/\&quot\;/', '"', $text);
+
+               // Currently deactivated: it caused problems with " characters inside of alt texts.
+               //$text = preg_replace('/\&quot\;/', '"', $text);
 
                // fix any escaped ampersands that may have been converted into links
                $text = preg_replace('/\<([^>]*?)(src|href)=(.*?)\&amp\;(.*?)\>/ism', '<$1$2=$3&$4>', $text);
@@ -1934,7 +1825,7 @@ class BBCode extends BaseObject
         * @param string $addon The addon for which the abstract is meant for
         * @return string The abstract
         */
-       private static function getAbstract($text, $addon = "")
+       public static function getAbstract($text, $addon = "")
        {
                $abstract = "";
                $abstracts = [];