Remove empty lines around the horizontal ruler, no maximum height
[friendica.git/.git] / src / Content / Text / BBCode.php
index ea818d7..d363389 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * @copyright Copyright (C) 2020, Friendica
+ * @copyright Copyright (C) 2010-2021, the Friendica project
  *
  * @license GNU AGPL version 3 or any later version
  *
@@ -27,12 +27,12 @@ use Exception;
 use Friendica\Content\ContactSelector;
 use Friendica\Content\Item;
 use Friendica\Content\OEmbed;
+use Friendica\Content\PageInfo;
 use Friendica\Content\Smilies;
 use Friendica\Core\Hook;
 use Friendica\Core\Logger;
 use Friendica\Core\Protocol;
 use Friendica\Core\Renderer;
-use Friendica\Core\System;
 use Friendica\DI;
 use Friendica\Model\Contact;
 use Friendica\Model\Event;
@@ -50,7 +50,7 @@ use Friendica\Util\XML;
 class BBCode
 {
        // Update this value to the current date whenever changes are made to BBCode::convert
-       const VERSION = '2020-12-18-video-embeds';
+       const VERSION = '2021-04-07';
 
        const INTERNAL = 0;
        const API = 2;
@@ -154,6 +154,7 @@ class BBCode
                        'after'         => '',
                        'image'         => null,
                        'url'           => '',
+                       'author_name'   => '',
                        'provider_name' => '',
                        'provider_url'  => '',
                        'title'         => '',
@@ -254,17 +255,66 @@ class BBCode
                        $data['preview'] = html_entity_decode($preview, ENT_QUOTES, 'UTF-8');
                }
 
+               $provider_name = '';
+               preg_match("/publisher_name='(.*?)'/ism", $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $provider_name = $matches[1];
+               }
+
+               preg_match('/publisher_name="(.*?)"/ism', $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $provider_name = $matches[1];
+               }
+
+               if ($provider_name != '') {
+                       $data['provider_name'] = html_entity_decode($provider_name, ENT_QUOTES, 'UTF-8');
+               }
+
+               $provider_url = '';
+               preg_match("/publisher_url='(.*?)'/ism", $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $provider_url = $matches[1];
+               }
+
+               preg_match('/publisher_url="(.*?)"/ism', $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $provider_url = $matches[1];
+               }
+
+               if ($provider_url != '') {
+                       $data['provider_url'] = html_entity_decode($provider_url, ENT_QUOTES, 'UTF-8');
+               }
+
+               $author_name = '';
+               preg_match("/author_name='(.*?)'/ism", $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $author_name = $matches[1];
+               }
+
+               preg_match('/author_name="(.*?)"/ism', $attributes, $matches);
+               if (!empty($matches[1])) {
+                       $author_name = $matches[1];
+               }
+
+               if (($author_name != '') && ($author_name != $provider_name)) {
+                       $data['author_name'] = html_entity_decode($author_name, ENT_QUOTES, 'UTF-8');
+               }
+
                $data['description'] = trim($match[3]);
 
                $data['after'] = trim($match[4]);
 
                $parts = parse_url($data['url']);
                if (!empty($parts['scheme']) && !empty($parts['host'])) {
-                       $data['provider_name'] = $parts['host'];
-                       $data['provider_url'] = $parts['scheme'] . '://' . $parts['host'];
+                       if (empty($data['provider_name'])) {
+                               $data['provider_name'] = $parts['host'];
+                       }
+                       if (empty($data['provider_url'])) {
+                               $data['provider_url'] = $parts['scheme'] . '://' . $parts['host'];
 
-                       if (!empty($parts['port'])) {
-                               $data['provider_url'] .= ':' . $parts['port'];
+                               if (!empty($parts['port'])) {
+                                       $data['provider_url'] .= ':' . $parts['port'];
+                               }
                        }
                }
 
@@ -322,14 +372,14 @@ class BBCode
                                                $data = ['url' => $url, 'type' => 'photo'];
                                        } else {
                                                // Checking, if the link goes to a picture
-                                               $data = ParseUrl::getSiteinfoCached($pictures[0][1], true);
+                                               $data = ParseUrl::getSiteinfoCached($pictures[0][1]);
                                        }
 
                                        // Workaround:
                                        // Sometimes photo posts to the own album are not detected at the start.
                                        // So we seem to cannot use the cache for these cases. That's strange.
                                        if (($data['type'] != 'photo') && strstr($pictures[0][1], "/photos/")) {
-                                               $data = ParseUrl::getSiteinfo($pictures[0][1], true);
+                                               $data = ParseUrl::getSiteinfo($pictures[0][1]);
                                        }
 
                                        if ($data['type'] == 'photo') {
@@ -416,7 +466,7 @@ class BBCode
                                $post['text'] = trim($body);
                        }
                } elseif (isset($post['url']) && ($post['type'] == 'video')) {
-                       $data = ParseUrl::getSiteinfoCached($post['url'], true);
+                       $data = ParseUrl::getSiteinfoCached($post['url']);
 
                        if (isset($data['images'][0])) {
                                $post['image'] = $data['images'][0]['src'];
@@ -688,8 +738,12 @@ class BBCode
                                $return .= sprintf('<blockquote>%s</blockquote>', trim(self::convert($bbcode)));
                        }
 
-                       if (!empty($data['url'])) {
-                               $return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['url'], parse_url($data['url'], PHP_URL_HOST));
+                       if (!empty($data['provider_url']) && !empty($data['provider_name'])) {
+                               if (!empty($data['author_name'])) {
+                                       $return .= sprintf('<sup><a href="%s">%s (%s)</a></sup>', $data['provider_url'], $data['author_name'], $data['provider_name']);
+                               } else {
+                                       $return .= sprintf('<sup><a href="%s">%s</a></sup>', $data['provider_url'], $data['provider_name']);
+                               }
                        }
 
                        if ($simplehtml != self::CONNECTORS) {
@@ -1350,10 +1404,14 @@ class BBCode
                                $search = ["\n[th]", "[th]\n", " [th]", "\n[/th]", "[/th]\n", "[/th] ",
                                        "\n[td]", "[td]\n", " [td]", "\n[/td]", "[/td]\n", "[/td] ",
                                        "\n[tr]", "[tr]\n", " [tr]", "[tr] ", "\n[/tr]", "[/tr]\n", " [/tr]", "[/tr] ",
+                                       "\n[hr]", "[hr]\n", " [hr]", "[hr] ",
+                                       "\n[attachment ", " [attachment ", "\n[/attachment]", "[/attachment]\n", " [/attachment]", "[/attachment] ",
                                        "[table]\n", "[table] ", " [table]", "\n[/table]", " [/table]", "[/table] "];
                                $replace = ["[th]", "[th]", "[th]", "[/th]", "[/th]", "[/th]",
                                        "[td]", "[td]", "[td]", "[/td]", "[/td]", "[/td]",
                                        "[tr]", "[tr]", "[tr]", "[tr]", "[/tr]", "[/tr]", "[/tr]", "[/tr]",
+                                       "[hr]", "[hr]", "[hr]", "[hr]",
+                                       "[attachment ", "[attachment ", "[/attachment]", "[/attachment]", "[/attachment]", "[/attachment]",
                                        "[table]", "[table]", "[table]", "[/table]", "[/table]", "[/table]"];
                                do {
                                        $oldtext = $text;
@@ -1393,6 +1451,16 @@ class BBCode
                                        $text = self::convertAttachment($text, $simple_html, $try_oembed);
                                }
 
+                               $nosmile = strpos($text, '[nosmile]') !== false;
+                               $text = str_replace('[nosmile]', '', $text);
+
+                               // Replace non graphical smilies for external posts
+                               if (!$nosmile && !$for_plaintext) {
+                                       $text = self::performWithEscapedTags($text, ['img'], function ($text) {
+                                               return Smilies::replace($text);
+                                       });
+                               }
+
                                // leave open the posibility of [map=something]
                                // this is replaced in Item::prepareBody() which has knowledge of the item location
                                if (strpos($text, '[/map]') !== false) {
@@ -1506,11 +1574,6 @@ class BBCode
                                        });
                                }
 
-                               // This is actually executed in Item::prepareBody()
-
-                               $nosmile = strpos($text, '[nosmile]') !== false;
-                               $text = str_replace('[nosmile]', '', $text);
-
                                // Check for font change text
                                $text = preg_replace("/\[font=(.*?)\](.*?)\[\/font\]/sm", "<span style=\"font-family: $1;\">$2</span>", $text);
 
@@ -1605,18 +1668,19 @@ class BBCode
                                //$Text = preg_replace("/\[crypt=(.*?)\](.*?)\[\/crypt\]/ism", '<br><img src="' .DI::baseUrl() . '/images/lock_icon.gif" alt="' . DI::l10n()->t('Encrypted content') . '" title="' . '$1' . ' ' . DI::l10n()->t('Encrypted content') . '" /><br>', $Text);
 
                                // Simplify "video" element
-                               $text = preg_replace('(\[video.*?\ssrc\s?=\s?([^\s\]]+).*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
+                               $text = preg_replace('(\[video[^\]]*?\ssrc\s?=\s?([^\s\]]+)[^\]]*?\].*?\[/video\])ism', '[video]$1[/video]', $text);
 
                                if ($try_oembed) {
                                        // html5 video and audio
                                        $text = preg_replace("/\[video\](.*?\.(ogg|ogv|oga|ogm|webm|mp4).*?)\[\/video\]/ism",
-                                               '<video src="$1" controls width="' . $a->videowidth . '" height="' . $a->videoheight . '" loop="true"><a href="$1">$1</a></video>', $text);
-                                       $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
-                                               '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text);
-                                       $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '<audio src="$1" controls><a href="$1">$1</a></audio>', $text);
+                                               '<video src="$1" controls width="100%" height="auto"><a href="$1">$1</a></video>', $text);
 
                                        $text = preg_replace_callback("/\[video\](.*?)\[\/video\]/ism", $try_oembed_callback, $text);
                                        $text = preg_replace_callback("/\[audio\](.*?)\[\/audio\]/ism", $try_oembed_callback, $text);
+
+                                       $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
+                                               '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text);
+                                       $text = preg_replace("/\[audio\](.*?)\[\/audio\]/ism", '<audio src="$1" controls><a href="$1">$1</a></audio>', $text);
                                } else {
                                        $text = preg_replace("/\[video\](.*?)\[\/video\]/ism",
                                                '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>', $text);
@@ -1683,13 +1747,6 @@ class BBCode
                                        $text = preg_replace("/\[event\-id\](.*?)\[\/event\-id\]/ism", '', $text);
                                }
 
-                               // Replace non graphical smilies for external posts
-                               if (!$nosmile && !$for_plaintext) {
-                                       $text = self::performWithEscapedTags($text, ['img'], function ($text) {
-                                               return Smilies::replace($text);
-                                       });
-                               }
-
                                if (!$for_plaintext && DI::config()->get('system', 'big_emojis') && ($simple_html != self::DIASPORA)) {
                                        $conv = html_entity_decode(str_replace([' ', "\n", "\r"], '', $text));
                                        // Emojis are always 4 byte Unicode characters
@@ -1876,26 +1933,23 @@ class BBCode
                        $text
                );
 
-               $config = \HTMLPurifier_HTML5Config::createDefault();
-               $config->set('HTML.Doctype', 'HTML5');
-               $config->set('HTML.SafeIframe', true);
-               $config->set('URI.SafeIframeRegexp', '%^(?:
-                       https://www.youtube.com/embed/
-                       |
-                       https://player.vimeo.com/video/
-                       |
-                       ' . DI::baseUrl() . '/oembed/ # Has to change with the source in Content\Oembed::iframe
-               )%xi');
-               $config->set('Attr.AllowedRel', [
-                       'noreferrer' => true,
-                       'noopener' => true,
-               ]);
-               $config->set('Attr.AllowedFrameTargets', [
-                       '_blank' => true,
-               ]);
-
-               $HTMLPurifier = new \HTMLPurifier($config);
-               $text = $HTMLPurifier->purify($text);
+               // Default iframe allowed domains/path
+               $allowedIframeDomains = [
+                       DI::baseUrl()->getHostname()
+                       . (DI::baseUrl()->getUrlPath() ? '/' . DI::baseUrl()->getUrlPath() : '')
+                       . '/oembed/', # The path part has to change with the source in Content\Oembed::iframe
+                       'www.youtube.com/embed/',
+                       'player.vimeo.com/video/',
+               ];
+
+               $allowedIframeDomains = array_merge(
+                       $allowedIframeDomains,
+                       DI::config()->get('system', 'allowed_oembed') ?
+                               explode(',', DI::config()->get('system', 'allowed_oembed'))
+                               : []
+               );
+
+               $text = HTML::purify($text, $allowedIframeDomains);
 
                return $text;
        }
@@ -2068,7 +2122,7 @@ class BBCode
        {
                $ret = [];
 
-               BBCode::performWithEscapedTags($string, ['noparse', 'pre', 'code'], function ($string) use (&$ret) {
+               BBCode::performWithEscapedTags($string, ['noparse', 'pre', 'code', 'img'], function ($string) use (&$ret) {
                        // Convert hashtag links to hashtags
                        $string = preg_replace('/#\[url\=([^\[\]]*)\](.*?)\[\/url\]/ism', '#$2 ', $string);
 
@@ -2106,11 +2160,6 @@ class BBCode
                                                continue;
                                        }
 
-                                       // ignore strictly numeric tags like #1
-                                       if ((strpos($match, '#') === 0) && ctype_digit(substr($match, 1))) {
-                                               continue;
-                                       }
-
                                        // try not to catch url fragments
                                        if (strpos($string, $match) && preg_match('/[a-zA-z0-9\/]/', substr($string, strpos($string, $match) - 1, 1))) {
                                                continue;
@@ -2216,4 +2265,75 @@ class BBCode
 
                return $header;
        }
+
+       /**
+        * Returns the BBCode relevant to embed the provided URL in a post body.
+        * If both title and description are given, the URL is not parsed and a [bookmark] tag is returned.
+        * Otherwise the site info is looked up: media types yield an [img], [video] or [audio] tag,
+        * anything else yields an [attachment] tag or a simple [url] tag depending on $tryAttachment.
+        *
+        * @param string      $url           URL to embed
+        * @param bool        $tryAttachment If false, a plain [url] tag is returned instead of the attachment
+        * @param string|null $title         Optional title; only used together with a non-empty $description
+        * @param string|null $description   Optional description; emitted as a [quote] after the bookmark
+        * @param string|null $tags          Optional tags, converted through ParseUrl::convertTagsToArray
+        * @return string BBCode snippet for the URL
+        * @throws \Friendica\Network\HTTPException\InternalServerErrorException
+        * @see ParseUrl::getSiteinfoCached
+        *
+        */
+       public static function embedURL(string $url, bool $tryAttachment = true, string $title = null, string $description = null, string $tags = null): string
+       {
+               DI::logger()->info($url);
+
+               // If title and description were both submitted we don't need to
+               // parse the url for content; a [bookmark] is built from them directly.
+               if (!empty($title) && !empty($description)) {
+                       $title = str_replace(["\r", "\n"], ['', ''], $title); // strip line breaks from the title
+
+                       $description = '[quote]' . trim($description) . '[/quote]' . "\n";
+
+                       $str_tags = '';
+                       if (!empty($tags)) {
+                               $arr_tags = ParseUrl::convertTagsToArray($tags);
+                               if (count($arr_tags)) {
+                                       $str_tags = "\n" . implode(' ', $arr_tags) . "\n";
+                               }
+                       }
+
+                       $result = sprintf('[bookmark=%s]%s[/bookmark]%s', $url, ($title) ? $title : $url, $description) . $str_tags;
+
+                       DI::logger()->info('(unparsed): returns: ' . $result);
+
+                       return $result;
+               }
+
+               $siteinfo = ParseUrl::getSiteinfoCached($url);
+
+               if (in_array($siteinfo['type'], ['image', 'video', 'audio'])) {
+                       switch ($siteinfo['type']) {
+                               case 'video':
+                                       $bbcode = "\n" . '[video]' . $url . '[/video]' . "\n";
+                                       break;
+                               case 'audio':
+                                       $bbcode = "\n" . '[audio]' . $url . '[/audio]' . "\n";
+                                       break;
+                               default: // only 'image' can reach this branch because of the in_array() guard
+                                       $bbcode = "\n" . '[img]' . $url . '[/img]' . "\n";
+                                       break;
+                       }
+
+                       return $bbcode;
+               }
+
+               unset($siteinfo['keywords']);
+
+               // Bypass the attachment when the caller asked for a plain link ($tryAttachment is false)
+               if (!$tryAttachment) {
+                       return "\n" . '[url=' . $url . ']' . $siteinfo['title'] . '[/url]';
+               }
+
+               // Format the remaining site info (keywords removed above) as BBCode attachment
+               return "\n" . PageInfo::getFooterFromData($siteinfo);
+       }
 }