From 558189e9d12cf88461506f971be1729c68cdb3d5 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 17 Mar 2021 22:29:12 +0000 Subject: [PATCH] Reorganized functions --- src/Util/ParseUrl.php | 193 +++++++++++++++++++++--------------------- 1 file changed, 97 insertions(+), 96 deletions(-) diff --git a/src/Util/ParseUrl.php b/src/Util/ParseUrl.php index 745ab5c749..389c94a728 100644 --- a/src/Util/ParseUrl.php +++ b/src/Util/ParseUrl.php @@ -508,10 +508,92 @@ class ParseUrl } /** - * Parse the Json-Ld parts + * Convert tags from CSV to an array + * + * @param string $string Tags + * @return array with formatted Hashtags + */ + public static function convertTagsToArray($string) + { + $arr_tags = str_getcsv($string); + if (count($arr_tags)) { + // add the # sign to every tag + array_walk($arr_tags, ["self", "arrAddHashes"]); + + return $arr_tags; + } + } + + /** + * Add a hasht sign to a string + * + * This method is used as callback function + * + * @param string $tag The pure tag name + * @param int $k Counter for internal use + * @return void + */ + private static function arrAddHashes(&$tag, $k) + { + $tag = "#" . $tag; + } + + /** + * Add a scheme to an url * - * @param array $siteinfo - * @param array $jsonld + * The src attribute of some html elements (e.g. images) + * can miss the scheme so we need to add the correct + * scheme + * + * @param string $url The url which possibly does have + * a missing scheme (a link to an image) + * @param string $scheme The url with a correct scheme + * (e.g. the url from the webpage which does contain the image) + * + * @return string The url with a scheme + */ + private static function completeUrl($url, $scheme) + { + $urlarr = parse_url($url); + + // If the url does allready have an scheme + // we can stop the process here + if (isset($urlarr["scheme"])) { + return($url); + } + + $schemearr = parse_url($scheme); + + $complete = $schemearr["scheme"]."://".$schemearr["host"]; + + if (!empty($schemearr["port"])) { + $complete .= ":".$schemearr["port"]; + } + + if (!empty($urlarr["path"])) { + if (strpos($urlarr["path"], "/") !== 0) { + $complete .= "/"; + } + + $complete .= $urlarr["path"]; + } + + if (!empty($urlarr["query"])) { + $complete .= "?".$urlarr["query"]; + } + + if (!empty($urlarr["fragment"])) { + $complete .= "#".$urlarr["fragment"]; + } + + return($complete); + } + + /** + * Parse the Json-Ld parts of a web page + * + * @param array $siteinfo + * @param array $jsonld * @return array siteinfo */ private static function parseParts(array $siteinfo, array $jsonld) @@ -543,6 +625,7 @@ class ParseUrl /** * Improve the siteinfo with information from the provided JSON-LD information * @see https://jsonld.com/ + * @see https://schema.org/ * * @param array $siteinfo * @param array $jsonld @@ -557,7 +640,7 @@ class ParseUrl } // Silently ignore some types that aren't processed - if (in_array($type, ['SiteNavigationElement', 'JobPosting', 'CreativeWork', + if (in_array($type, ['SiteNavigationElement', 'JobPosting', 'CreativeWork', 'MusicAlbum', 'WPHeader', 'WPSideBar', 'WPFooter', 'LegalService', 'ItemList', 'BreadcrumbList', 'Blog', 'Dataset', 'Product'])) { return $siteinfo; @@ -616,7 +699,7 @@ class ParseUrl return self::parseJsonLdWebOrganization($siteinfo, $jsonld); case 'Person': case 'Patient': - case 'PerformingGroup': + case 'PerformingGroup': case 'DanceGroup'; case 'MusicGroup': case 'TheaterGroup': @@ -635,7 +718,7 @@ class ParseUrl } /** - * Improve the siteinfo with information from the provided JSON-LD information concerning authors and publishers + * Fetch author and publisher data * * @param array $siteinfo * @param array $jsonld @@ -695,13 +778,13 @@ class ParseUrl $jsonldinfo['author_name'] = trim($jsonld['author']); } - Logger::info('Fetched author information', ['fetched' => $jsonldinfo]); + Logger::info('Fetched Author information', ['fetched' => $jsonldinfo]); return array_merge($siteinfo, $jsonldinfo); } /** - * Improve the siteinfo with information from the provided JSON-LD Article information + * Fetch data from the provided JSON-LD Article type * @see https://schema.org/Article * * @param array $siteinfo @@ -761,7 +844,7 @@ class ParseUrl } /** - * Improve the siteinfo with information from the provided JSON-LD WebPage information + * Fetch data from the provided JSON-LD WebPage type * @see https://schema.org/WebPage * * @param array $siteinfo @@ -794,13 +877,13 @@ class ParseUrl $jsonldinfo = self::parseJsonLdAuthor($jsonldinfo, $jsonld); - Logger::info('Fetched webpage information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]); + Logger::info('Fetched WebPage information', ['url' => $siteinfo['url'], 'fetched' => $jsonldinfo]); return array_merge($siteinfo, $jsonldinfo); } /** - * Improve the siteinfo with information from the provided JSON-LD WebSite information + * Fetch data from the provided JSON-LD WebSite type * @see https://schema.org/WebSite * * @param array $siteinfo @@ -838,7 +921,7 @@ class ParseUrl } /** - * Improve the siteinfo with information from the provided JSON-LD Organization information + * Fetch data from the provided JSON-LD Organization type * @see https://schema.org/Organization * * @param array $siteinfo @@ -889,7 +972,7 @@ class ParseUrl } /** - * Improve the siteinfo with information from the provided JSON-LD Person information + * Fetch data from the provided JSON-LD Person type * @see https://schema.org/Person * * @param array $siteinfo @@ -930,7 +1013,7 @@ class ParseUrl } /** - * Improve the siteinfo with information from the provided JSON-LD MediaObject + * Fetch data from the provided JSON-LD MediaObject type * @see https://schema.org/MediaObject * * @param array $siteinfo @@ -995,86 +1078,4 @@ class ParseUrl $siteinfo[$name][] = $media; return $siteinfo; } - - /** - * Convert tags from CSV to an array - * - * @param string $string Tags - * @return array with formatted Hashtags - */ - public static function convertTagsToArray($string) - { - $arr_tags = str_getcsv($string); - if (count($arr_tags)) { - // add the # sign to every tag - array_walk($arr_tags, ["self", "arrAddHashes"]); - - return $arr_tags; - } - } - - /** - * Add a hasht sign to a string - * - * This method is used as callback function - * - * @param string $tag The pure tag name - * @param int $k Counter for internal use - * @return void - */ - private static function arrAddHashes(&$tag, $k) - { - $tag = "#" . $tag; - } - - /** - * Add a scheme to an url - * - * The src attribute of some html elements (e.g. images) - * can miss the scheme so we need to add the correct - * scheme - * - * @param string $url The url which possibly does have - * a missing scheme (a link to an image) - * @param string $scheme The url with a correct scheme - * (e.g. the url from the webpage which does contain the image) - * - * @return string The url with a scheme - */ - private static function completeUrl($url, $scheme) - { - $urlarr = parse_url($url); - - // If the url does allready have an scheme - // we can stop the process here - if (isset($urlarr["scheme"])) { - return($url); - } - - $schemearr = parse_url($scheme); - - $complete = $schemearr["scheme"]."://".$schemearr["host"]; - - if (!empty($schemearr["port"])) { - $complete .= ":".$schemearr["port"]; - } - - if (!empty($urlarr["path"])) { - if (strpos($urlarr["path"], "/") !== 0) { - $complete .= "/"; - } - - $complete .= $urlarr["path"]; - } - - if (!empty($urlarr["query"])) { - $complete .= "?".$urlarr["query"]; - } - - if (!empty($urlarr["fragment"])) { - $complete .= "#".$urlarr["fragment"]; - } - - return($complete); - } } -- 2.20.1