Restore partial condition in OEmbed block of ParseUrl::getSiteInfo
[friendica.git/.git] / src / Util / XML.php
1 <?php
2 /**
3  * @file src/Util/XML.php
4  */
5 namespace Friendica\Util;
6
7 use DOMXPath;
8 use SimpleXMLElement;
9
10 /**
11  * @brief This class contains methods to work with XML data
12  */
13 class XML
14 {
15         /**
16          * @brief Creates an XML structure out of a given array
17          *
18          * @param array  $array         The array of the XML structure that will be generated
19          * @param object $xml           The createdXML will be returned by reference
20          * @param bool   $remove_header Should the XML header be removed or not?
21          * @param array  $namespaces    List of namespaces
22          * @param bool   $root          interally used parameter. Mustn't be used from outside.
23          *
24          * @return string The created XML
25          */
26         public static function fromArray($array, &$xml, $remove_header = false, $namespaces = [], $root = true)
27         {
28                 if ($root) {
29                         foreach ($array as $key => $value) {
30                                 foreach ($namespaces as $nskey => $nsvalue) {
31                                         $key .= " xmlns".($nskey == "" ? "":":").$nskey.'="'.$nsvalue.'"';
32                                 }
33
34                                 if (is_array($value)) {
35                                         $root = new SimpleXMLElement("<".$key."/>");
36                                         self::fromArray($value, $root, $remove_header, $namespaces, false);
37                                 } else {
38                                         $root = new SimpleXMLElement("<".$key.">".xmlify($value)."</".$key.">");
39                                 }
40
41                                 $dom = dom_import_simplexml($root)->ownerDocument;
42                                 $dom->formatOutput = true;
43                                 $xml = $dom;
44
45                                 $xml_text = $dom->saveXML();
46
47                                 if ($remove_header) {
48                                         $xml_text = trim(substr($xml_text, 21));
49                                 }
50
51                                 return $xml_text;
52                         }
53                 }
54
55                 $element = null;
56                 foreach ($array as $key => $value) {
57                         if (!isset($element) && isset($xml)) {
58                                 $element = $xml;
59                         }
60
61                         if (is_integer($key)) {
62                                 if (isset($element)) {
63                                         if (is_scalar($value)) {
64                                                 $element[0] = $value;
65                                         } else {
66                                                 /// @todo: handle nested array values
67                                         }
68                                 }
69                                 continue;
70                         }
71
72                         $element_parts = explode(":", $key);
73                         if ((count($element_parts) > 1) && isset($namespaces[$element_parts[0]])) {
74                                 $namespace = $namespaces[$element_parts[0]];
75                         } elseif (isset($namespaces[""])) {
76                                 $namespace = $namespaces[""];
77                         } else {
78                                 $namespace = null;
79                         }
80
81                         // Remove undefined namespaces from the key
82                         if ((count($element_parts) > 1) && is_null($namespace)) {
83                                 $key = $element_parts[1];
84                         }
85
86                         if (substr($key, 0, 11) == "@attributes") {
87                                 if (!isset($element) || !is_array($value)) {
88                                         continue;
89                                 }
90
91                                 foreach ($value as $attr_key => $attr_value) {
92                                         $element_parts = explode(":", $attr_key);
93                                         if ((count($element_parts) > 1) && isset($namespaces[$element_parts[0]])) {
94                                                 $namespace = $namespaces[$element_parts[0]];
95                                         } else {
96                                                 $namespace = null;
97                                         }
98
99                                         $element->addAttribute($attr_key, $attr_value, $namespace);
100                                 }
101
102                                 continue;
103                         }
104
105                         if (!is_array($value)) {
106                                 $element = $xml->addChild($key, xmlify($value), $namespace);
107                         } elseif (is_array($value)) {
108                                 $element = $xml->addChild($key, null, $namespace);
109                                 self::fromArray($value, $element, $remove_header, $namespaces, false);
110                         }
111                 }
112         }
113
114         /**
115          * @brief Copies an XML object
116          *
117          * @param object $source      The XML source
118          * @param object $target      The XML target
119          * @param string $elementname Name of the XML element of the target
120          * @return void
121          */
122         public static function copy(&$source, &$target, $elementname)
123         {
124                 if (count($source->children()) == 0) {
125                         $target->addChild($elementname, xmlify($source));
126                 } else {
127                         $child = $target->addChild($elementname);
128                         foreach ($source->children() as $childfield => $childentry) {
129                                 self::copy($childentry, $child, $childfield);
130                         }
131                 }
132         }
133
134         /**
135          * @brief Create an XML element
136          *
137          * @param object $doc        XML root
138          * @param string $element    XML element name
139          * @param string $value      XML value
140          * @param array  $attributes array containing the attributes
141          *
142          * @return object XML element object
143          */
144         public static function createElement($doc, $element, $value = "", $attributes = [])
145         {
146                 $element = $doc->createElement($element, xmlify($value));
147
148                 foreach ($attributes as $key => $value) {
149                         $attribute = $doc->createAttribute($key);
150                         $attribute->value = xmlify($value);
151                         $element->appendChild($attribute);
152                 }
153                 return $element;
154         }
155
156         /**
157          * @brief Create an XML and append it to the parent object
158          *
159          * @param object $doc        XML root
160          * @param object $parent     parent object
161          * @param string $element    XML element name
162          * @param string $value      XML value
163          * @param array  $attributes array containing the attributes
164          * @return void
165          */
166         public static function addElement($doc, $parent, $element, $value = "", $attributes = [])
167         {
168                 $element = self::createElement($doc, $element, $value, $attributes);
169                 $parent->appendChild($element);
170         }
171
172         /**
173          * @brief Convert an XML document to a normalised, case-corrected array
174          *   used by webfinger
175          *
176          * @param object  $xml_element     The XML document
177          * @param integer $recursion_depth recursion counter for internal use - default 0
178          *                                 internal use, recursion counter
179          *
180          * @return array | string The array from the xml element or the string
181          */
182         public static function elementToArray($xml_element, &$recursion_depth = 0)
183         {
184                 // If we're getting too deep, bail out
185                 if ($recursion_depth > 512) {
186                         return(null);
187                 }
188
189                 $xml_element_copy = '';
190                 if (!is_string($xml_element)
191                         && !is_array($xml_element)
192                         && (get_class($xml_element) == 'SimpleXMLElement')
193                 ) {
194                         $xml_element_copy = $xml_element;
195                         $xml_element = get_object_vars($xml_element);
196                 }
197
198                 if (is_array($xml_element)) {
199                         $result_array = [];
200                         if (count($xml_element) <= 0) {
201                                 return (trim(strval($xml_element_copy)));
202                         }
203
204                         foreach ($xml_element as $key => $value) {
205                                 $recursion_depth++;
206                                 $result_array[strtolower($key)] = self::elementToArray($value, $recursion_depth);
207                                 $recursion_depth--;
208                         }
209
210                         if ($recursion_depth == 0) {
211                                 $temp_array = $result_array;
212                                 $result_array = [
213                                         strtolower($xml_element_copy->getName()) => $temp_array,
214                                 ];
215                         }
216
217                         return ($result_array);
218                 } else {
219                         return (trim(strval($xml_element)));
220                 }
221         }
222
223         /**
224          * @brief Convert the given XML text to an array in the XML structure.
225          *
226          * Xml::toArray() will convert the given XML text to an array in the XML structure.
227          * Link: http://www.bin-co.com/php/scripts/xml2array/
228          * Portions significantly re-written by mike@macgirvin.com for Friendica
229          * (namespaces, lowercase tags, get_attribute default changed, more...)
230          *
231          * Examples: $array =  Xml::toArray(file_get_contents('feed.xml'));
232          *              $array =  Xml::toArray(file_get_contents('feed.xml', true, 1, 'attribute'));
233          *
234          * @param object  $contents       The XML text
235          * @param boolean $namespaces     True or false include namespace information
236          *                                    in the returned array as array elements.
237          * @param integer $get_attributes 1 or 0. If this is 1 the function will get the attributes as well as the tag values -
238          *                                    this results in a different array structure in the return value.
239          * @param string  $priority       Can be 'tag' or 'attribute'. This will change the way the resulting
240          *                                    array sturcture. For 'tag', the tags are given more importance.
241          *
242          * @return array The parsed XML in an array form. Use print_r() to see the resulting array structure.
243          */
244         public static function toArray($contents, $namespaces = true, $get_attributes = 1, $priority = 'attribute')
245         {
246                 if (!$contents) {
247                         return [];
248                 }
249
250                 if (!function_exists('xml_parser_create')) {
251                         logger('Xml::toArray: parser function missing');
252                         return [];
253                 }
254
255
256                 libxml_use_internal_errors(true);
257                 libxml_clear_errors();
258
259                 if ($namespaces) {
260                         $parser = @xml_parser_create_ns("UTF-8", ':');
261                 } else {
262                         $parser = @xml_parser_create();
263                 }
264
265                 if (! $parser) {
266                         logger('Xml::toArray: xml_parser_create: no resource');
267                         return [];
268                 }
269
270                 xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, "UTF-8");
271                 // http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
272                 xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
273                 xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
274                 @xml_parse_into_struct($parser, trim($contents), $xml_values);
275                 @xml_parser_free($parser);
276
277                 if (! $xml_values) {
278                         logger('Xml::toArray: libxml: parse error: ' . $contents, LOGGER_DATA);
279                         foreach (libxml_get_errors() as $err) {
280                                 logger('libxml: parse: ' . $err->code . " at " . $err->line . ":" . $err->column . " : " . $err->message, LOGGER_DATA);
281                         }
282                         libxml_clear_errors();
283                         return;
284                 }
285
286                 //Initializations
287                 $xml_array = [];
288                 $parents = [];
289                 $opened_tags = [];
290                 $arr = [];
291
292                 $current = &$xml_array; // Reference
293
294                 // Go through the tags.
295                 $repeated_tag_index = []; // Multiple tags with same name will be turned into an array
296                 foreach ($xml_values as $data) {
297                         $tag        = $data['tag'];
298                         $type       = $data['type'];
299                         $level      = $data['level'];
300                         $attributes = isset($data['attributes']) ? $data['attributes'] : null;
301                         $value      = isset($data['value']) ? $data['value'] : null;
302
303                         $result = [];
304                         $attributes_data = [];
305
306                         if (isset($value)) {
307                                 if ($priority == 'tag') {
308                                         $result = $value;
309                                 } else {
310                                         $result['value'] = $value; // Put the value in a assoc array if we are in the 'Attribute' mode
311                                 }
312                         }
313
314                         //Set the attributes too.
315                         if (isset($attributes) and $get_attributes) {
316                                 foreach ($attributes as $attr => $val) {
317                                         if ($priority == 'tag') {
318                                                 $attributes_data[$attr] = $val;
319                                         } else {
320                                                 $result['@attributes'][$attr] = $val; // Set all the attributes in a array called 'attr'
321                                         }
322                                 }
323                         }
324
325                         // See tag status and do the needed.
326                         if ($namespaces && strpos($tag, ':')) {
327                                 $namespc = substr($tag, 0, strrpos($tag, ':'));
328                                 $tag = strtolower(substr($tag, strlen($namespc)+1));
329                                 $result['@namespace'] = $namespc;
330                         }
331                         $tag = strtolower($tag);
332
333                         if ($type == "open") {   // The starting of the tag '<tag>'
334                                 $parent[$level-1] = &$current;
335                                 if (!is_array($current) || (!in_array($tag, array_keys($current)))) { // Insert New tag
336                                         $current[$tag] = $result;
337                                         if ($attributes_data) {
338                                                 $current[$tag. '_attr'] = $attributes_data;
339                                         }
340                                         $repeated_tag_index[$tag.'_'.$level] = 1;
341
342                                         $current = &$current[$tag];
343                                 } else { // There was another element with the same tag name
344
345                                         if (isset($current[$tag][0])) { // If there is a 0th element it is already an array
346                                                 $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;
347                                                 $repeated_tag_index[$tag.'_'.$level]++;
348                                         } else { // This section will make the value an array if multiple tags with the same name appear together
349                                                 $current[$tag] = [$current[$tag], $result]; // This will combine the existing item and the new item together to make an array
350                                                 $repeated_tag_index[$tag.'_'.$level] = 2;
351
352                                                 if (isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well
353                                                         $current[$tag]['0_attr'] = $current[$tag.'_attr'];
354                                                         unset($current[$tag.'_attr']);
355                                                 }
356                                         }
357                                         $last_item_index = $repeated_tag_index[$tag.'_'.$level]-1;
358                                         $current = &$current[$tag][$last_item_index];
359                                 }
360                         } elseif ($type == "complete") { // Tags that ends in 1 line '<tag />'
361                                 //See if the key is already taken.
362                                 if (!isset($current[$tag])) { //New Key
363                                         $current[$tag] = $result;
364                                         $repeated_tag_index[$tag.'_'.$level] = 1;
365                                         if ($priority == 'tag' and $attributes_data) {
366                                                 $current[$tag. '_attr'] = $attributes_data;
367                                         }
368                                 } else { // If taken, put all things inside a list(array)
369                                         if (isset($current[$tag][0]) and is_array($current[$tag])) { // If it is already an array...
370
371                                                 // ...push the new element into that array.
372                                                 $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;
373
374                                                 if ($priority == 'tag' and $get_attributes and $attributes_data) {
375                                                         $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
376                                                 }
377                                                 $repeated_tag_index[$tag.'_'.$level]++;
378                                         } else { // If it is not an array...
379                                                 $current[$tag] = [$current[$tag], $result]; //...Make it an array using using the existing value and the new value
380                                                 $repeated_tag_index[$tag.'_'.$level] = 1;
381                                                 if ($priority == 'tag' and $get_attributes) {
382                                                         if (isset($current[$tag.'_attr'])) { // The attribute of the last(0th) tag must be moved as well
383
384                                                                 $current[$tag]['0_attr'] = $current[$tag.'_attr'];
385                                                                 unset($current[$tag.'_attr']);
386                                                         }
387
388                                                         if ($attributes_data) {
389                                                                 $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
390                                                         }
391                                                 }
392                                                 $repeated_tag_index[$tag.'_'.$level]++; // 0 and 1 indexes are already taken
393                                         }
394                                 }
395                         } elseif ($type == 'close') { // End of tag '</tag>'
396                                 $current = &$parent[$level-1];
397                         }
398                 }
399
400                 return($xml_array);
401         }
402
403         /**
404          * @brief Delete a node in a XML object
405          *
406          * @param object $doc  XML document
407          * @param string $node Node name
408          * @return void
409          */
410         public static function deleteNode(&$doc, $node)
411         {
412                 $xpath = new DOMXPath($doc);
413                 $list = $xpath->query("//".$node);
414                 foreach ($list as $child) {
415                         $child->parentNode->removeChild($child);
416                 }
417         }
418
419         public static function parseString($s, $strict = true)
420         {
421                 // the "strict" parameter is deactivated
422                 libxml_use_internal_errors(true);
423
424                 $x = @simplexml_load_string($s);
425                 if (!$x) {
426                         logger('libxml: parse: error: ' . $s, LOGGER_DATA);
427                         foreach (libxml_get_errors() as $err) {
428                                 logger('libxml: parse: ' . $err->code." at ".$err->line.":".$err->column." : ".$err->message, LOGGER_DATA);
429                         }
430                         libxml_clear_errors();
431                 }
432                 return $x;
433         }
434
435         public static function getFirstNodeValue($xpath, $element, $context = null)
436         {
437                 $result = $xpath->evaluate($element, $context);
438                 if (!is_object($result)) {
439                         return '';
440                 }
441
442                 $first_item = $result->item(0);
443                 if (!is_object($first_item)) {
444                         return '';
445                 }
446
447                 return $first_item->nodeValue;
448         }
449
450         public static function getFirstAttributes($xpath, $element, $context = null)
451         {
452                 $result = $xpath->query($element, $context);
453                 if (!is_object($result)) {
454                         return false;
455                 }
456
457                 $first_item = $result->item(0);
458                 if (!is_object($first_item)) {
459                         return false;
460                 }
461
462                 return $first_item->attributes;
463         }
464 }