LinkUtils.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. <?php
  2. /**
  3. * Extract title from an HTML document.
  4. *
  5. * @param string $html HTML content where to look for a title.
  6. *
  7. * @return bool|string Extracted title if found, false otherwise.
  8. */
  9. function html_extract_title($html)
  10. {
  11. if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
  12. return trim(str_replace("\n", '', $matches[1]));
  13. }
  14. return false;
  15. }
  16. /**
  17. * Determine charset from downloaded page.
  18. * Priority:
  19. * 1. HTTP headers (Content type).
  20. * 2. HTML content page (tag <meta charset>).
  21. * 3. Use a default charset (default: UTF-8).
  22. *
  23. * @param array $headers HTTP headers array.
  24. * @param string $htmlContent HTML content where to look for charset.
  25. * @param string $defaultCharset Default charset to apply if other methods failed.
  26. *
  27. * @return string Determined charset.
  28. */
  29. function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
  30. {
  31. if ($charset = headers_extract_charset($headers)) {
  32. return $charset;
  33. }
  34. if ($charset = html_extract_charset($htmlContent)) {
  35. return $charset;
  36. }
  37. return $defaultCharset;
  38. }
  39. /**
  40. * Extract charset from HTTP headers if it's defined.
  41. *
  42. * @param array $headers HTTP headers array.
  43. *
  44. * @return bool|string Charset string if found (lowercase), false otherwise.
  45. */
  46. function headers_extract_charset($headers)
  47. {
  48. if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {
  49. preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);
  50. if (! empty($match[1])) {
  51. return strtolower(trim($match[1]));
  52. }
  53. }
  54. return false;
  55. }
  56. /**
  57. * Extract charset HTML content (tag <meta charset>).
  58. *
  59. * @param string $html HTML content where to look for charset.
  60. *
  61. * @return bool|string Charset string if found, false otherwise.
  62. */
  63. function html_extract_charset($html)
  64. {
  65. // Get encoding specified in HTML header.
  66. preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
  67. if (!empty($enc[1])) {
  68. return strtolower($enc[1]);
  69. }
  70. return false;
  71. }
  72. /**
  73. * Count private links in given linklist.
  74. *
  75. * @param array|Countable $links Linklist.
  76. *
  77. * @return int Number of private links.
  78. */
  79. function count_private($links)
  80. {
  81. $cpt = 0;
  82. foreach ($links as $link) {
  83. $cpt = $link['private'] == true ? $cpt + 1 : $cpt;
  84. }
  85. return $cpt;
  86. }
  87. /**
  88. * In a string, converts URLs to clickable links.
  89. *
  90. * @param string $text input string.
  91. * @param string $redirector if a redirector is set, use it to gerenate links.
  92. *
  93. * @return string returns $text with all links converted to HTML links.
  94. *
  95. * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
  96. */
  97. function text2clickable($text, $redirector = '')
  98. {
  99. $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';
  100. if (empty($redirector)) {
  101. return preg_replace($regex, '<a href="$1">$1</a>', $text);
  102. }
  103. // Redirector is set, urlencode the final URL.
  104. return preg_replace_callback(
  105. $regex,
  106. function ($matches) use ($redirector) {
  107. return '<a href="' . $redirector . urlencode($matches[1]) .'">'. $matches[1] .'</a>';
  108. },
  109. $text
  110. );
  111. }
  112. /**
  113. * Auto-link hashtags.
  114. *
  115. * @param string $description Given description.
  116. * @param string $indexUrl Root URL.
  117. *
  118. * @return string Description with auto-linked hashtags.
  119. */
  120. function hashtag_autolink($description, $indexUrl = '')
  121. {
  122. /*
  123. * To support unicode: http://stackoverflow.com/a/35498078/1484919
  124. * \p{Pc} - to match underscore
  125. * \p{N} - numeric character in any script
  126. * \p{L} - letter from any language
  127. * \p{Mn} - any non marking space (accents, umlauts, etc)
  128. */
  129. $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
  130. $replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';
  131. return preg_replace($regex, $replacement, $description);
  132. }
  133. /**
  134. * This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
  135. * even in the absence of <pre> (This is used in description to keep text formatting).
  136. *
  137. * @param string $text input text.
  138. *
  139. * @return string formatted text.
  140. */
  141. function space2nbsp($text)
  142. {
  143. return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
  144. }
  145. /**
  146. * Format Shaarli's description
  147. *
  148. * @param string $description shaare's description.
  149. * @param string $redirector if a redirector is set, use it to gerenate links.
  150. * @param string $indexUrl URL to Shaarli's index.
  151. *
  152. * @return string formatted description.
  153. */
  154. function format_description($description, $redirector = '', $indexUrl = '') {
  155. return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector), $indexUrl)));
  156. }
  157. /**
  158. * Generate a small hash for a link.
  159. *
  160. * @param DateTime $date Link creation date.
  161. * @param int $id Link ID.
  162. *
  163. * @return string the small hash generated from link data.
  164. */
  165. function link_small_hash($date, $id)
  166. {
  167. return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
  168. }