LinkUtils.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. <?php
  2. /**
  3. * Extract title from an HTML document.
  4. *
  5. * @param string $html HTML content where to look for a title.
  6. *
  7. * @return bool|string Extracted title if found, false otherwise.
  8. */
  9. function html_extract_title($html)
  10. {
  11. if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
  12. return trim(str_replace("\n", '', $matches[1]));
  13. }
  14. return false;
  15. }
  16. /**
  17. * Determine charset from downloaded page.
  18. * Priority:
  19. * 1. HTTP headers (Content type).
  20. * 2. HTML content page (tag <meta charset>).
  21. * 3. Use a default charset (default: UTF-8).
  22. *
  23. * @param array $headers HTTP headers array.
  24. * @param string $htmlContent HTML content where to look for charset.
  25. * @param string $defaultCharset Default charset to apply if other methods failed.
  26. *
  27. * @return string Determined charset.
  28. */
  29. function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
  30. {
  31. if ($charset = headers_extract_charset($headers)) {
  32. return $charset;
  33. }
  34. if ($charset = html_extract_charset($htmlContent)) {
  35. return $charset;
  36. }
  37. return $defaultCharset;
  38. }
  39. /**
  40. * Extract charset from HTTP headers if it's defined.
  41. *
  42. * @param array $headers HTTP headers array.
  43. *
  44. * @return bool|string Charset string if found (lowercase), false otherwise.
  45. */
  46. function headers_extract_charset($headers)
  47. {
  48. if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {
  49. preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);
  50. if (! empty($match[1])) {
  51. return strtolower(trim($match[1]));
  52. }
  53. }
  54. return false;
  55. }
  56. /**
  57. * Extract charset HTML content (tag <meta charset>).
  58. *
  59. * @param string $html HTML content where to look for charset.
  60. *
  61. * @return bool|string Charset string if found, false otherwise.
  62. */
  63. function html_extract_charset($html)
  64. {
  65. // Get encoding specified in HTML header.
  66. preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
  67. if (!empty($enc[1])) {
  68. return strtolower($enc[1]);
  69. }
  70. return false;
  71. }
  72. /**
  73. * Count private links in given linklist.
  74. *
  75. * @param array|Countable $links Linklist.
  76. *
  77. * @return int Number of private links.
  78. */
  79. function count_private($links)
  80. {
  81. $cpt = 0;
  82. foreach ($links as $link) {
  83. if ($link['private']) {
  84. $cpt += 1;
  85. }
  86. }
  87. return $cpt;
  88. }
  89. /**
  90. * In a string, converts URLs to clickable links.
  91. *
  92. * @param string $text input string.
  93. * @param string $redirector if a redirector is set, use it to gerenate links.
  94. *
  95. * @return string returns $text with all links converted to HTML links.
  96. *
  97. * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
  98. */
  99. function text2clickable($text, $redirector = '')
  100. {
  101. $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
  102. if (empty($redirector)) {
  103. return preg_replace($regex, '<a href="$1">$1</a>', $text);
  104. }
  105. // Redirector is set, urlencode the final URL.
  106. return preg_replace_callback(
  107. $regex,
  108. function ($matches) use ($redirector) {
  109. return '<a href="' . $redirector . urlencode($matches[1]) .'">'. $matches[1] .'</a>';
  110. },
  111. $text
  112. );
  113. }
  114. /**
  115. * Auto-link hashtags.
  116. *
  117. * @param string $description Given description.
  118. * @param string $indexUrl Root URL.
  119. *
  120. * @return string Description with auto-linked hashtags.
  121. */
  122. function hashtag_autolink($description, $indexUrl = '')
  123. {
  124. /*
  125. * To support unicode: http://stackoverflow.com/a/35498078/1484919
  126. * \p{Pc} - to match underscore
  127. * \p{N} - numeric character in any script
  128. * \p{L} - letter from any language
  129. * \p{Mn} - any non marking space (accents, umlauts, etc)
  130. */
  131. $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
  132. $replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';
  133. return preg_replace($regex, $replacement, $description);
  134. }
  135. /**
  136. * This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
  137. * even in the absence of <pre> (This is used in description to keep text formatting).
  138. *
  139. * @param string $text input text.
  140. *
  141. * @return string formatted text.
  142. */
  143. function space2nbsp($text)
  144. {
  145. return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
  146. }
  147. /**
  148. * Format Shaarli's description
  149. *
  150. * @param string $description shaare's description.
  151. * @param string $redirector if a redirector is set, use it to gerenate links.
  152. * @param string $indexUrl URL to Shaarli's index.
  153. *
  154. * @return string formatted description.
  155. */
  156. function format_description($description, $redirector = '', $indexUrl = '') {
  157. return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector), $indexUrl)));
  158. }
  159. /**
  160. * Generate a small hash for a link.
  161. *
  162. * @param DateTime $date Link creation date.
  163. * @param int $id Link ID.
  164. *
  165. * @return string the small hash generated from link data.
  166. */
  167. function link_small_hash($date, $id)
  168. {
  169. return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
  170. }