LinkUtils.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. <?php
  2. /**
  3. * Get cURL callback function for CURLOPT_WRITEFUNCTION
  4. *
  5. * @param string $charset to extract from the downloaded page (reference)
  6. * @param string $title to extract from the downloaded page (reference)
  7. * @param string $curlGetInfo Optionnaly overrides curl_getinfo function
  8. *
  9. * @return Closure
  10. */
  11. function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
  12. {
  13. /**
  14. * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
  15. *
  16. * While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
  17. * Then we extract the title and the charset and stop the download when it's done.
  18. *
  19. * @param resource $ch cURL resource
  20. * @param string $data chunk of data being downloaded
  21. *
  22. * @return int|bool length of $data or false if we need to stop the download
  23. */
  24. return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) {
  25. $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
  26. if (!empty($responseCode) && $responseCode != 200) {
  27. return false;
  28. }
  29. $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
  30. if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
  31. return false;
  32. }
  33. if (empty($charset)) {
  34. $charset = header_extract_charset($contentType);
  35. }
  36. if (empty($charset)) {
  37. $charset = html_extract_charset($data);
  38. }
  39. if (empty($title)) {
  40. $title = html_extract_title($data);
  41. }
  42. // We got everything we want, stop the download.
  43. if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
  44. return false;
  45. }
  46. return strlen($data);
  47. };
  48. }
  49. /**
  50. * Extract title from an HTML document.
  51. *
  52. * @param string $html HTML content where to look for a title.
  53. *
  54. * @return bool|string Extracted title if found, false otherwise.
  55. */
  56. function html_extract_title($html)
  57. {
  58. if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
  59. return trim(str_replace("\n", '', $matches[1]));
  60. }
  61. return false;
  62. }
  63. /**
  64. * Extract charset from HTTP header if it's defined.
  65. *
  66. * @param string $header HTTP header Content-Type line.
  67. *
  68. * @return bool|string Charset string if found (lowercase), false otherwise.
  69. */
  70. function header_extract_charset($header)
  71. {
  72. preg_match('/charset="?([^; ]+)/i', $header, $match);
  73. if (! empty($match[1])) {
  74. return strtolower(trim($match[1]));
  75. }
  76. return false;
  77. }
  78. /**
  79. * Extract charset HTML content (tag <meta charset>).
  80. *
  81. * @param string $html HTML content where to look for charset.
  82. *
  83. * @return bool|string Charset string if found, false otherwise.
  84. */
  85. function html_extract_charset($html)
  86. {
  87. // Get encoding specified in HTML header.
  88. preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc);
  89. if (!empty($enc[1])) {
  90. return strtolower($enc[1]);
  91. }
  92. return false;
  93. }
  94. /**
  95. * Count private links in given linklist.
  96. *
  97. * @param array|Countable $links Linklist.
  98. *
  99. * @return int Number of private links.
  100. */
  101. function count_private($links)
  102. {
  103. $cpt = 0;
  104. foreach ($links as $link) {
  105. if ($link['private']) {
  106. $cpt += 1;
  107. }
  108. }
  109. return $cpt;
  110. }
  111. /**
  112. * In a string, converts URLs to clickable links.
  113. *
  114. * @param string $text input string.
  115. * @param string $redirector if a redirector is set, use it to gerenate links.
  116. * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
  117. *
  118. * @return string returns $text with all links converted to HTML links.
  119. *
  120. * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
  121. */
  122. function text2clickable($text, $redirector = '', $urlEncode = true)
  123. {
  124. $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
  125. if (empty($redirector)) {
  126. return preg_replace($regex, '<a href="$1">$1</a>', $text);
  127. }
  128. // Redirector is set, urlencode the final URL.
  129. return preg_replace_callback(
  130. $regex,
  131. function ($matches) use ($redirector, $urlEncode) {
  132. $url = $urlEncode ? urlencode($matches[1]) : $matches[1];
  133. return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>';
  134. },
  135. $text
  136. );
  137. }
  138. /**
  139. * Auto-link hashtags.
  140. *
  141. * @param string $description Given description.
  142. * @param string $indexUrl Root URL.
  143. *
  144. * @return string Description with auto-linked hashtags.
  145. */
  146. function hashtag_autolink($description, $indexUrl = '')
  147. {
  148. /*
  149. * To support unicode: http://stackoverflow.com/a/35498078/1484919
  150. * \p{Pc} - to match underscore
  151. * \p{N} - numeric character in any script
  152. * \p{L} - letter from any language
  153. * \p{Mn} - any non marking space (accents, umlauts, etc)
  154. */
  155. $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
  156. $replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';
  157. return preg_replace($regex, $replacement, $description);
  158. }
  159. /**
  160. * This function inserts &nbsp; where relevant so that multiple spaces are properly displayed in HTML
  161. * even in the absence of <pre> (This is used in description to keep text formatting).
  162. *
  163. * @param string $text input text.
  164. *
  165. * @return string formatted text.
  166. */
  167. function space2nbsp($text)
  168. {
  169. return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
  170. }
  171. /**
  172. * Format Shaarli's description
  173. *
  174. * @param string $description shaare's description.
  175. * @param string $redirector if a redirector is set, use it to gerenate links.
  176. * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
  177. * @param string $indexUrl URL to Shaarli's index.
  178. * @return string formatted description.
  179. */
  180. function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '') {
  181. return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
  182. }
  183. /**
  184. * Generate a small hash for a link.
  185. *
  186. * @param DateTime $date Link creation date.
  187. * @param int $id Link ID.
  188. *
  189. * @return string the small hash generated from link data.
  190. */
  191. function link_small_hash($date, $id)
  192. {
  193. return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
  194. }