<?php
  2. /**
  3. * Data storage for links.
  4. *
  5. * This object behaves like an associative array.
  6. *
  7. * Example:
  8. * $myLinks = new LinkDB();
  9. * echo $myLinks[350]['title'];
  10. * foreach ($myLinks as $link)
  11. * echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
  12. *
  13. * Available keys:
  14. * - id: primary key, incremental integer identifier (persistent)
  15. * - description: description of the entry
  16. * - created: creation date of this entry, DateTime object.
  17. * - updated: last modification date of this entry, DateTime object.
  18. * - private: Is this link private? 0=no, other value=yes
  19. * - tags: tags attached to this entry (separated by spaces)
  20. * - title Title of the link
  21. * - url URL of the link. Used for displayable links (no redirector, relative, etc.).
  22. * Can be absolute or relative.
  23. * Relative URLs are permalinks (e.g.'?m-ukcw')
  24. * - real_url Absolute processed URL.
  25. * - shorturl Permalink smallhash
  26. *
  27. * Implements 3 interfaces:
  28. * - ArrayAccess: behaves like an associative array;
  29. * - Countable: there is a count() method;
  30. * - Iterator: usable in foreach () loops.
  31. *
  32. * ID mechanism:
  33. * ArrayAccess is implemented in a way that will allow to access a link
  34. * with the unique identifier ID directly with $link[ID].
  35. * Note that it's not the real key of the link array attribute.
  36. * This mechanism is in place to have persistent link IDs,
  37. * even though the internal array is reordered by date.
  38. * Example:
  39. * - DB: link #1 (2010-01-01) link #2 (2016-01-01)
  40. * - Order: #2 #1
  41. * - Import links containing: link #3 (2013-01-01)
  42. * - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
  43. * - Real order: #2 #3 #1
  44. */
  45. class LinkDB implements Iterator, Countable, ArrayAccess
  46. {
  47. // Links are stored as a PHP serialized string
  48. private $datastore;
  49. // Link date storage format
  50. const LINK_DATE_FORMAT = 'Ymd_His';
  51. // List of links (associative array)
  52. // - key: link date (e.g. "20110823_124546"),
  53. // - value: associative array (keys: title, description...)
  54. private $links;
  55. // List of all recorded URLs (key=url, value=link offset)
  56. // for fast reserve search (url-->link offset)
  57. private $urls;
  58. /**
  59. * @var array List of all links IDS mapped with their array offset.
  60. * Map: id->offset.
  61. */
  62. protected $ids;
  63. // List of offset keys (for the Iterator interface implementation)
  64. private $keys;
  65. // Position in the $this->keys array (for the Iterator interface)
  66. private $position;
  67. // Is the user logged in? (used to filter private links)
  68. private $loggedIn;
  69. // Hide public links
  70. private $hidePublicLinks;
  71. // link redirector set in user settings.
  72. private $redirector;
  73. /**
  74. * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
  75. *
  76. * Example:
  77. * anonym.to needs clean URL while dereferer.org needs urlencoded URL.
  78. *
  79. * @var boolean $redirectorEncode parameter: true or false
  80. */
  81. private $redirectorEncode;
  82. /**
  83. * Creates a new LinkDB
  84. *
  85. * Checks if the datastore exists; else, attempts to create a dummy one.
  86. *
  87. * @param string $datastore datastore file path.
  88. * @param boolean $isLoggedIn is the user logged in?
  89. * @param boolean $hidePublicLinks if true all links are private.
  90. * @param string $redirector link redirector set in user settings.
  91. * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
  92. */
  93. public function __construct(
  94. $datastore,
  95. $isLoggedIn,
  96. $hidePublicLinks,
  97. $redirector = '',
  98. $redirectorEncode = true
  99. ) {
  100. $this->datastore = $datastore;
  101. $this->loggedIn = $isLoggedIn;
  102. $this->hidePublicLinks = $hidePublicLinks;
  103. $this->redirector = $redirector;
  104. $this->redirectorEncode = $redirectorEncode === true;
  105. $this->check();
  106. $this->read();
  107. }
  108. /**
  109. * Countable - Counts elements of an object
  110. */
  111. public function count()
  112. {
  113. return count($this->links);
  114. }
  115. /**
  116. * ArrayAccess - Assigns a value to the specified offset
  117. */
  118. public function offsetSet($offset, $value)
  119. {
  120. // TODO: use exceptions instead of "die"
  121. if (!$this->loggedIn) {
  122. die(t('You are not authorized to add a link.'));
  123. }
  124. if (!isset($value['id']) || empty($value['url'])) {
  125. die(t('Internal Error: A link should always have an id and URL.'));
  126. }
  127. if (($offset !== null && ! is_int($offset)) || ! is_int($value['id'])) {
  128. die(t('You must specify an integer as a key.'));
  129. }
  130. if ($offset !== null && $offset !== $value['id']) {
  131. die(t('Array offset and link ID must be equal.'));
  132. }
  133. // If the link exists, we reuse the real offset, otherwise new entry
  134. $existing = $this->getLinkOffset($offset);
  135. if ($existing !== null) {
  136. $offset = $existing;
  137. } else {
  138. $offset = count($this->links);
  139. }
  140. $this->links[$offset] = $value;
  141. $this->urls[$value['url']] = $offset;
  142. $this->ids[$value['id']] = $offset;
  143. }
  144. /**
  145. * ArrayAccess - Whether or not an offset exists
  146. */
  147. public function offsetExists($offset)
  148. {
  149. return array_key_exists($this->getLinkOffset($offset), $this->links);
  150. }
  151. /**
  152. * ArrayAccess - Unsets an offset
  153. */
  154. public function offsetUnset($offset)
  155. {
  156. if (!$this->loggedIn) {
  157. // TODO: raise an exception
  158. die('You are not authorized to delete a link.');
  159. }
  160. $realOffset = $this->getLinkOffset($offset);
  161. $url = $this->links[$realOffset]['url'];
  162. unset($this->urls[$url]);
  163. unset($this->ids[$realOffset]);
  164. unset($this->links[$realOffset]);
  165. }
  166. /**
  167. * ArrayAccess - Returns the value at specified offset
  168. */
  169. public function offsetGet($offset)
  170. {
  171. $realOffset = $this->getLinkOffset($offset);
  172. return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null;
  173. }
  174. /**
  175. * Iterator - Returns the current element
  176. */
  177. public function current()
  178. {
  179. return $this[$this->keys[$this->position]];
  180. }
  181. /**
  182. * Iterator - Returns the key of the current element
  183. */
  184. public function key()
  185. {
  186. return $this->keys[$this->position];
  187. }
  188. /**
  189. * Iterator - Moves forward to next element
  190. */
  191. public function next()
  192. {
  193. ++$this->position;
  194. }
  195. /**
  196. * Iterator - Rewinds the Iterator to the first element
  197. *
  198. * Entries are sorted by date (latest first)
  199. */
  200. public function rewind()
  201. {
  202. $this->keys = array_keys($this->ids);
  203. $this->position = 0;
  204. }
  205. /**
  206. * Iterator - Checks if current position is valid
  207. */
  208. public function valid()
  209. {
  210. return isset($this->keys[$this->position]);
  211. }
  212. /**
  213. * Checks if the DB directory and file exist
  214. *
  215. * If no DB file is found, creates a dummy DB.
  216. */
  217. private function check()
  218. {
  219. if (file_exists($this->datastore)) {
  220. return;
  221. }
  222. // Create a dummy database for example
  223. $this->links = array();
  224. $link = array(
  225. 'id' => 1,
  226. 'title'=> t('The personal, minimalist, super-fast, database free, bookmarking service'),
  227. 'url'=>'https://shaarli.readthedocs.io',
  228. 'description'=>t(
  229. 'Welcome to Shaarli! This is your first public bookmark. '
  230. .'To edit or delete me, you must first login.
  231. To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page.
  232. You use the community supported version of the original Shaarli project, by Sebastien Sauvage.'
  233. ),
  234. 'private'=>0,
  235. 'created'=> new DateTime(),
  236. 'tags'=>'opensource software'
  237. );
  238. $link['shorturl'] = link_small_hash($link['created'], $link['id']);
  239. $this->links[1] = $link;
  240. $link = array(
  241. 'id' => 0,
  242. 'title'=> t('My secret stuff... - Pastebin.com'),
  243. 'url'=>'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
  244. 'description'=> t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'),
  245. 'private'=>1,
  246. 'created'=> new DateTime('1 minute ago'),
  247. 'tags'=>'secretstuff',
  248. );
  249. $link['shorturl'] = link_small_hash($link['created'], $link['id']);
  250. $this->links[0] = $link;
  251. // Write database to disk
  252. $this->write();
  253. }
  254. /**
  255. * Reads database from disk to memory
  256. */
  257. private function read()
  258. {
  259. // Public links are hidden and user not logged in => nothing to show
  260. if ($this->hidePublicLinks && !$this->loggedIn) {
  261. $this->links = array();
  262. return;
  263. }
  264. $this->urls = [];
  265. $this->ids = [];
  266. $this->links = FileUtils::readFlatDB($this->datastore, []);
  267. $toremove = array();
  268. foreach ($this->links as $key => &$link) {
  269. if (! $this->loggedIn && $link['private'] != 0) {
  270. // Transition for not upgraded databases.
  271. unset($this->links[$key]);
  272. continue;
  273. }
  274. // Sanitize data fields.
  275. sanitizeLink($link);
  276. // Remove private tags if the user is not logged in.
  277. if (! $this->loggedIn) {
  278. $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
  279. }
  280. // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
  281. if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
  282. $link['real_url'] = $this->redirector;
  283. if ($this->redirectorEncode) {
  284. $link['real_url'] .= urlencode(unescape($link['url']));
  285. } else {
  286. $link['real_url'] .= $link['url'];
  287. }
  288. } else {
  289. $link['real_url'] = $link['url'];
  290. }
  291. // To be able to load links before running the update, and prepare the update
  292. if (! isset($link['created'])) {
  293. $link['id'] = $link['linkdate'];
  294. $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
  295. if (! empty($link['updated'])) {
  296. $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
  297. }
  298. $link['shorturl'] = smallHash($link['linkdate']);
  299. }
  300. $this->urls[$link['url']] = $key;
  301. $this->ids[$link['id']] = $key;
  302. }
  303. }
  304. /**
  305. * Saves the database from memory to disk
  306. *
  307. * @throws IOException the datastore is not writable
  308. */
  309. private function write()
  310. {
  311. $this->reorder();
  312. FileUtils::writeFlatDB($this->datastore, $this->links);
  313. }
  314. /**
  315. * Saves the database from memory to disk
  316. *
  317. * @param string $pageCacheDir page cache directory
  318. */
  319. public function save($pageCacheDir)
  320. {
  321. if (!$this->loggedIn) {
  322. // TODO: raise an Exception instead
  323. die('You are not authorized to change the database.');
  324. }
  325. $this->write();
  326. invalidateCaches($pageCacheDir);
  327. }
  328. /**
  329. * Returns the link for a given URL, or False if it does not exist.
  330. *
  331. * @param string $url URL to search for
  332. *
  333. * @return mixed the existing link if it exists, else 'false'
  334. */
  335. public function getLinkFromUrl($url)
  336. {
  337. if (isset($this->urls[$url])) {
  338. return $this->links[$this->urls[$url]];
  339. }
  340. return false;
  341. }
  342. /**
  343. * Returns the shaare corresponding to a smallHash.
  344. *
  345. * @param string $request QUERY_STRING server parameter.
  346. *
  347. * @return array $filtered array containing permalink data.
  348. *
  349. * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
  350. */
  351. public function filterHash($request)
  352. {
  353. $request = substr($request, 0, 6);
  354. $linkFilter = new LinkFilter($this->links);
  355. return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request);
  356. }
  357. /**
  358. * Returns the list of articles for a given day.
  359. *
  360. * @param string $request day to filter. Format: YYYYMMDD.
  361. *
  362. * @return array list of shaare found.
  363. */
  364. public function filterDay($request)
  365. {
  366. $linkFilter = new LinkFilter($this->links);
  367. return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request);
  368. }
  369. /**
  370. * Filter links according to search parameters.
  371. *
  372. * @param array $filterRequest Search request content. Supported keys:
  373. * - searchtags: list of tags
  374. * - searchterm: term search
  375. * @param bool $casesensitive Optional: Perform case sensitive filter
  376. * @param string $visibility return only all/private/public links
  377. * @param string $untaggedonly return only untagged links
  378. *
  379. * @return array filtered links, all links if no suitable filter was provided.
  380. */
  381. public function filterSearch(
  382. $filterRequest = array(),
  383. $casesensitive = false,
  384. $visibility = 'all',
  385. $untaggedonly = false
  386. ) {
  387. // Filter link database according to parameters.
  388. $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
  389. $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';
  390. // Search tags + fullsearch - blank string parameter will return all links.
  391. $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext"
  392. $request = [$searchtags, $searchterm];
  393. $linkFilter = new LinkFilter($this);
  394. return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly);
  395. }
  396. /**
  397. * Returns the list tags appearing in the links with the given tags
  398. *
  399. * @param array $filteringTags tags selecting the links to consider
  400. * @param string $visibility process only all/private/public links
  401. *
  402. * @return array tag => linksCount
  403. */
  404. public function linksCountPerTag($filteringTags = [], $visibility = 'all')
  405. {
  406. $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
  407. $tags = [];
  408. $caseMapping = [];
  409. foreach ($links as $link) {
  410. foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
  411. if (empty($tag)) {
  412. continue;
  413. }
  414. // The first case found will be displayed.
  415. if (!isset($caseMapping[strtolower($tag)])) {
  416. $caseMapping[strtolower($tag)] = $tag;
  417. $tags[$caseMapping[strtolower($tag)]] = 0;
  418. }
  419. $tags[$caseMapping[strtolower($tag)]]++;
  420. }
  421. }
  422. /*
  423. * Formerly used arsort(), which doesn't define the sort behaviour for equal values.
  424. * Also, this function doesn't produce the same result between PHP 5.6 and 7.
  425. *
  426. * So we now use array_multisort() to sort tags by DESC occurrences,
  427. * then ASC alphabetically for equal values.
  428. *
  429. * @see https://github.com/shaarli/Shaarli/issues/1142
  430. */
  431. $keys = array_keys($tags);
  432. $tmpTags = array_combine($keys, $keys);
  433. array_multisort($tags, SORT_DESC, $tmpTags, SORT_ASC, $tags);
  434. return $tags;
  435. }
  436. /**
  437. * Rename or delete a tag across all links.
  438. *
  439. * @param string $from Tag to rename
  440. * @param string $to New tag. If none is provided, the from tag will be deleted
  441. *
  442. * @return array|bool List of altered links or false on error
  443. */
  444. public function renameTag($from, $to)
  445. {
  446. if (empty($from)) {
  447. return false;
  448. }
  449. $delete = empty($to);
  450. // True for case-sensitive tag search.
  451. $linksToAlter = $this->filterSearch(['searchtags' => $from], true);
  452. foreach ($linksToAlter as $key => &$value) {
  453. $tags = preg_split('/\s+/', trim($value['tags']));
  454. if (($pos = array_search($from, $tags)) !== false) {
  455. if ($delete) {
  456. unset($tags[$pos]); // Remove tag.
  457. } else {
  458. $tags[$pos] = trim($to);
  459. }
  460. $value['tags'] = trim(implode(' ', array_unique($tags)));
  461. $this[$value['id']] = $value;
  462. }
  463. }
  464. return $linksToAlter;
  465. }
  466. /**
  467. * Returns the list of days containing articles (oldest first)
  468. * Output: An array containing days (in format YYYYMMDD).
  469. */
  470. public function days()
  471. {
  472. $linkDays = array();
  473. foreach ($this->links as $link) {
  474. $linkDays[$link['created']->format('Ymd')] = 0;
  475. }
  476. $linkDays = array_keys($linkDays);
  477. sort($linkDays);
  478. return $linkDays;
  479. }
  480. /**
  481. * Reorder links by creation date (newest first).
  482. *
  483. * Also update the urls and ids mapping arrays.
  484. *
  485. * @param string $order ASC|DESC
  486. */
  487. public function reorder($order = 'DESC')
  488. {
  489. $order = $order === 'ASC' ? -1 : 1;
  490. // Reorder array by dates.
  491. usort($this->links, function ($a, $b) use ($order) {
  492. if (isset($a['sticky']) && isset($b['sticky']) && $a['sticky'] !== $b['sticky']) {
  493. return $a['sticky'] ? -1 : 1;
  494. }
  495. return $a['created'] < $b['created'] ? 1 * $order : -1 * $order;
  496. });
  497. $this->urls = [];
  498. $this->ids = [];
  499. foreach ($this->links as $key => $link) {
  500. $this->urls[$link['url']] = $key;
  501. $this->ids[$link['id']] = $key;
  502. }
  503. }
  504. /**
  505. * Return the next key for link creation.
  506. * E.g. If the last ID is 597, the next will be 598.
  507. *
  508. * @return int next ID.
  509. */
  510. public function getNextId()
  511. {
  512. if (!empty($this->ids)) {
  513. return max(array_keys($this->ids)) + 1;
  514. }
  515. return 0;
  516. }
  517. /**
  518. * Returns a link offset in links array from its unique ID.
  519. *
  520. * @param int $id Persistent ID of a link.
  521. *
  522. * @return int Real offset in local array, or null if doesn't exist.
  523. */
  524. protected function getLinkOffset($id)
  525. {
  526. if (isset($this->ids[$id])) {
  527. return $this->ids[$id];
  528. }
  529. return null;
  530. }
  531. }