LinkDB.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. <?php
  2. /**
  3. * Data storage for links.
  4. *
  5. * This object behaves like an associative array.
  6. *
  7. * Example:
  8. * $myLinks = new LinkDB();
  9. * echo $myLinks[350]['title'];
  10. * foreach ($myLinks as $link)
  11. * echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
  12. *
  13. * Available keys:
  14. * - id: primary key, incremental integer identifier (persistent)
  15. * - description: description of the entry
  16. * - created: creation date of this entry, DateTime object.
  17. * - updated: last modification date of this entry, DateTime object.
  18. * - private: Is this link private? 0=no, other value=yes
  19. * - tags: tags attached to this entry (separated by spaces)
  20. * - title Title of the link
  21. * - url URL of the link. Used for displayable links (no redirector, relative, etc.).
  22. * Can be absolute or relative.
  23. * Relative URLs are permalinks (e.g.'?m-ukcw')
  24. * - real_url Absolute processed URL.
  25. * - shorturl Permalink smallhash
  26. *
  27. * Implements 3 interfaces:
  28. * - ArrayAccess: behaves like an associative array;
  29. * - Countable: there is a count() method;
  30. * - Iterator: usable in foreach () loops.
  31. *
  32. * ID mechanism:
  33. * ArrayAccess is implemented in a way that will allow to access a link
  34. * with the unique identifier ID directly with $link[ID].
  35. * Note that it's not the real key of the link array attribute.
  36. * This mechanism is in place to have persistent link IDs,
  37. * even though the internal array is reordered by date.
  38. * Example:
  39. * - DB: link #1 (2010-01-01) link #2 (2016-01-01)
  40. * - Order: #2 #1
  41. * - Import links containing: link #3 (2013-01-01)
  42. * - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
  43. * - Real order: #2 #3 #1
  44. */
  45. class LinkDB implements Iterator, Countable, ArrayAccess
  46. {
  47. // Links are stored as a PHP serialized string
  48. private $datastore;
  49. // Link date storage format
  50. const LINK_DATE_FORMAT = 'Ymd_His';
  51. // List of links (associative array)
  52. // - key: link date (e.g. "20110823_124546"),
  53. // - value: associative array (keys: title, description...)
  54. private $links;
  55. // List of all recorded URLs (key=url, value=link offset)
  56. // for fast reserve search (url-->link offset)
  57. private $urls;
  58. /**
  59. * @var array List of all links IDS mapped with their array offset.
  60. * Map: id->offset.
  61. */
  62. protected $ids;
  63. // List of offset keys (for the Iterator interface implementation)
  64. private $keys;
  65. // Position in the $this->keys array (for the Iterator interface)
  66. private $position;
  67. // Is the user logged in? (used to filter private links)
  68. private $loggedIn;
  69. // Hide public links
  70. private $hidePublicLinks;
  71. // link redirector set in user settings.
  72. private $redirector;
  73. /**
  74. * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
  75. *
  76. * Example:
  77. * anonym.to needs clean URL while dereferer.org needs urlencoded URL.
  78. *
  79. * @var boolean $redirectorEncode parameter: true or false
  80. */
  81. private $redirectorEncode;
  82. /**
  83. * Creates a new LinkDB
  84. *
  85. * Checks if the datastore exists; else, attempts to create a dummy one.
  86. *
  87. * @param string $datastore datastore file path.
  88. * @param boolean $isLoggedIn is the user logged in?
  89. * @param boolean $hidePublicLinks if true all links are private.
  90. * @param string $redirector link redirector set in user settings.
  91. * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
  92. */
  93. public function __construct(
  94. $datastore,
  95. $isLoggedIn,
  96. $hidePublicLinks,
  97. $redirector = '',
  98. $redirectorEncode = true
  99. )
  100. {
  101. $this->datastore = $datastore;
  102. $this->loggedIn = $isLoggedIn;
  103. $this->hidePublicLinks = $hidePublicLinks;
  104. $this->redirector = $redirector;
  105. $this->redirectorEncode = $redirectorEncode === true;
  106. $this->check();
  107. $this->read();
  108. }
  109. /**
  110. * Countable - Counts elements of an object
  111. */
  112. public function count()
  113. {
  114. return count($this->links);
  115. }
  116. /**
  117. * ArrayAccess - Assigns a value to the specified offset
  118. */
  119. public function offsetSet($offset, $value)
  120. {
  121. // TODO: use exceptions instead of "die"
  122. if (!$this->loggedIn) {
  123. die('You are not authorized to add a link.');
  124. }
  125. if (!isset($value['id']) || empty($value['url'])) {
  126. die('Internal Error: A link should always have an id and URL.');
  127. }
  128. if (($offset !== null && ! is_int($offset)) || ! is_int($value['id'])) {
  129. die('You must specify an integer as a key.');
  130. }
  131. if ($offset !== null && $offset !== $value['id']) {
  132. die('Array offset and link ID must be equal.');
  133. }
  134. // If the link exists, we reuse the real offset, otherwise new entry
  135. $existing = $this->getLinkOffset($offset);
  136. if ($existing !== null) {
  137. $offset = $existing;
  138. } else {
  139. $offset = count($this->links);
  140. }
  141. $this->links[$offset] = $value;
  142. $this->urls[$value['url']] = $offset;
  143. $this->ids[$value['id']] = $offset;
  144. }
  145. /**
  146. * ArrayAccess - Whether or not an offset exists
  147. */
  148. public function offsetExists($offset)
  149. {
  150. return array_key_exists($this->getLinkOffset($offset), $this->links);
  151. }
  152. /**
  153. * ArrayAccess - Unsets an offset
  154. */
  155. public function offsetUnset($offset)
  156. {
  157. if (!$this->loggedIn) {
  158. // TODO: raise an exception
  159. die('You are not authorized to delete a link.');
  160. }
  161. $realOffset = $this->getLinkOffset($offset);
  162. $url = $this->links[$realOffset]['url'];
  163. unset($this->urls[$url]);
  164. unset($this->ids[$realOffset]);
  165. unset($this->links[$realOffset]);
  166. }
  167. /**
  168. * ArrayAccess - Returns the value at specified offset
  169. */
  170. public function offsetGet($offset)
  171. {
  172. $realOffset = $this->getLinkOffset($offset);
  173. return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null;
  174. }
  175. /**
  176. * Iterator - Returns the current element
  177. */
  178. public function current()
  179. {
  180. return $this[$this->keys[$this->position]];
  181. }
  182. /**
  183. * Iterator - Returns the key of the current element
  184. */
  185. public function key()
  186. {
  187. return $this->keys[$this->position];
  188. }
  189. /**
  190. * Iterator - Moves forward to next element
  191. */
  192. public function next()
  193. {
  194. ++$this->position;
  195. }
  196. /**
  197. * Iterator - Rewinds the Iterator to the first element
  198. *
  199. * Entries are sorted by date (latest first)
  200. */
  201. public function rewind()
  202. {
  203. $this->keys = array_keys($this->ids);
  204. $this->position = 0;
  205. }
  206. /**
  207. * Iterator - Checks if current position is valid
  208. */
  209. public function valid()
  210. {
  211. return isset($this->keys[$this->position]);
  212. }
  213. /**
  214. * Checks if the DB directory and file exist
  215. *
  216. * If no DB file is found, creates a dummy DB.
  217. */
  218. private function check()
  219. {
  220. if (file_exists($this->datastore)) {
  221. return;
  222. }
  223. // Create a dummy database for example
  224. $this->links = array();
  225. $link = array(
  226. 'id' => 1,
  227. 'title'=>' Shaarli: the personal, minimalist, super-fast, no-database delicious clone',
  228. 'url'=>'https://shaarli.readthedocs.io',
  229. 'description'=>'Welcome to Shaarli! This is your first public bookmark. To edit or delete me, you must first login.
  230. To learn how to use Shaarli, consult the link "Help/documentation" at the bottom of this page.
  231. You use the community supported version of the original Shaarli project, by Sebastien Sauvage.',
  232. 'private'=>0,
  233. 'created'=> new DateTime(),
  234. 'tags'=>'opensource software'
  235. );
  236. $link['shorturl'] = link_small_hash($link['created'], $link['id']);
  237. $this->links[1] = $link;
  238. $link = array(
  239. 'id' => 0,
  240. 'title'=>'My secret stuff... - Pastebin.com',
  241. 'url'=>'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
  242. 'description'=>'Shhhh! I\'m a private link only YOU can see. You can delete me too.',
  243. 'private'=>1,
  244. 'created'=> new DateTime('1 minute ago'),
  245. 'tags'=>'secretstuff',
  246. );
  247. $link['shorturl'] = link_small_hash($link['created'], $link['id']);
  248. $this->links[0] = $link;
  249. // Write database to disk
  250. $this->write();
  251. }
  252. /**
  253. * Reads database from disk to memory
  254. */
  255. private function read()
  256. {
  257. // Public links are hidden and user not logged in => nothing to show
  258. if ($this->hidePublicLinks && !$this->loggedIn) {
  259. $this->links = array();
  260. return;
  261. }
  262. $this->links = FileUtils::readFlatDB($this->datastore, []);
  263. $toremove = array();
  264. foreach ($this->links as $key => &$link) {
  265. if (! $this->loggedIn && $link['private'] != 0) {
  266. // Transition for not upgraded databases.
  267. $toremove[] = $key;
  268. continue;
  269. }
  270. // Sanitize data fields.
  271. sanitizeLink($link);
  272. // Remove private tags if the user is not logged in.
  273. if (! $this->loggedIn) {
  274. $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
  275. }
  276. // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
  277. if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
  278. $link['real_url'] = $this->redirector;
  279. if ($this->redirectorEncode) {
  280. $link['real_url'] .= urlencode(unescape($link['url']));
  281. } else {
  282. $link['real_url'] .= $link['url'];
  283. }
  284. }
  285. else {
  286. $link['real_url'] = $link['url'];
  287. }
  288. // To be able to load links before running the update, and prepare the update
  289. if (! isset($link['created'])) {
  290. $link['id'] = $link['linkdate'];
  291. $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
  292. if (! empty($link['updated'])) {
  293. $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
  294. }
  295. $link['shorturl'] = smallHash($link['linkdate']);
  296. }
  297. }
  298. // If user is not logged in, filter private links.
  299. foreach ($toremove as $offset) {
  300. unset($this->links[$offset]);
  301. }
  302. $this->reorder();
  303. }
  304. /**
  305. * Saves the database from memory to disk
  306. *
  307. * @throws IOException the datastore is not writable
  308. */
  309. private function write()
  310. {
  311. FileUtils::writeFlatDB($this->datastore, $this->links);
  312. }
  313. /**
  314. * Saves the database from memory to disk
  315. *
  316. * @param string $pageCacheDir page cache directory
  317. */
  318. public function save($pageCacheDir)
  319. {
  320. if (!$this->loggedIn) {
  321. // TODO: raise an Exception instead
  322. die('You are not authorized to change the database.');
  323. }
  324. $this->write();
  325. invalidateCaches($pageCacheDir);
  326. }
  327. /**
  328. * Returns the link for a given URL, or False if it does not exist.
  329. *
  330. * @param string $url URL to search for
  331. *
  332. * @return mixed the existing link if it exists, else 'false'
  333. */
  334. public function getLinkFromUrl($url)
  335. {
  336. if (isset($this->urls[$url])) {
  337. return $this->links[$this->urls[$url]];
  338. }
  339. return false;
  340. }
  341. /**
  342. * Returns the shaare corresponding to a smallHash.
  343. *
  344. * @param string $request QUERY_STRING server parameter.
  345. *
  346. * @return array $filtered array containing permalink data.
  347. *
  348. * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
  349. */
  350. public function filterHash($request)
  351. {
  352. $request = substr($request, 0, 6);
  353. $linkFilter = new LinkFilter($this->links);
  354. return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request);
  355. }
  356. /**
  357. * Returns the list of articles for a given day.
  358. *
  359. * @param string $request day to filter. Format: YYYYMMDD.
  360. *
  361. * @return array list of shaare found.
  362. */
  363. public function filterDay($request) {
  364. $linkFilter = new LinkFilter($this->links);
  365. return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request);
  366. }
  367. /**
  368. * Filter links according to search parameters.
  369. *
  370. * @param array $filterRequest Search request content. Supported keys:
  371. * - searchtags: list of tags
  372. * - searchterm: term search
  373. * @param bool $casesensitive Optional: Perform case sensitive filter
  374. * @param string $visibility return only all/private/public links
  375. * @param string $untaggedonly return only untagged links
  376. *
  377. * @return array filtered links, all links if no suitable filter was provided.
  378. */
  379. public function filterSearch($filterRequest = array(), $casesensitive = false, $visibility = 'all', $untaggedonly = false)
  380. {
  381. // Filter link database according to parameters.
  382. $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
  383. $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';
  384. // Search tags + fullsearch - blank string parameter will return all links.
  385. $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext"
  386. $request = [$searchtags, $searchterm];
  387. $linkFilter = new LinkFilter($this);
  388. return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly);
  389. }
  390. /**
  391. * Returns the list tags appearing in the links with the given tags
  392. * @param $filteringTags: tags selecting the links to consider
  393. * @param $visibility: process only all/private/public links
  394. * @return: a tag=>linksCount array
  395. */
  396. public function linksCountPerTag($filteringTags = [], $visibility = 'all')
  397. {
  398. $links = empty($filteringTags) ? $this->links : $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
  399. $tags = array();
  400. $caseMapping = array();
  401. foreach ($links as $link) {
  402. foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
  403. if (empty($tag)) {
  404. continue;
  405. }
  406. // The first case found will be displayed.
  407. if (!isset($caseMapping[strtolower($tag)])) {
  408. $caseMapping[strtolower($tag)] = $tag;
  409. $tags[$caseMapping[strtolower($tag)]] = 0;
  410. }
  411. $tags[$caseMapping[strtolower($tag)]]++;
  412. }
  413. }
  414. // Sort tags by usage (most used tag first)
  415. arsort($tags);
  416. return $tags;
  417. }
  418. /**
  419. * Rename or delete a tag across all links.
  420. *
  421. * @param string $from Tag to rename
  422. * @param string $to New tag. If none is provided, the from tag will be deleted
  423. *
  424. * @return array|bool List of altered links or false on error
  425. */
  426. public function renameTag($from, $to)
  427. {
  428. if (empty($from)) {
  429. return false;
  430. }
  431. $delete = empty($to);
  432. // True for case-sensitive tag search.
  433. $linksToAlter = $this->filterSearch(['searchtags' => $from], true);
  434. foreach($linksToAlter as $key => &$value)
  435. {
  436. $tags = preg_split('/\s+/', trim($value['tags']));
  437. if (($pos = array_search($from, $tags)) !== false) {
  438. if ($delete) {
  439. unset($tags[$pos]); // Remove tag.
  440. } else {
  441. $tags[$pos] = trim($to);
  442. }
  443. $value['tags'] = trim(implode(' ', array_unique($tags)));
  444. $this[$value['id']] = $value;
  445. }
  446. }
  447. return $linksToAlter;
  448. }
  449. /**
  450. * Returns the list of days containing articles (oldest first)
  451. * Output: An array containing days (in format YYYYMMDD).
  452. */
  453. public function days()
  454. {
  455. $linkDays = array();
  456. foreach ($this->links as $link) {
  457. $linkDays[$link['created']->format('Ymd')] = 0;
  458. }
  459. $linkDays = array_keys($linkDays);
  460. sort($linkDays);
  461. return $linkDays;
  462. }
  463. /**
  464. * Reorder links by creation date (newest first).
  465. *
  466. * Also update the urls and ids mapping arrays.
  467. *
  468. * @param string $order ASC|DESC
  469. */
  470. public function reorder($order = 'DESC')
  471. {
  472. $order = $order === 'ASC' ? -1 : 1;
  473. // Reorder array by dates.
  474. usort($this->links, function($a, $b) use ($order) {
  475. return $a['created'] < $b['created'] ? 1 * $order : -1 * $order;
  476. });
  477. $this->urls = array();
  478. $this->ids = array();
  479. foreach ($this->links as $key => $link) {
  480. $this->urls[$link['url']] = $key;
  481. $this->ids[$link['id']] = $key;
  482. }
  483. }
  484. /**
  485. * Return the next key for link creation.
  486. * E.g. If the last ID is 597, the next will be 598.
  487. *
  488. * @return int next ID.
  489. */
  490. public function getNextId()
  491. {
  492. if (!empty($this->ids)) {
  493. return max(array_keys($this->ids)) + 1;
  494. }
  495. return 0;
  496. }
  497. /**
  498. * Returns a link offset in links array from its unique ID.
  499. *
  500. * @param int $id Persistent ID of a link.
  501. *
  502. * @return int Real offset in local array, or null if doesn't exist.
  503. */
  504. protected function getLinkOffset($id)
  505. {
  506. if (isset($this->ids[$id])) {
  507. return $this->ids[$id];
  508. }
  509. return null;
  510. }
  511. }