LinkDB.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. <?php
  2. /**
  3. * Data storage for links.
  4. *
  5. * This object behaves like an associative array.
  6. *
  7. * Example:
  8. * $myLinks = new LinkDB();
  9. * echo $myLinks[350]['title'];
  10. * foreach ($myLinks as $link)
  11. * echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
  12. *
  13. * Available keys:
  14. * - id: primary key, incremental integer identifier (persistent)
  15. * - description: description of the entry
  16. * - created: creation date of this entry, DateTime object.
  17. * - updated: last modification date of this entry, DateTime object.
  18. * - private: Is this link private? 0=no, other value=yes
  19. * - tags: tags attached to this entry (separated by spaces)
  20. * - title: title of the link
  21. * - url: URL of the link. Used for displayable links (no redirector, relative, etc.).
  22. * Can be absolute or relative.
  23. * Relative URLs are permalinks (e.g. '?m-ukcw')
  24. * - real_url: absolute processed URL.
  25. * - shorturl: permalink smallhash
  26. *
  27. * Implements 3 interfaces:
  28. * - ArrayAccess: behaves like an associative array;
  29. * - Countable: there is a count() method;
  30. * - Iterator: usable in foreach () loops.
  31. *
  32. * ID mechanism:
  33. * ArrayAccess is implemented so that a link can be accessed directly
  34. * by its unique identifier, with $myLinks[ID].
  35. * Note that the ID is not the real key of the internal links array.
  36. * This mechanism is in place to have persistent link IDs,
  37. * even though the internal array is reordered by date.
  38. * Example:
  39. * - DB: link #1 (2010-01-01) link #2 (2016-01-01)
  40. * - Order: #2 #1
  41. * - Import links containing: link #3 (2013-01-01)
  42. * - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
  43. * - Real order: #2 #3 #1
  44. */
  class LinkDB implements Iterator, Countable, ArrayAccess
  {
      // Path of the datastore file; links are stored in it as a PHP serialized string
      private $datastore;

      // Link date storage format
      const LINK_DATE_FORMAT = 'Ymd_His';

      // Datastore PHP prefix
      protected static $phpPrefix = '<?php /* ';

      // Datastore PHP suffix
      protected static $phpSuffix = ' */ ?>';

      // List of links
      //  - key:   offset of the link in this array (rebuilt by reorder())
      //  - value: associative array (keys: id, title, description...)
      private $links;

      // List of all recorded URLs (key=url, value=link offset)
      // for fast reverse search (url-->link offset)
      private $urls;

      /**
       * @var array List of all links IDS mapped with their array offset.
       *            Map: id->offset.
       */
      protected $ids;

      // List of link IDs being iterated over (for the Iterator interface implementation)
      private $keys;

      // Position in the $this->keys array (for the Iterator interface)
      private $position;

      // Is the user logged in? (used to filter private links)
      private $loggedIn;

      // Hide public links
      private $hidePublicLinks;

      // link redirector set in user settings.
      private $redirector;

      /**
       * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
       *
       * Example:
       *  anonym.to needs clean URL while dereferer.org needs urlencoded URL.
       *
       * @var boolean $redirectorEncode parameter: true or false
       */
      private $redirectorEncode;

      /**
       * Creates a new LinkDB
       *
       * Checks if the datastore exists; else, attempts to create a dummy one.
       * The datastore is then loaded into memory.
       *
       * @param string  $datastore        datastore file path.
       * @param boolean $isLoggedIn       is the user logged in?
       * @param boolean $hidePublicLinks  if true all links are private.
       * @param string  $redirector       link redirector set in user settings.
       * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
       */
      public function __construct(
          $datastore,
          $isLoggedIn,
          $hidePublicLinks,
          $redirector = '',
          $redirectorEncode = true
      )
      {
          $this->datastore = $datastore;
          $this->loggedIn = $isLoggedIn;
          $this->hidePublicLinks = $hidePublicLinks;
          $this->redirector = $redirector;
          // Only a strict boolean true enables encoding.
          $this->redirectorEncode = $redirectorEncode === true;
          $this->check();
          $this->read();
      }

      /**
       * Countable - Counts elements of an object
       *
       * @return int number of links currently loaded in memory.
       */
      #[\ReturnTypeWillChange]
      public function count()
      {
          return count($this->links);
      }

      /**
       * ArrayAccess - Assigns a value to the specified offset
       *
       * The link is stored under its persistent ID ($value['id']). If a link
       * with that ID already exists its slot is reused, otherwise the link is
       * stored in a new, unused slot.
       *
       * @param int|null $offset Link ID, or null when using the `$db[] = $link` form.
       * @param array    $value  Link data; must contain an integer 'id' and a non-empty 'url'.
       */
      #[\ReturnTypeWillChange]
      public function offsetSet($offset, $value)
      {
          // TODO: use exceptions instead of "die"
          if (!$this->loggedIn) {
              die('You are not authorized to add a link.');
          }
          if (!isset($value['id']) || empty($value['url'])) {
              die('Internal Error: A link should always have an id and URL.');
          }
          if ((! empty($offset) && ! is_int($offset)) || ! is_int($value['id'])) {
              die('You must specify an integer as a key.');
          }
          // Any integer offset (including 0, which empty() would let through)
          // has to match the link ID.
          if (is_int($offset) && $offset !== $value['id']) {
              die('Array offset and link ID must be equal.');
          }

          // If the link exists, we reuse the real offset, otherwise new entry.
          // The lookup uses the link ID so that `$db[] = $link` also updates in place.
          $existing = $this->getLinkOffset($value['id']);
          if ($existing !== null) {
              $offset = $existing;
              // The URL may have been edited: drop the mapping of the previous URL.
              if (isset($this->links[$offset]['url'])) {
                  $previousUrl = $this->links[$offset]['url'];
                  if ($previousUrl !== $value['url']
                      && isset($this->urls[$previousUrl])
                      && $this->urls[$previousUrl] === $offset
                  ) {
                      unset($this->urls[$previousUrl]);
                  }
              }
          } else {
              // count() is not a safe "next offset": deletions leave holes in the
              // array, so it could point at a slot which is still in use.
              $offset = empty($this->links) ? 0 : max(array_keys($this->links)) + 1;
          }

          $this->links[$offset] = $value;
          $this->urls[$value['url']] = $offset;
          $this->ids[$value['id']] = $offset;
      }

      /**
       * ArrayAccess - Whether or not an offset exists
       *
       * @param int $offset Link ID.
       *
       * @return bool true if a link with this ID is loaded.
       */
      #[\ReturnTypeWillChange]
      public function offsetExists($offset)
      {
          $realOffset = $this->getLinkOffset($offset);
          return $realOffset !== null && array_key_exists($realOffset, $this->links);
      }

      /**
       * ArrayAccess - Unsets an offset
       *
       * Removes the link and its entries in the URL and ID maps.
       * Unsetting an unknown ID does nothing.
       *
       * @param int $offset Link ID.
       */
      #[\ReturnTypeWillChange]
      public function offsetUnset($offset)
      {
          if (!$this->loggedIn) {
              // TODO: raise an exception
              die('You are not authorized to delete a link.');
          }

          $realOffset = $this->getLinkOffset($offset);
          if ($realOffset === null) {
              return;
          }

          if (isset($this->links[$realOffset]['url'])) {
              unset($this->urls[$this->links[$realOffset]['url']]);
          }
          // $ids is keyed by link ID, not by array offset.
          unset($this->ids[$offset]);
          unset($this->links[$realOffset]);
      }

      /**
       * ArrayAccess - Returns the value at specified offset
       *
       * @param int $offset Link ID.
       *
       * @return array|null the link data, or null if no link has this ID.
       */
      #[\ReturnTypeWillChange]
      public function offsetGet($offset)
      {
          $realOffset = $this->getLinkOffset($offset);
          return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null;
      }

      /**
       * Iterator - Returns the current element
       */
      #[\ReturnTypeWillChange]
      public function current()
      {
          // $keys holds link IDs, resolved through offsetGet().
          return $this[$this->keys[$this->position]];
      }

      /**
       * Iterator - Returns the key of the current element (a link ID)
       */
      #[\ReturnTypeWillChange]
      public function key()
      {
          return $this->keys[$this->position];
      }

      /**
       * Iterator - Moves forward to next element
       */
      #[\ReturnTypeWillChange]
      public function next()
      {
          ++$this->position;
      }

      /**
       * Iterator - Rewinds the Iterator to the first element
       *
       * Links are iterated in the order of the ID map, which reorder()
       * rebuilds sorted by date (latest first by default).
       */
      #[\ReturnTypeWillChange]
      public function rewind()
      {
          $this->keys = array_keys($this->ids);
          $this->position = 0;
      }

      /**
       * Iterator - Checks if current position is valid
       */
      #[\ReturnTypeWillChange]
      public function valid()
      {
          return isset($this->keys[$this->position]);
      }

      /**
       * Checks if the DB directory and file exist
       *
       * If no DB file is found, creates a dummy DB containing one public and
       * one private example link, and writes it to disk.
       */
      private function check()
      {
          if (file_exists($this->datastore)) {
              return;
          }

          // Create a dummy database for example
          $this->links = array();
          $link = array(
              'id' => 1,
              'title'=>' Shaarli: the personal, minimalist, super-fast, no-database delicious clone',
              'url'=>'https://github.com/shaarli/Shaarli/wiki',
              'description'=>'Welcome to Shaarli! This is your first public bookmark. To edit or delete me, you must first login.
To learn how to use Shaarli, consult the link "Help/documentation" at the bottom of this page.
You use the community supported version of the original Shaarli project, by Sebastien Sauvage.',
              'private'=>0,
              'created'=> new DateTime(),
              'tags'=>'opensource software'
          );
          $link['shorturl'] = link_small_hash($link['created'], $link['id']);
          $this->links[1] = $link;

          $link = array(
              'id' => 0,
              'title'=>'My secret stuff... - Pastebin.com',
              'url'=>'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
              'description'=>'Shhhh! I\'m a private link only YOU can see. You can delete me too.',
              'private'=>1,
              'created'=> new DateTime('1 minute ago'),
              'tags'=>'secretstuff',
          );
          $link['shorturl'] = link_small_hash($link['created'], $link['id']);
          $this->links[0] = $link;

          // Write database to disk
          $this->write();
      }

      /**
       * Reads database from disk to memory
       *
       * Private links and private tags (tags starting with a dot) are dropped
       * when the user is not logged in. Each link gets a 'real_url' entry, and
       * links stored in the legacy format (no 'created' entry) are converted on
       * the fly. The links are finally sorted and the URL/ID maps rebuilt.
       */
      private function read()
      {
          // Public links are hidden and user not logged in => nothing to show
          if ($this->hidePublicLinks && !$this->loggedIn) {
              $this->links = array();
              // Keep the maps usable: the Iterator methods read $this->ids.
              $this->urls = array();
              $this->ids = array();
              return;
          }

          // Read data
          // Note that gzinflate is faster than gzuncompress.
          // See: http://www.php.net/manual/en/function.gzdeflate.php#96439
          $this->links = array();
          if (file_exists($this->datastore)) {
              $this->links = unserialize(gzinflate(base64_decode(
                  substr(file_get_contents($this->datastore),
                      strlen(self::$phpPrefix), -strlen(self::$phpSuffix)))));
          }

          $toremove = array();
          foreach ($this->links as $key => &$link) {
              // Private links are not shown to anonymous visitors;
              // they are removed once the loop is over.
              if (! $this->loggedIn && $link['private'] != 0) {
                  $toremove[] = $key;
                  continue;
              }

              // Sanitize data fields.
              // NOTE(review): sanitizeLink() is defined elsewhere; presumably it
              // modifies $link by reference - confirm.
              sanitizeLink($link);

              // Remove private tags if the user is not logged in.
              // A private tag is a dot followed by any run of non-blank characters.
              if (! $this->loggedIn) {
                  $link['tags'] = preg_replace('/(^|\s+)\.\S+\s*/', ' ', $link['tags']);
              }

              // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
              if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
                  $link['real_url'] = $this->redirector;
                  if ($this->redirectorEncode) {
                      $link['real_url'] .= urlencode(unescape($link['url']));
                  } else {
                      $link['real_url'] .= $link['url'];
                  }
              } else {
                  $link['real_url'] = $link['url'];
              }

              // To be able to load links before running the update, and prepare the update
              if (! isset($link['created'])) {
                  $link['id'] = $link['linkdate'];
                  $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
                  if (! empty($link['updated'])) {
                      $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
                  }
                  $link['shorturl'] = smallHash($link['linkdate']);
              }
          }
          // Break the reference left behind by the foreach above.
          unset($link);

          // If user is not logged in, filter private links.
          foreach ($toremove as $offset) {
              unset($this->links[$offset]);
          }

          $this->reorder();
      }

      /**
       * Saves the database from memory to disk
       *
       * @throws IOException the datastore is not writable, or writing it failed
       */
      private function write()
      {
          if (is_file($this->datastore) && !is_writeable($this->datastore)) {
              // The datastore exists but is not writeable
              throw new IOException($this->datastore);
          } elseif (!is_file($this->datastore) && !is_writeable(dirname($this->datastore))) {
              // The datastore does not exist and its parent directory is not writeable
              throw new IOException(dirname($this->datastore));
          }

          $written = file_put_contents(
              $this->datastore,
              self::$phpPrefix.base64_encode(gzdeflate(serialize($this->links))).self::$phpSuffix
          );
          if ($written === false) {
              // Do not let a failed write go unnoticed.
              throw new IOException($this->datastore);
          }
      }

      /**
       * Saves the database from memory to disk, then invalidates the caches
       *
       * @param string $pageCacheDir page cache directory
       */
      public function save($pageCacheDir)
      {
          if (!$this->loggedIn) {
              // TODO: raise an Exception instead
              die('You are not authorized to change the database.');
          }

          $this->write();

          invalidateCaches($pageCacheDir);
      }

      /**
       * Returns the link for a given URL, or False if it does not exist.
       *
       * @param string $url URL to search for
       *
       * @return mixed the existing link if it exists, else 'false'
       */
      public function getLinkFromUrl($url)
      {
          if (isset($this->urls[$url])) {
              return $this->links[$this->urls[$url]];
          }
          return false;
      }

      /**
       * Returns the shaare corresponding to a smallHash.
       *
       * Only the first 6 characters of the request are used.
       *
       * @param string $request QUERY_STRING server parameter.
       *
       * @return array $filtered array containing permalink data.
       *
       * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
       */
      public function filterHash($request)
      {
          $request = substr($request, 0, 6);
          // NOTE(review): the raw links array is passed here while filterSearch()
          // passes the LinkDB object itself - confirm this is intended.
          $linkFilter = new LinkFilter($this->links);
          return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request);
      }

      /**
       * Returns the list of articles for a given day.
       *
       * @param string $request day to filter. Format: YYYYMMDD.
       *
       * @return array list of shaare found.
       */
      public function filterDay($request)
      {
          $linkFilter = new LinkFilter($this->links);
          return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request);
      }

      /**
       * Filter links according to search parameters.
       *
       * @param array $filterRequest Search request content. Supported keys:
       *                              - searchtags: list of tags
       *                              - searchterm: term search
       * @param bool  $casesensitive Optional: Perform case sensitive filter
       * @param bool  $privateonly   Optional: Returns private links only if true.
       *
       * @return array filtered links, all links if no suitable filter was provided.
       */
      public function filterSearch($filterRequest = array(), $casesensitive = false, $privateonly = false)
      {
          // Filter link database according to parameters.
          $searchtags = !empty($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
          $searchterm = !empty($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';

          if (! empty($searchtags) && ! empty($searchterm)) {
              // Search tags + fullsearch.
              $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT;
              $request = array($searchtags, $searchterm);
          } elseif (! empty($searchtags)) {
              // Search by tags.
              $type = LinkFilter::$FILTER_TAG;
              $request = $searchtags;
          } elseif (! empty($searchterm)) {
              // Fulltext search.
              $type = LinkFilter::$FILTER_TEXT;
              $request = $searchterm;
          } else {
              // Otherwise, display without filtering.
              $type = '';
              $request = '';
          }

          $linkFilter = new LinkFilter($this);
          return $linkFilter->filter($type, $request, $casesensitive, $privateonly);
      }

      /**
       * Returns the list of all tags
       *
       * Tags are grouped case-insensitively; the first spelling found is the
       * one used as key.
       *
       * Output: associative array key=tag, value=number of links using the tag,
       * sorted by usage (most used tag first).
       */
      public function allTags()
      {
          $tags = array();
          $caseMapping = array();
          foreach ($this->links as $link) {
              // PREG_SPLIT_NO_EMPTY already discards empty pieces, so every
              // piece is a tag - including a tag named "0".
              foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
                  $lowerTag = strtolower($tag);
                  // The first case found will be displayed.
                  if (!isset($caseMapping[$lowerTag])) {
                      $caseMapping[$lowerTag] = $tag;
                      $tags[$tag] = 0;
                  }
                  $tags[$caseMapping[$lowerTag]]++;
              }
          }
          // Sort tags by usage (most used tag first)
          arsort($tags);
          return $tags;
      }

      /**
       * Returns the list of days containing articles (oldest first)
       * Output: An array containing days (in format YYYYMMDD).
       */
      public function days()
      {
          $linkDays = array();
          foreach ($this->links as $link) {
              // Used as a set: one key per day.
              $linkDays[$link['created']->format('Ymd')] = 0;
          }
          $linkDays = array_keys($linkDays);
          sort($linkDays);

          return $linkDays;
      }

      /**
       * Reorder links by creation date (newest first).
       *
       * Links created at the same date are ordered by ID, in the same direction.
       * Also update the urls and ids mapping arrays.
       *
       * @param string $order ASC|DESC
       */
      public function reorder($order = 'DESC')
      {
          $order = $order === 'ASC' ? -1 : 1;
          // Reorder array by dates.
          usort($this->links, function ($a, $b) use ($order) {
              if ($a['created'] == $b['created']) {
                  // Same date: fall back on the ID to get a deterministic order.
                  if ($a['id'] == $b['id']) {
                      return 0;
                  }
                  return $a['id'] < $b['id'] ? 1 * $order : -1 * $order;
              }
              return $a['created'] < $b['created'] ? 1 * $order : -1 * $order;
          });

          // usort() renumbers the array: rebuild both maps with the new offsets.
          $this->urls = array();
          $this->ids = array();
          foreach ($this->links as $key => $link) {
              $this->urls[$link['url']] = $key;
              $this->ids[$link['id']] = $key;
          }
      }

      /**
       * Return the next key for link creation.
       * E.g. If the last ID is 597, the next will be 598.
       *
       * @return int next ID (0 if there is no link yet).
       */
      public function getNextId()
      {
          if (!empty($this->ids)) {
              return max(array_keys($this->ids)) + 1;
          }
          return 0;
      }

      /**
       * Returns a link offset in links array from its unique ID.
       *
       * @param int $id Persistent ID of a link.
       *
       * @return int Real offset in local array, or null if doesn't exist.
       */
      protected function getLinkOffset($id)
      {
          if (isset($this->ids[$id])) {
              return $this->ids[$id];
          }
          return null;
      }
  }