markdown.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. <?php
  2. /**
  3. * Plugin Markdown.
  4. *
  5. * Shaare's descriptions are parsed with Markdown.
  6. */
  7. use Shaarli\Config\ConfigManager;
  /*
   * Meta-tag name: a shaare carrying this tag keeps its description as is,
   * i.e. the description is not handed to Parsedown.
   */
  const NO_MD_TAG = 'nomarkdown';
  /**
   * Hook render_linklist: convert every link description from Markdown to HTML.
   *
   * Links tagged with NO_MD_TAG are left unrendered; only the meta-tag itself
   * is stripped from them. For every other link the untouched description is
   * kept under 'description_src' before 'description' is replaced by the
   * rendered HTML.
   *
   * @param array         $data linklist data (reads and rewrites $data['links']).
   * @param ConfigManager $conf instance.
   *
   * @return mixed linklist data parsed in markdown (and converted to HTML).
   */
  function hook_markdown_render_linklist($data, $conf)
  {
      foreach ($data['links'] as &$link) {
          $markdownDisabled = !empty($link['tags']) && noMarkdownTag($link['tags']);

          if ($markdownDisabled) {
              // Hide the meta-tag, keep the description as it is.
              $link = stripNoMarkdownTag($link);
          } else {
              $link['description_src'] = $link['description'];
              $link['description'] = process_markdown(
                  $link['description'],
                  $conf->get('security.markdown_escape', true),
                  $conf->get('security.allowed_protocols')
              );
          }
      }
      // Release the by-reference loop variable.
      unset($link);

      return $data;
  }
  /**
   * Hook render_feed: convert feed link descriptions from Markdown to HTML.
   *
   * Links tagged with NO_MD_TAG only get the meta-tag stripped. For the other
   * links, the permalink anchor appended to the description is first turned
   * back into Markdown link syntax, then the whole description is rendered.
   *
   * @param array         $data linklist data (reads and rewrites $data['links']).
   * @param ConfigManager $conf instance.
   *
   * @return mixed linklist data parsed in markdown (and converted to HTML).
   */
  function hook_markdown_render_feed($data, $conf)
  {
      foreach ($data['links'] as &$link) {
          if (empty($link['tags']) || !noMarkdownTag($link['tags'])) {
              $link['description'] = process_markdown(
                  reverse_feed_permalink($link['description']),
                  $conf->get('security.markdown_escape', true),
                  $conf->get('security.allowed_protocols')
              );
              continue;
          }

          // Markdown disabled for this link: only hide the meta-tag.
          $link = stripNoMarkdownTag($link);
      }
      // Release the by-reference loop variable.
      unset($link);

      return $data;
  }
  /**
   * Hook render_daily: convert daily page descriptions from Markdown to HTML.
   *
   * Works on $data['linksToDisplay'] and rewrites each entry's
   * 'formatedDescription'. Entries tagged with NO_MD_TAG only get the
   * meta-tag stripped.
   *
   * @param array         $data daily data.
   * @param ConfigManager $conf instance.
   *
   * @return mixed daily data parsed in markdown (and converted to HTML).
   */
  function hook_markdown_render_daily($data, $conf)
  {
      foreach ($data['linksToDisplay'] as &$link) {
          $markdownDisabled = !empty($link['tags']) && noMarkdownTag($link['tags']);

          if ($markdownDisabled) {
              $link = stripNoMarkdownTag($link);
          } else {
              $link['formatedDescription'] = process_markdown(
                  $link['formatedDescription'],
                  $conf->get('security.markdown_escape', true),
                  $conf->get('security.allowed_protocols')
              );
          }
      }
      // Release the by-reference loop variable.
      unset($link);

      return $data;
  }
  /**
   * Check if the no-markdown meta-tag (NO_MD_TAG) is present in a tag list.
   *
   * The tag must appear as a whole, whitespace-delimited word: a tag that
   * merely contains NO_MD_TAG as a substring does not count.
   *
   * @param string $tags whitespace separated tag list
   *
   * @return bool true if markdown should be disabled on this link.
   */
  function noMarkdownTag($tags)
  {
      // preg_quote() keeps the check correct whatever characters the tag name holds.
      $pattern = '/(^|\s)' . preg_quote(NO_MD_TAG, '/') . '(\s|$)/';

      // preg_match() yields 1, 0 or false: normalise to the documented boolean.
      return preg_match($pattern, (string) $tags) === 1;
  }
  /**
   * Remove the no-markdown meta tag so it won't be displayed.
   *
   * Both representations of the tags are cleaned when present:
   *   - 'taglist' (array of tags): the first NO_MD_TAG entry is unset,
   *     remaining keys are left as they are;
   *   - 'tags' (whitespace separated string): every NO_MD_TAG word is dropped
   *     and the remaining tags are re-joined with single spaces.
   *
   * @param array $link Link data.
   *
   * @return array Updated link without no markdown tag.
   */
  function stripNoMarkdownTag($link)
  {
      if (!empty($link['taglist'])) {
          // Strict search: never match a non-string entry through type juggling.
          $offset = array_search(NO_MD_TAG, $link['taglist'], true);
          if ($offset !== false) {
              unset($link['taglist'][$offset]);
          }
      }

      if (!empty($link['tags']) && is_string($link['tags'])) {
          // Work on whole tags, so that a tag merely containing the meta-tag
          // as a substring is preserved, and store the result back in the link.
          $remaining = array_filter(
              preg_split('/\s+/', $link['tags'], -1, PREG_SPLIT_NO_EMPTY),
              function ($tag) {
                  return $tag !== NO_MD_TAG;
              }
          );
          $link['tags'] = implode(' ', $remaining);
      }

      return $link;
  }
  /**
   * Hook render_includes: register the plugin stylesheet.
   *
   * The markdown CSS file is appended to $data['css_files'] on the link list,
   * daily and edit link pages only; any other page is returned untouched.
   *
   * @param array $data includes data.
   *
   * @return mixed - includes data with markdown CSS file added.
   */
  function hook_markdown_render_includes($data)
  {
      $page = $data['_PAGE_'];
      $usesMarkdown = $page == Router::$PAGE_LINKLIST
          || $page == Router::$PAGE_DAILY
          || $page == Router::$PAGE_EDITLINK;

      if (!$usesMarkdown) {
          return $data;
      }

      $data['css_files'][] = PluginManager::$PLUGINS_PATH . '/markdown/markdown.css';

      return $data;
  }
  /**
   * Hook render_editlink.
   * Adds a help link to the markdown syntax, and makes sure the no-markdown
   * meta-tag is part of the tag list used for autocompletion.
   *
   * @param array $data data passed to plugin
   *
   * @return array altered $data.
   */
  function hook_markdown_render_editlink($data)
  {
      // Load help HTML into a string; it is used as a printf-style template.
      $helpTemplate = file_get_contents(PluginManager::$PLUGINS_PATH . '/markdown/help.html');
      if ($helpTemplate !== false) {
          $translations = [
              t('Description will be rendered with'),
              t('Markdown syntax documentation'),
              t('Markdown syntax'),
          ];
          $data['edit_link_plugin'][] = vsprintf($helpTemplate, $translations);
      }

      // Add no markdown 'meta-tag' in tag list if it was never used, for autocompletion.
      // The tag list is keyed by tag name (the entry added below is name => count),
      // so the KEY has to be looked up: searching the values would compare the tag
      // name against usage counts and reset the counter of an already used tag.
      if (!isset($data['tags'][NO_MD_TAG])) {
          $data['tags'][NO_MD_TAG] = 0;
      }

      return $data;
  }
  /**
   * Remove HTML links auto generated by Shaarli core system.
   * Keeps HREF attributes.
   *
   * The description is processed line by line (split on PHP_EOL):
   *   - inside code (fenced ``` block, line indented by 4+ spaces, or
   *     `inline code`), every generated anchor is replaced by its text;
   *   - elsewhere, plain anchors are replaced by their text, while hashtag
   *     anchors (recognised by their title="Hashtag ..." attribute) are
   *     rewritten as Markdown links `[text](href)`.
   *
   * @param string $description input description text.
   *
   * @return string $description without HTML links.
   */
  function reverse_text2clickable($description)
  {
      $hashtagTitle = ' title="Hashtag [^"]+"';
      $insideFence = false;
      $processedLines = [];

      foreach (explode(PHP_EOL, $description) as $line) {
          // Detect line of code: starting with at least 4 spaces,
          // except lists which can start with +/*/- or `2.` after spaces.
          // The possessive quantifier forbids backtracking into the indentation,
          // so the look-aheads always inspect the first non-space character.
          $indentedCode = preg_match('/^ {4,}+(?=[^\+\*\-])(?=(?!\d+\.).)/', $line) === 1;

          // A ``` line toggles the fenced block; the opening fence line itself
          // counts as code, the closing one does not.
          if (preg_match('/^```/', $line) === 1) {
              $insideFence = !$insideFence;
          }
          $inCode = $insideFence || $indentedCode;

          // Reverse `inline code` hashtags.
          $line = preg_replace(
              '!(`[^`\n]*)<a href="[^ ]*"'. $hashtagTitle .'>([^<]+)</a>([^`\n]*`)!m',
              '$1$2$3',
              $line
          );

          // Reverse all links in code blocks, only non hashtag elsewhere.
          $hashtagFilter = $inCode ? '(?:'. $hashtagTitle .')?' : '(?!'. $hashtagTitle .')';
          $line = preg_replace(
              '#<a href="[^ ]*"'. $hashtagFilter .'>([^<]+)</a>#m',
              '$1',
              $line
          );

          // Make hashtag links markdown ready, otherwise the links will be ignored with escape set to true
          if (!$inCode) {
              $line = preg_replace(
                  '#<a href="([^ ]*)"'. $hashtagTitle .'>([^<]+)</a>#m',
                  '[$2]($1)',
                  $line
              );
          }

          $processedLines[] = $line;
      }

      return implode(PHP_EOL, $processedLines);
  }
  /**
   * Strip every <br> tag so that line breaks are left to the markdown parser.
   *
   * Matches `<br>`, `<br/>` and `<br />` (any number of spaces before the
   * optional slash), case-insensitively.
   *
   * @param string $description input description text.
   *
   * @return string $description without <br> tags.
   */
  function reverse_nl2br($description)
  {
      $lineBreakTag = '!<br */?>!im';

      return preg_replace($lineBreakTag, '', $description);
  }
  /**
   * Turn the HTML spaces '&nbsp;' auto generated by Shaarli core system back
   * into regular spaces.
   *
   * Only an '&nbsp;' located at the beginning of a line or right after a
   * space is converted.
   *
   * @param string $description input description text.
   *
   * @return string $description with those '&nbsp;' entities replaced by spaces.
   */
  function reverse_space2nbsp($description)
  {
      $leadingNbsp = '/(^| )&nbsp;/m';
      $replacement = '$1 ';

      return preg_replace($leadingNbsp, $replacement, $description);
  }
  /**
   * Convert the permalink anchor found at the end of a line of a feed
   * description into Markdown link syntax.
   *
   * The expected input form is `&#8212; <a href="URL" title="...">Word</a>`
   * at the end of a line; it becomes `&#8212; [Word](URL)`.
   *
   * @param string $description input description text.
   *
   * @return string $description with the permalink anchor as a Markdown link.
   */
  function reverse_feed_permalink($description)
  {
      $permalinkAnchor = '@&#8212; <a href="([^"]+)" title="[^"]+">(\w+)</a>$@im';
      $markdownLink = '&#8212; [$2]($1)';

      return preg_replace($permalinkAnchor, $markdownLink, $description);
  }
  /**
   * Filter the protocol of every Markdown link target found in the description.
   *
   * Each `](target)` occurrence has its target passed through
   * whitelist_protocols() together with the allowed protocol list, and is
   * rebuilt with whatever that helper returns.
   *
   * @param string $description      input description text.
   * @param array  $allowedProtocols list of allowed protocols.
   *
   * @return string $description without malicious link.
   */
  function filter_protocols($description, $allowedProtocols)
  {
      $rewriteTarget = function ($match) use ($allowedProtocols) {
          $safeTarget = whitelist_protocols($match[1], $allowedProtocols);

          return ']('. $safeTarget .')';
      };

      return preg_replace_callback('#]\((.*?)\)#is', $rewriteTarget, $description);
  }
  /**
   * Neutralise dangerous HTML (script, style, link, iframe, frameset and frame
   * tags, plus inline `on*` event handler attributes).
   * Doesn't affect <code> content (already escaped by Parsedown).
   *
   * Dangerous tags are not removed: the matched markup is passed through
   * escape(). Event handler attributes are removed from the tag.
   *
   * @param string $description input description text.
   *
   * @return string given string escaped.
   */
  function sanitize_html($description)
  {
      $escapeTags = [
          'script',
          'style',
          'link',
          'iframe',
          'frameset',
          'frame',
      ];

      foreach ($escapeTags as $tag) {
          $description = preg_replace_callback(
              '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
              function ($match) {
                  return escape($match[0]);
              },
              $description
          );
      }

      // A single pass consumes the beginning of the tag along with the handler
      // it removes, so it only strips ONE on* attribute per tag (the last one).
      // Repeat until nothing matches any more; every replacement shortens the
      // string, hence the loop always terminates.
      do {
          $description = preg_replace(
              '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
              '$1',
              $description,
              -1,
              $removed
          );
      } while ($removed > 0);

      return $description;
  }
  /**
   * Render shaare contents through Markdown parser.
   *   1. Remove HTML generated by Shaarli core (<br>, &nbsp;, generated links).
   *   2. Filter the protocol of Markdown link targets.
   *   3. Reverse the escape function.
   *   4. Generate markdown descriptions (line breaks enabled).
   *   5. Sanitize sensible HTML tags for security.
   *   6. Wrap description in 'markdown' CSS class.
   *
   * @param string $description      input description text.
   * @param bool   $escape           escape HTML entities
   * @param array  $allowedProtocols list of protocols allowed in link targets.
   *
   * @return string HTML processed $description.
   */
  function process_markdown($description, $escape = true, $allowedProtocols = [])
  {
      // Undo the HTML decoration before the text reaches the parser.
      $markdownSource = reverse_nl2br($description);
      $markdownSource = reverse_space2nbsp($markdownSource);
      $markdownSource = reverse_text2clickable($markdownSource);
      $markdownSource = unescape(filter_protocols($markdownSource, $allowedProtocols));

      $parser = new Parsedown();
      $parser->setMarkupEscaped($escape);
      $parser->setBreaksEnabled(true);

      $html = sanitize_html($parser->text($markdownSource));
      if (empty($html)) {
          // Nothing rendered: do not emit an empty wrapper.
          return $html;
      }

      return '<div class="markdown">'. $html . '</div>';
  }
  /**
   * Translation placeholder: this function is never called.
   *
   * Its only purpose is to expose translatable strings through t() calls so
   * that GNU gettext extraction picks them up. The string literal (line breaks
   * included) must therefore stay exactly as written.
   */
  function markdown_dummy_translation()
  {
      // meta
      t('Render shaare description with Markdown syntax.<br><strong>Warning</strong>:
If your shaared descriptions contained HTML tags before enabling the markdown plugin,
enabling it might break your page.
See the <a href="https://github.com/shaarli/Shaarli/tree/master/plugins/markdown#html-rendering">README</a>.');
  }