markdown.php 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. <?php
  2. /**
  3. * Plugin Markdown.
  4. *
  5. * Shaare's descriptions are parsed with Markdown.
  6. */
  7. /*
  8. * Name of the opt-out meta-tag: when it is found in a shaare's tags, the render hooks skip process_markdown() (Parsedown) for that shaare's description.
  9. */
  10. define('NO_MD_TAG', 'nomarkdown');
  11. /**
  12. * Parse linklist descriptions.
  13. *
  14. * @param array $data linklist data.
  15. * @param ConfigManager $conf instance.
  16. *
  17. * @return mixed linklist data parsed in markdown (and converted to HTML).
  18. */
  19. function hook_markdown_render_linklist($data, $conf)
  20. {
  21. foreach ($data['links'] as &$value) {
  22. if (!empty($value['tags']) && noMarkdownTag($value['tags'])) {
  23. $value = stripNoMarkdownTag($value);
  24. continue;
  25. }
  26. $value['description'] = process_markdown(
  27. $value['description'],
  28. $conf->get('security.markdown_escape', true),
  29. $conf->get('security.allowed_protocols')
  30. );
  31. }
  32. return $data;
  33. }
  34. /**
  35. * Parse feed linklist descriptions.
  36. *
  37. * @param array $data linklist data.
  38. * @param ConfigManager $conf instance.
  39. *
  40. * @return mixed linklist data parsed in markdown (and converted to HTML).
  41. */
  42. function hook_markdown_render_feed($data, $conf)
  43. {
  44. foreach ($data['links'] as &$value) {
  45. if (!empty($value['tags']) && noMarkdownTag($value['tags'])) {
  46. $value = stripNoMarkdownTag($value);
  47. continue;
  48. }
  49. $value['description'] = process_markdown(
  50. $value['description'],
  51. $conf->get('security.markdown_escape', true),
  52. $conf->get('security.allowed_protocols')
  53. );
  54. }
  55. return $data;
  56. }
  57. /**
  58. * Parse daily descriptions.
  59. *
  60. * @param array $data daily data.
  61. * @param ConfigManager $conf instance.
  62. *
  63. * @return mixed daily data parsed in markdown (and converted to HTML).
  64. */
  65. function hook_markdown_render_daily($data, $conf)
  66. {
  67. // Manipulate columns data
  68. foreach ($data['cols'] as &$value) {
  69. foreach ($value as &$value2) {
  70. if (!empty($value2['tags']) && noMarkdownTag($value2['tags'])) {
  71. $value2 = stripNoMarkdownTag($value2);
  72. continue;
  73. }
  74. $value2['formatedDescription'] = process_markdown(
  75. $value2['formatedDescription'],
  76. $conf->get('security.markdown_escape', true),
  77. $conf->get('security.allowed_protocols')
  78. );
  79. }
  80. }
  81. return $data;
  82. }
  83. /**
  84. * Check if noMarkdown is set in tags.
  85. *
  86. * @param string $tags tag list
  87. *
  88. * @return bool true if markdown should be disabled on this link.
  89. */
  90. function noMarkdownTag($tags)
  91. {
  92. return preg_match('/(^|\s)'. NO_MD_TAG .'(\s|$)/', $tags);
  93. }
  94. /**
  95. * Remove the no-markdown meta tag so it won't be displayed.
  96. *
  97. * @param array $link Link data.
  98. *
  99. * @return array Updated link without no markdown tag.
  100. */
  101. function stripNoMarkdownTag($link)
  102. {
  103. if (! empty($link['taglist'])) {
  104. $offset = array_search(NO_MD_TAG, $link['taglist']);
  105. if ($offset !== false) {
  106. unset($link['taglist'][$offset]);
  107. }
  108. }
  109. if (!empty($link['tags'])) {
  110. str_replace(NO_MD_TAG, '', $link['tags']);
  111. }
  112. return $link;
  113. }
  114. /**
  115. * When link list is displayed, include markdown CSS.
  116. *
  117. * @param array $data includes data.
  118. *
  119. * @return mixed - includes data with markdown CSS file added.
  120. */
  121. function hook_markdown_render_includes($data)
  122. {
  123. if ($data['_PAGE_'] == Router::$PAGE_LINKLIST
  124. || $data['_PAGE_'] == Router::$PAGE_DAILY
  125. || $data['_PAGE_'] == Router::$PAGE_EDITLINK
  126. ) {
  127. $data['css_files'][] = PluginManager::$PLUGINS_PATH . '/markdown/markdown.css';
  128. }
  129. return $data;
  130. }
  131. /**
  132. * Hook render_editlink.
  133. * Adds an help link to markdown syntax.
  134. *
  135. * @param array $data data passed to plugin
  136. *
  137. * @return array altered $data.
  138. */
  139. function hook_markdown_render_editlink($data)
  140. {
  141. // Load help HTML into a string
  142. $txt = file_get_contents(PluginManager::$PLUGINS_PATH .'/markdown/help.html');
  143. $translations = [
  144. t('Description will be rendered with'),
  145. t('Markdown syntax documentation'),
  146. t('Markdown syntax'),
  147. ];
  148. $data['edit_link_plugin'][] = vsprintf($txt, $translations);
  149. // Add no markdown 'meta-tag' in tag list if it was never used, for autocompletion.
  150. if (! in_array(NO_MD_TAG, $data['tags'])) {
  151. $data['tags'][NO_MD_TAG] = 0;
  152. }
  153. return $data;
  154. }
  155. /**
  156. * Remove HTML links auto generated by Shaarli core system.
  157. * Keeps HREF attributes.
  158. *
  159. * @param string $description input description text.
  160. *
  161. * @return string $description without HTML links.
  162. */
  163. function reverse_text2clickable($description)
  164. {
  165. $descriptionLines = explode(PHP_EOL, $description);
  166. $descriptionOut = '';
  167. $codeBlockOn = false;
  168. $lineCount = 0;
  169. foreach ($descriptionLines as $descriptionLine) {
  170. // Detect line of code: starting with 4 spaces,
  171. // except lists which can start with +/*/- or `2.` after spaces.
  172. $codeLineOn = preg_match('/^ +(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;
  173. // Detect and toggle block of code
  174. if (!$codeBlockOn) {
  175. $codeBlockOn = preg_match('/^```/', $descriptionLine) > 0;
  176. }
  177. elseif (preg_match('/^```/', $descriptionLine) > 0) {
  178. $codeBlockOn = false;
  179. }
  180. $hashtagTitle = ' title="Hashtag [^"]+"';
  181. // Reverse `inline code` hashtags.
  182. $descriptionLine = preg_replace(
  183. '!(`[^`\n]*)<a href="[^ ]*"'. $hashtagTitle .'>([^<]+)</a>([^`\n]*`)!m',
  184. '$1$2$3',
  185. $descriptionLine
  186. );
  187. // Reverse all links in code blocks, only non hashtag elsewhere.
  188. $hashtagFilter = (!$codeBlockOn && !$codeLineOn) ? '(?!'. $hashtagTitle .')': '(?:'. $hashtagTitle .')?';
  189. $descriptionLine = preg_replace(
  190. '#<a href="[^ ]*"'. $hashtagFilter .'>([^<]+)</a>#m',
  191. '$1',
  192. $descriptionLine
  193. );
  194. $descriptionOut .= $descriptionLine;
  195. if ($lineCount++ < count($descriptionLines) - 1) {
  196. $descriptionOut .= PHP_EOL;
  197. }
  198. }
  199. return $descriptionOut;
  200. }
  201. /**
  202. * Remove <br> tag to let markdown handle it.
  203. *
  204. * @param string $description input description text.
  205. *
  206. * @return string $description without <br> tags.
  207. */
  208. function reverse_nl2br($description)
  209. {
  210. return preg_replace('!<br */?>!im', '', $description);
  211. }
  212. /**
  213. * Remove HTML spaces '&nbsp;' auto generated by Shaarli core system.
  214. *
  215. * @param string $description input description text.
  216. *
  217. * @return string $description without HTML links.
  218. */
  219. function reverse_space2nbsp($description)
  220. {
  221. return preg_replace('/(^| )&nbsp;/m', '$1 ', $description);
  222. }
  223. /**
  224. * Replace not whitelisted protocols with http:// in given description.
  225. *
  226. * @param string $description input description text.
  227. * @param array $allowedProtocols list of allowed protocols.
  228. *
  229. * @return string $description without malicious link.
  230. */
  231. function filter_protocols($description, $allowedProtocols)
  232. {
  233. return preg_replace_callback(
  234. '#]\((.*?)\)#is',
  235. function ($match) use ($allowedProtocols) {
  236. return ']('. whitelist_protocols($match[1], $allowedProtocols) .')';
  237. },
  238. $description
  239. );
  240. }
  241. /**
  242. * Remove dangerous HTML tags (tags, iframe, etc.).
  243. * Doesn't affect <code> content (already escaped by Parsedown).
  244. *
  245. * @param string $description input description text.
  246. *
  247. * @return string given string escaped.
  248. */
  249. function sanitize_html($description)
  250. {
  251. $escapeTags = array(
  252. 'script',
  253. 'style',
  254. 'link',
  255. 'iframe',
  256. 'frameset',
  257. 'frame',
  258. );
  259. foreach ($escapeTags as $tag) {
  260. $description = preg_replace_callback(
  261. '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
  262. function ($match) { return escape($match[0]); },
  263. $description);
  264. }
  265. $description = preg_replace(
  266. '#(<[^>]+)on[a-z]*="?[^ "]*"?#is',
  267. '$1',
  268. $description);
  269. return $description;
  270. }
  271. /**
  272. * Render shaare contents through Markdown parser.
  273. * 1. Remove HTML generated by Shaarli core.
  274. * 2. Reverse the escape function.
  275. * 3. Generate markdown descriptions.
  276. * 4. Sanitize sensible HTML tags for security.
  277. * 5. Wrap description in 'markdown' CSS class.
  278. *
  279. * @param string $description input description text.
  280. * @param bool $escape escape HTML entities
  281. *
  282. * @return string HTML processed $description.
  283. */
  284. function process_markdown($description, $escape = true, $allowedProtocols = [])
  285. {
  286. $parsedown = new Parsedown();
  287. $processedDescription = $description;
  288. $processedDescription = reverse_nl2br($processedDescription);
  289. $processedDescription = reverse_space2nbsp($processedDescription);
  290. $processedDescription = reverse_text2clickable($processedDescription);
  291. $processedDescription = filter_protocols($processedDescription, $allowedProtocols);
  292. $processedDescription = unescape($processedDescription);
  293. $processedDescription = $parsedown
  294. ->setMarkupEscaped($escape)
  295. ->setBreaksEnabled(true)
  296. ->text($processedDescription);
  297. $processedDescription = sanitize_html($processedDescription);
  298. if(!empty($processedDescription)){
  299. $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
  300. }
  301. return $processedDescription;
  302. }
  303. /**
  304. * Placeholder for translatable strings: this function is never called, it only holds t() calls so that GNU gettext extraction picks them up.
  305. */
  306. function markdown_dummy_translation()
  307. {
  308. // meta (presumably the plugin description string - TODO confirm where it is displayed)
  309. t('Render shaare description with Markdown syntax.<br><strong>Warning</strong>:
  310. If your shaared descriptions contained HTML tags before enabling the markdown plugin,
  311. enabling it might break your page.
  312. See the <a href="https://github.com/shaarli/Shaarli/tree/master/plugins/markdown#html-rendering">README</a>.');
  313. }