MDL-50268 core: proper deprecation of get_file_url() method
[moodle.git] / lib / wiki_to_markdown.php
CommitLineData
90bf11b2 1<?php
90bf11b2 2
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17
/**
 * Utility class to convert wiki-like text to Markdown format
 *
 * @package    core
 * @subpackage lib
 * @copyright  Howard Miller, 2005
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
26
78bfb562
PS
27defined('MOODLE_INTERNAL') || die();
28
/**#@+
 * Block states: which kind of block the converter is currently inside.
 */
define('STATE_NONE', 1);       // A blank line was seen; the next line starts a new block.
define('STATE_PARAGRAPH', 2);  // Inside an ordinary paragraph.
define('STATE_BLOCKQUOTE', 3); // Inside a blockquote section.
define('STATE_PREFORM', 4);    // Inside preformatted text.
define('STATE_NOTIKI', 5);     // Inside preformatted text that gets no formatting at all.
/**#@-*/
/**#@+
 * List states: which kind of list (if any) is currently open.
 */
define('LIST_NONE', 1);        // No list is active.
define('LIST_UNORDERED', 2);   // An unordered list is active.
define('LIST_ORDERED', 3);     // An ordered list is active.
define('LIST_DEFINITION', 4);  // A definition list is active.
/**#@-*/
90bf11b2 46
/**
 * Converts text written in the legacy wiki-like format into Markdown.
 *
 * The text is processed line by line. A small state machine (the STATE_* and
 * LIST_* constants) tracks which block and which list the current line belongs
 * to. Constructs that have no Markdown equivalent are emitted as inline HTML.
 *
 * @package   moodlecore
 * @copyright Howard Miller, 2005
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class WikiToMarkdown {

    /** @var int current block state, one of the STATE_* constants */
    public $block_state;

    /** @var int current list state, one of the LIST_* constants */
    public $list_state;

    /** @var int depth of the list item emitted last (0 when no list is open) */
    public $list_depth;

    /** @var array stack of closing tags, one entry per list level currently open */
    public $list_backtrack;

    /** @var string output buffer (reset by convert(); the result itself is returned) */
    public $output;

    /** @var mixed course id passed to convert(), used to build course file links */
    public $courseid;

    /** @var bool reset to false by convert(); not read anywhere in this class */
    public $spelling_on = false;

    /**
     * Returns the text needed to close the block identified by $state.
     *
     * Any list that is still open is unwound first.
     *
     * @param int $state one of the STATE_* constants
     * @return string closing text (may be empty)
     */
    public function close_block( $state ) {
        // If we are inside a list, close that first.
        $lclose = '';
        if ($this->list_state != LIST_NONE) {
            $lclose = $this->do_list( ' ', true );
        }

        switch ($state) {
            case STATE_PREFORM:
                $sclose = "</pre>\n";
                break;
            case STATE_PARAGRAPH:
            case STATE_BLOCKQUOTE:
            case STATE_NOTIKI:
                $sclose = "\n";
                break;
            default:
                $sclose = '';
                break;
        }

        return $lclose . $sclose;
    }

    /**
     * Shared implementation of do_replace() and do_replace_markdown().
     *
     * Text wrapped in $mark is rewritten as $open . text . $close, provided the
     * opening mark follows the start of the line, a space or one of "(.," and
     * the closing mark is followed by a non-alphanumeric character or the end
     * of the line.
     *
     * The patterns use '#' as delimiter because callers pass '/', '%', '@' and
     * '~' as marks; with a '/' delimiter a '/' mark terminates the pattern early
     * and preg_replace() fails.
     *
     * @param string $line  text to process
     * @param string $mark  marker character as a regex fragment (escaped where needed, e.g. '\*')
     * @param string $open  text emitted in place of the opening mark
     * @param string $close text emitted in place of the closing mark
     * @return string processed text
     */
    private function replace_marked( $line, $mark, $open, $close ) {
        // BODGE: hide marks that sit between two alphanumerics (e.g. "and/or") behind a
        // placeholder so that the main substitution ignores them.
        $bodge = chr(1);
        $line = preg_replace( '#([[:alnum:]])'.$mark.'([[:alnum:]])#i', '\\1'.$bodge.'\\2', $line );

        $regex = '#(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)#i';
        $line = preg_replace( $regex, '\\1'.$open.'\\2'.$close.'\\3', $line );

        // BODGE: put the hidden marks back. $mark is restored exactly as it was passed in,
        // so a regex-escaped mark such as '\*' comes back with its backslash.
        return str_replace( $bodge, $mark, $line );
    }

    /**
     * Replaces text wrapped in $mark with the same text wrapped in an HTML element.
     *
     * @param string $line text to process
     * @param string $mark marker character as a regex fragment (e.g. '\+' or '%')
     * @param string $tag  name of the HTML element to emit
     * @return string processed text
     */
    public function do_replace( $line, $mark, $tag ) {
        return $this->replace_marked( $line, $mark, '<'.$tag.'>', '</'.$tag.'>' );
    }

    /**
     * Markdown variant of do_replace(): no HTML is generated, the wiki mark is
     * simply swapped for the Markdown mark on both sides of the text.
     *
     * @param string $line text to process
     * @param string $mark marker character as a regex fragment (e.g. '\*' or '/')
     * @param string $tag  Markdown mark to emit on both sides (e.g. '**')
     * @return string processed text
     */
    public function do_replace_markdown( $line, $mark, $tag ) {
        return $this->replace_marked( $line, $mark, $tag, $tag );
    }

    /**
     * Replacement for subscript and superscript: unlike do_replace() the marks
     * are recognised anywhere, including inside a word.
     *
     * @param string $line text to process
     * @param string $mark marker character as a regex fragment (e.g. '~' or '\^')
     * @param string $tag  name of the HTML element to emit
     * @return string processed text
     */
    public function do_replace_sub( $line, $mark, $tag ) {
        $regex = '#'.$mark.'([^'.$mark.']*)'.$mark.'#i';

        return preg_replace( $regex, '<'.$tag.'>\\1</'.$tag.'>', $line );
    }

    /**
     * Converts one list line and updates the list state.
     *
     * The number of leading list characters gives the nesting depth. With
     * $blank set the line content is ignored and every open list is closed.
     *
     * @param string $line  line starting with a run of '*', '#', ';' or ':' and a space
     * @param bool   $blank true to drop back to depth 0 (used when a block ends)
     * @return string converted line, or just the closing tags when $blank is true
     */
    public function do_list( $line, $blank=false ) {
        // Get the list character and strip the leading run of it from the line.
        if ($blank) {
            $listchar = '';
            $count = 0;
        } else {
            $listchar = $line[0];
            $count = strspn( $line, $listchar );
            $line = preg_replace( '/^['.$listchar.']+ /i', '', $line );
        }

        // List character => list open tag, list close tag, item open tag, item close tag, list state.
        $styles = array(
            '*' => array( '', '', '*', '', LIST_UNORDERED ),
            '#' => array( '', '', '1.', '', LIST_ORDERED ),
            ';' => array( '<dl>', '</dl>', '<dd>', '</dd>', LIST_DEFINITION ),
            ':' => array( '<dl>', '</dl>', '<dt>', '</dt>', LIST_DEFINITION ),
        );
        if (isset( $styles[$listchar] )) {
            list( $list_tag, $list_close_tag, $item_tag, $item_close_tag, $list_style ) = $styles[$listchar];
        } else {
            $list_tag = '';
            $list_close_tag = '';
            $item_tag = '';
            $item_close_tag = '';
            $list_style = LIST_NONE;
        }

        $tags = '';

        // Depth has reduced: emit one stored closing tag per level left.
        for ($i = $this->list_depth; $i > $count; $i--) {
            $tags .= array_pop( $this->list_backtrack );
        }

        // Depth has increased: open one list per new level and remember how to close it.
        for ($i = $this->list_depth; $i < $count; $i++) {
            $this->list_backtrack[] = $list_close_tag;
            $tags .= $list_tag;
        }

        // List state now matches the style, and depth matches the count.
        $this->list_state = $list_style;
        $this->list_depth = $count;

        if ($blank) {
            return $tags;
        }

        // One space of indent per nesting level below the first.
        $indent = str_repeat( ' ', max( 0, $count - 1 ) );

        return $tags . $indent . $item_tag . ' ' . $line . $item_close_tag;
    }

    /**
     * Applies all inline conversions to a single line.
     *
     * The order of the replacements matters: later steps rely on what earlier
     * steps have (or have not) already rewritten.
     *
     * @param string $line line of wiki-like text
     * @return string the line converted to Markdown / inline HTML
     */
    public function line_replace( $line ) {
        global $CFG;

        // "----" (a horizontal rule) is the same in Markdown, so it is left untouched.

        // Is this a list line (starts with a run of * # ; or : followed by a space)?
        if (preg_match( '/^([*]+|[#]+|[;]+|[:]+) /i', $line )) {
            $line = $this->do_list( $line );
        }

        // Typographic conventions have no Markdown equivalent, so convert them to entities.
        // The "--" and " - " dash conversions are not applied (they were commented out in
        // the original implementation).
        $line = str_replace(
            array( '...', '(R)', '(r)', '(TM)', '(tm)', '(C)', '1/4', '1/2', '3/4' ),
            array( ' &#8230; ', '&#174;', '&#174;', '&#8482;', '&#8482;', '&#169;', '&#188;', '&#189;', '&#190;' ),
            $line
        );
        // (digits) x (digits) is a multiplication sign.
        $line = preg_replace( '/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i', '\\1&#215;\\2', $line );

        // Formatting marks. Only bold and italic become Markdown, the rest stay HTML.
        // The "/" replacement has to run before the ones that emit HTML, otherwise it
        // would trip over the "/" of their closing tags.
        // The "-" (del) markup is not applied (it was commented out in the original).
        $line = $this->do_replace_markdown( $line, '\*', '**' );
        $line = $this->do_replace_markdown( $line, '/', '*' );
        $line = $this->do_replace( $line, '\+', 'ins' );
        $line = $this->do_replace_sub( $line, '~', 'sub' );
        $line = $this->do_replace_sub( $line, '\^', 'sup' );
        $line = $this->do_replace( $line, '%', 'code' );
        $line = $this->do_replace( $line, '@', 'cite' );

        // URL(text) becomes a Markdown link. These patterns contain "/" themselves, so
        // they are delimited with "~" instead.
        $line = preg_replace( '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
                '\\1[\\5](\\2://\\3\\4)', $line );
        $line = preg_replace( '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
                '\\1[\\5](http://www.\\2\\3)', $line );

        // Bare urls (with and without a scheme) become Markdown automatic links.
        $line = preg_replace( '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i',
                '\\1<\\2://\\3\\4>', $line );
        $line = preg_replace( '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])~i',
                '\\1<http://www.\\2\\3>', $line );

        // address(text) becomes a mailto link; emitted as HTML.
        $line = preg_replace( '/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i',
                '\\1<a href="mailto:\\2">\\3</a>', $line );

        // "!N " at the beginning of a line is a heading; N (1-6) becomes the number of hashes.
        if (preg_match( '/^!([1-6]) (.*)$/i', $line, $regs )) {
            $hashes = str_repeat( '#', (int)$regs[1] );
            $line = preg_replace( '/^!([1-6]) (.*)$/i', $hashes.' \\2', $line );
        }

        // Acronym handling, e.g. HTML(Hypertext Markup Language); no Markdown equivalent, so HTML.
        $line = preg_replace( '/([A-Z]+)\(([^)]+)\)/', '<acronym title="\\2">\\1</acronym>', $line );

        // Resource link modname:id(Description Text) becomes a Markdown link to the module.
        $line = preg_replace( '/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i',
                ' [\\3]('.$CFG->wwwroot.'/mod/\\1/view.php?id=\\2) ', $line );

        // Base url of the course files. It must be a string because it is concatenated
        // into the replacements below.
        // NOTE(review): relies on the value returned by make_legacyfile_url() converting
        // to its url when cast to string - confirm against moodle_url.
        $coursefileurl = (string)moodle_url::make_legacyfile_url( $this->courseid, null );

        // Picture resource link /path/name.ext(alt text).
        $line = preg_replace( '#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i',
                '![\\3]('.$coursefileurl.'/\\1\\2)', $line );

        // File resource link file:/path(text).
        $line = preg_replace( '#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i',
                '[\\2]('.$coursefileurl.'/\\1)', $line );

        return $line;
    }

    /**
     * Main entry point: converts wiki-like text to Markdown.
     *
     * @param string $content  text with wiki-like formatting
     * @param mixed  $courseid course id used when building links to course files
     * @return string text with Markdown formatting
     */
    public function convert( $content, $courseid ) {
        // Initialisation.
        $this->output = '';
        $this->block_state = STATE_NONE;
        $this->list_state = LIST_NONE;
        $this->list_depth = 0;
        $this->list_backtrack = array();
        $this->spelling_on = false;
        $this->courseid = $courseid;

        // Markdown only treats lines indented by at least four spaces as a code block.
        $codeindent = '    ';

        $buffer = '';

        foreach (explode( "\n", $content ) as $line) {
            // A blank line ends the current block.
            if (preg_match( "/^[[:blank:]\r]*$/i", $line )) {
                $buffer .= $this->close_block( $this->block_state );
                $this->block_state = STATE_NONE;
                continue;
            }

            if ($this->block_state == STATE_NONE) {
                // The start of the first line defines the block type.
                if (preg_match( '/^> /i', $line )) {
                    // Blockquote.
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_BLOCKQUOTE;
                } else if (preg_match( '/^ /i', $line )) {
                    // Preformatted text: no real Markdown equivalent, so use <pre>.
                    $buffer .= "<pre>\n" . $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PREFORM;
                } else if (preg_match( '/^\% /i', $line )) {
                    // Preformatted text without processing: a Markdown code block.
                    $buffer .= $codeindent . preg_replace( '/^\%/i', '', $line ) . "\n";
                    $this->block_state = STATE_NOTIKI;
                } else {
                    // Ordinary paragraph.
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PARAGRAPH;
                }
            } else if ($this->block_state == STATE_NOTIKI) {
                // Continuation of a code block: copied verbatim.
                $buffer .= $codeindent . $line . "\n";
            } else if ($this->block_state == STATE_PARAGRAPH
                    || $this->block_state == STATE_BLOCKQUOTE
                    || $this->block_state == STATE_PREFORM) {
                $buffer .= $this->line_replace( $line ) . "\n";
            }
        }

        // Close off whatever block is still open.
        $buffer .= $this->close_block( $this->block_state );

        return $buffer;
    }
}