MDL-53518 libraries: upgrade markdown to 1.6.0 version
[moodle.git] / lib / markdown / MarkdownExtra.php
1 <?php
2 #
3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown Extra
6 # Copyright (c) 2004-2015 Michel Fortin  
7 # <https://michelf.ca/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber  
11 # <https://daringfireball.net/projects/markdown/>
12 #
13 namespace Michelf;
16 #
17 # Markdown Extra Parser Class
18 #
20 class MarkdownExtra extends \Michelf\Markdown {
        ### Configuration Variables ###
        #
        # Public properties that can be changed on a parser instance.
        #
        # Prefix for footnote ids.
        public $fn_id_prefix = "";
        
        # Optional title attribute for footnote links and backlinks.
        public $fn_link_title = "";
        public $fn_backlink_title = "";
        
        # Optional class attribute for footnote links and backlinks.
        public $fn_link_class = "footnote-ref";
        public $fn_backlink_class = "footnote-backref";
        # Content to be displayed within footnote backlinks. The default is '↩';
        # the U+FE0E on the end is a Unicode variant selector used to prevent iOS
        # from displaying the arrow character as an emoji.
        public $fn_backlink_html = '&#8617;&#xFE0E;';
        # Class name for table cell alignment (%% replaced left/center/right)
        # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
        # If empty, the align attribute is used instead of a class name.
        public $table_align_class_tmpl = '';
        # Optional class prefix for fenced code block.
        public $code_class_prefix = "";
        # Class attribute for code blocks goes on the `code` tag;
        # setting this to true will put attributes on the `pre` tag instead.
        public $code_attr_on_pre = false;
        # Predefined abbreviations, as an array of the form
        # ( abbreviation word => description ). setup() reads this array to
        # build the abbreviation regex and the description table.
        public $predef_abbr = array();
54         ### Parser Implementation ###
56         public function __construct() {
57         #
58         # Constructor function. Initialize the parser object.
59         #
60                 # Add extra escapable characters before parent constructor 
61                 # initialize the table.
62                 $this->escape_chars .= ':|';
63                 
64                 # Insert extra document, block, and span transformations. 
65                 # Parent constructor will do the sorting.
66                 $this->document_gamut += array(
67                         "doFencedCodeBlocks" => 5,
68                         "stripFootnotes"     => 15,
69                         "stripAbbreviations" => 25,
70                         "appendFootnotes"    => 50,
71                         );
72                 $this->block_gamut += array(
73                         "doFencedCodeBlocks" => 5,
74                         "doTables"           => 15,
75                         "doDefLists"         => 45,
76                         );
77                 $this->span_gamut += array(
78                         "doFootnotes"        => 5,
79                         "doAbbreviations"    => 70,
80                         );
81                 
82                 $this->enhanced_ordered_list = true;
83                 parent::__construct();
84         }
85         
86         
        # Extra variables used during extra transformations.
        # setup() resets every one of them; teardown() clears all of them
        # except the footnote counter.
        protected $footnotes = array();
        protected $footnotes_ordered = array();
        protected $footnotes_ref_count = array();
        protected $footnotes_numbers = array();
        # Abbreviation word => description. The misspelled property name is
        # referenced as-is by setup() and teardown(), so it must not be
        # "corrected" in isolation.
        protected $abbr_desciptions = array();
        # Alternation of preg_quote()'d abbreviation words separated by '|',
        # built by setup(); empty when no abbreviation is known.
        protected $abbr_word_re = '';
        
        # Give the current footnote number.
        protected $footnote_counter = 1;
97         
98         
99         protected function setup() {
100         #
101         # Setting up Extra-specific variables.
102         #
103                 parent::setup();
104                 
105                 $this->footnotes = array();
106                 $this->footnotes_ordered = array();
107                 $this->footnotes_ref_count = array();
108                 $this->footnotes_numbers = array();
109                 $this->abbr_desciptions = array();
110                 $this->abbr_word_re = '';
111                 $this->footnote_counter = 1;
112                 
113                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
114                         if ($this->abbr_word_re)
115                                 $this->abbr_word_re .= '|';
116                         $this->abbr_word_re .= preg_quote($abbr_word);
117                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
118                 }
119         }
120         
121         protected function teardown() {
122         #
123         # Clearing Extra-specific variables.
124         #
125                 $this->footnotes = array();
126                 $this->footnotes_ordered = array();
127                 $this->footnotes_ref_count = array();
128                 $this->footnotes_numbers = array();
129                 $this->abbr_desciptions = array();
130                 $this->abbr_word_re = '';
131                 
132                 parent::teardown();
133         }
134         
135         
136         ### Extra Attribute Parser ###
        # Expression to use to catch attributes (includes the braces).
        # It has no delimiters of its own because it is embedded into larger
        # regular expressions; its single capture group holds the attribute
        # list found between the braces.
        protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
        # Expression to use when parsing in a context when no capture is desired
        # (same pattern, without the capture group).
        protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
143         protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
144         #
145         # Parse attributes caught by the $this->id_class_attr_catch_re expression
146         # and return the HTML-formatted list of attributes.
147         #
148         # Currently supported attributes are .class and #id.
149         #
150         # In addition, this method also supports supplying a default Id value,
151         # which will be used to populate the id attribute in case it was not
152         # overridden.
153                 if (empty($attr) && !$defaultIdValue && empty($classes)) return "";
154                 
155                 # Split on components
156                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
157                 $elements = $matches[0];
159                 # handle classes and ids (only first id taken into account)
160                 $attributes = array();
161                 $id = false;
162                 foreach ($elements as $element) {
163                         if ($element{0} == '.') {
164                                 $classes[] = substr($element, 1);
165                         } else if ($element{0} == '#') {
166                                 if ($id === false) $id = substr($element, 1);
167                         } else if (strpos($element, '=') > 0) {
168                                 $parts = explode('=', $element, 2);
169                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
170                         }
171                 }
173                 if (!$id) $id = $defaultIdValue;
175                 # compose attributes as string
176                 $attr_str = "";
177                 if (!empty($id)) {
178                         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
179                 }
180                 if (!empty($classes)) {
181                         $attr_str .= ' class="'. implode(" ", $classes) . '"';
182                 }
183                 if (!$this->no_markup && !empty($attributes)) {
184                         $attr_str .= ' '.implode(" ", $attributes);
185                 }
186                 return $attr_str;
187         }
190         protected function stripLinkDefinitions($text) {
191         #
192         # Strips link definitions from text, stores the URLs and titles in
193         # hash references.
194         #
195                 $less_than_tab = $this->tab_width - 1;
197                 # Link defs are in the form: ^[id]: url "optional title"
198                 $text = preg_replace_callback('{
199                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
200                                                           [ ]*
201                                                           \n?                           # maybe *one* newline
202                                                           [ ]*
203                                                         (?:
204                                                           <(.+?)>                       # url = $2
205                                                         |
206                                                           (\S+?)                        # url = $3
207                                                         )
208                                                           [ ]*
209                                                           \n?                           # maybe one newline
210                                                           [ ]*
211                                                         (?:
212                                                                 (?<=\s)                 # lookbehind for whitespace
213                                                                 ["(]
214                                                                 (.*?)                   # title = $4
215                                                                 [")]
216                                                                 [ ]*
217                                                         )?      # title is optional
218                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
219                                                         (?:\n+|\Z)
220                         }xm',
221                         array($this, '_stripLinkDefinitions_callback'),
222                         $text);
223                 return $text;
224         }
225         protected function _stripLinkDefinitions_callback($matches) {
226                 $link_id = strtolower($matches[1]);
227                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
228                 $this->urls[$link_id] = $url;
229                 $this->titles[$link_id] =& $matches[4];
230                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
231                 return ''; # String that will replace the block
232         }
235         ### HTML Block Parser ###
236         
        # The properties below are '|'-separated alternations of tag names.
        # They carry no delimiters because they are embedded into larger
        # regular expressions by the HTML block parser.

        # Tags that are always treated as block tags:
        protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
                                                   
        # Tags treated as block tags only if the opening tag is alone on its line:
        protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
        
        # Tags where markdown="1" default to span mode:
        protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
        
        # Tags which must not have their contents modified, no matter where 
        # they appear:
        protected $clean_tags_re = 'script|style|math|svg';
        
        # Tags that do not need to be closed.
        protected $auto_close_tags_re = 'hr|img|param|source|track';
252         
254         protected function hashHTMLBlocks($text) {
255         #
256         # Hashify HTML Blocks and "clean tags".
257         #
258         # We only want to do this for block-level HTML tags, such as headers,
259         # lists, and tables. That's because we still want to wrap <p>s around
260         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
261         # phrase emphasis, and spans. The list of tags we're looking for is
262         # hard-coded.
263         #
264         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
265         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
266         # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
267         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
268         # These two functions are calling each other. It's recursive!
269         #
270                 if ($this->no_markup)  return $text;
272                 #
273                 # Call the HTML-in-Markdown hasher.
274                 #
275                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
276                 
277                 return $text;
278         }
279         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
280                                                                                 $enclosing_tag_re = '', $span = false)
281         {
282         #
283         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
284         #
285         # *   $indent is the number of space to be ignored when checking for code 
286         #     blocks. This is important because if we don't take the indent into 
287         #     account, something like this (which looks right) won't work as expected:
288         #
289         #     <div>
290         #         <div markdown="1">
291         #         Hello World.  <-- Is this a Markdown code block or text?
292         #         </div>  <-- Is this a Markdown code block or a real tag?
293         #     <div>
294         #
295         #     If you don't like this, just don't indent the tag on which
296         #     you apply the markdown="1" attribute.
297         #
298         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
299         #     tag with that name. Nested tags supported.
300         #
301         # *   If $span is true, text inside must treated as span. So any double 
302         #     newline will be replaced by a single newline so that it does not create 
303         #     paragraphs.
304         #
305         # Returns an array of that form: ( processed text , remaining text )
306         #
307                 if ($text === '') return array('', '');
309                 # Regex to check for the presense of newlines around a block tag.
310                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
311                 $newline_after_re = 
312                         '{
313                                 ^                                               # Start of text following the tag.
314                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
315                                 [ ]*\n                                  # Must be followed by newline.
316                         }xs';
317                 
318                 # Regex to match any tag.
319                 $block_tag_re =
320                         '{
321                                 (                                       # $2: Capture whole tag.
322                                         </?                                     # Any opening or closing tag.
323                                                 (?>                             # Tag name.
324                                                         '.$this->block_tags_re.'                        |
325                                                         '.$this->context_block_tags_re.'        |
326                                                         '.$this->clean_tags_re.'                |
327                                                         (?!\s)'.$enclosing_tag_re.'
328                                                 )
329                                                 (?:
330                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
331                                                         (?>
332                                                                 ".*?"           |       # Double quotes (can contain `>`)
333                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
334                                                                 .+?                             # Anything but quotes and `>`.
335                                                         )*?
336                                                 )?
337                                         >                                       # End of tag.
338                                 |
339                                         <!--    .*?     -->     # HTML Comment
340                                 |
341                                         <\?.*?\?> | <%.*?%>     # Processing instruction
342                                 |
343                                         <!\[CDATA\[.*?\]\]>     # CData Block
344                                 '. ( !$span ? ' # If not in span.
345                                 |
346                                         # Indented code block
347                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
348                                         [ ]{'.($indent+4).'}[^\n]* \n
349                                         (?>
350                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
351                                         )*
352                                 |
353                                         # Fenced code block marker
354                                         (?<= ^ | \n )
355                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
356                                         [ ]*
357                                         (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
358                                         [ ]*
359                                         (?: '.$this->id_class_attr_nocatch_re.' )? # extra attributes
360                                         [ ]*
361                                         (?= \n )
362                                 ' : '' ). ' # End (if not is span).
363                                 |
364                                         # Code span marker
365                                         # Note, this regex needs to go after backtick fenced
366                                         # code blocks but it should also be kept outside of the
367                                         # "if not in span" condition adding backticks to the parser
368                                         `+
369                                 )
370                         }xs';
372                 
373                 $depth = 0;             # Current depth inside the tag tree.
374                 $parsed = "";   # Parsed text that will be returned.
376                 #
377                 # Loop through every tag until we find the closing tag of the parent
378                 # or loop until reaching the end of text if no parent tag specified.
379                 #
380                 do {
381                         #
382                         # Split the text using the first $tag_match pattern found.
383                         # Text before  pattern will be first in the array, text after
384                         # pattern will be at the end, and between will be any catches made 
385                         # by the pattern.
386                         #
387                         $parts = preg_split($block_tag_re, $text, 2, 
388                                                                 PREG_SPLIT_DELIM_CAPTURE);
389                         
390                         # If in Markdown span mode, add a empty-string span-level hash 
391                         # after each newline to prevent triggering any block element.
392                         if ($span) {
393                                 $void = $this->hashPart("", ':');
394                                 $newline = "$void\n";
395                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
396                         }
397                         
398                         $parsed .= $parts[0]; # Text before current tag.
399                         
400                         # If end of $text has been reached. Stop loop.
401                         if (count($parts) < 3) {
402                                 $text = "";
403                                 break;
404                         }
405                         
406                         $tag  = $parts[1]; # Tag to handle.
407                         $text = $parts[2]; # Remaining text after current tag.
408                         $tag_re = preg_quote($tag); # For use in a regular expression.
409                         
410                         #
411                         # Check for: Fenced code block marker.
412                         # Note: need to recheck the whole tag to disambiguate backtick
413                         # fences from code spans
414                         #
415                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
416                                 # Fenced code block marker: find matching end marker.
417                                 $fence_indent = strlen($capture[1]); # use captured indent in re
418                                 $fence_re = $capture[2]; # use captured fence in re
419                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
420                                         $matches)) 
421                                 {
422                                         # End marker found: pass text unchanged until marker.
423                                         $parsed .= $tag . $matches[0];
424                                         $text = substr($text, strlen($matches[0]));
425                                 }
426                                 else {
427                                         # No end marker: just skip it.
428                                         $parsed .= $tag;
429                                 }
430                         }
431                         #
432                         # Check for: Indented code block.
433                         #
434                         else if ($tag{0} == "\n" || $tag{0} == " ") {
435                                 # Indented code block: pass it unchanged, will be handled 
436                                 # later.
437                                 $parsed .= $tag;
438                         }
439                         #
440                         # Check for: Code span marker
441                         # Note: need to check this after backtick fenced code blocks
442                         #
443                         else if ($tag{0} == "`") {
444                                 # Find corresponding end marker.
445                                 $tag_re = preg_quote($tag);
446                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
447                                         $text, $matches))
448                                 {
449                                         # End marker found: pass text unchanged until marker.
450                                         $parsed .= $tag . $matches[0];
451                                         $text = substr($text, strlen($matches[0]));
452                                 }
453                                 else {
454                                         # Unmatched marker: just skip it.
455                                         $parsed .= $tag;
456                                 }
457                         }
458                         #
459                         # Check for: Opening Block level tag or
460                         #            Opening Context Block tag (like ins and del) 
461                         #               used as a block tag (tag is alone on it's line).
462                         #
463                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
464                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
465                                         preg_match($newline_before_re, $parsed) &&
466                                         preg_match($newline_after_re, $text)    )
467                                 )
468                         {
469                                 # Need to parse tag and following text using the HTML parser.
470                                 list($block_text, $text) = 
471                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
472                                 
473                                 # Make sure it stays outside of any paragraph by adding newlines.
474                                 $parsed .= "\n\n$block_text\n\n";
475                         }
476                         #
477                         # Check for: Clean tag (like script, math)
478                         #            HTML Comments, processing instructions.
479                         #
480                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
481                                 $tag{1} == '!' || $tag{1} == '?')
482                         {
483                                 # Need to parse tag and following text using the HTML parser.
484                                 # (don't check for markdown attribute)
485                                 list($block_text, $text) = 
486                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
487                                 
488                                 $parsed .= $block_text;
489                         }
490                         #
491                         # Check for: Tag with same name as enclosing tag.
492                         #
493                         else if ($enclosing_tag_re !== '' &&
494                                 # Same name as enclosing tag.
495                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
496                         {
497                                 #
498                                 # Increase/decrease nested tag count.
499                                 #
500                                 if ($tag{1} == '/')                                             $depth--;
501                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
503                                 if ($depth < 0) {
504                                         #
505                                         # Going out of parent element. Clean up and break so we
506                                         # return to the calling function.
507                                         #
508                                         $text = $tag . $text;
509                                         break;
510                                 }
511                                 
512                                 $parsed .= $tag;
513                         }
514                         else {
515                                 $parsed .= $tag;
516                         }
517                 } while ($depth >= 0);
518                 
519                 return array($parsed, $text);
520         }
521         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
522         #
523         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
524         #
525         # *   Calls $hash_method to convert any blocks.
526         # *   Stops when the first opening tag closes.
527         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
528         #     (it is not inside clean tags)
529         #
530         # Returns an array of that form: ( processed text , remaining text )
531         #
532                 if ($text === '') return array('', '');
533                 
534                 # Regex to match `markdown` attribute inside of a tag.
535                 $markdown_attr_re = '
536                         {
537                                 \s*                     # Eat whitespace before the `markdown` attribute
538                                 markdown
539                                 \s*=\s*
540                                 (?>
541                                         (["\'])         # $1: quote delimiter           
542                                         (.*?)           # $2: attribute value
543                                         \1                      # matching delimiter    
544                                 |
545                                         ([^\s>]*)       # $3: unquoted attribute value
546                                 )
547                                 ()                              # $4: make $3 always defined (avoid warnings)
548                         }xs';
549                 
550                 # Regex to match any tag.
551                 $tag_re = '{
552                                 (                                       # $2: Capture whole tag.
553                                         </?                                     # Any opening or closing tag.
554                                                 [\w:$]+                 # Tag name.
555                                                 (?:
556                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
557                                                         (?>
558                                                                 ".*?"           |       # Double quotes (can contain `>`)
559                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
560                                                                 .+?                             # Anything but quotes and `>`.
561                                                         )*?
562                                                 )?
563                                         >                                       # End of tag.
564                                 |
565                                         <!--    .*?     -->     # HTML Comment
566                                 |
567                                         <\?.*?\?> | <%.*?%>     # Processing instruction
568                                 |
569                                         <!\[CDATA\[.*?\]\]>     # CData Block
570                                 )
571                         }xs';
572                 
573                 $original_text = $text;         # Save original text in case of faliure.
574                 
575                 $depth          = 0;    # Current depth inside the tag tree.
576                 $block_text     = "";   # Temporary text holder for current text.
577                 $parsed         = "";   # Parsed text that will be returned.
579                 #
580                 # Get the name of the starting tag.
581                 # (This pattern makes $base_tag_name_re safe without quoting.)
582                 #
583                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
584                         $base_tag_name_re = $matches[1];
586                 #
587                 # Loop through every tag until we find the corresponding closing tag.
588                 #
589                 do {
590                         #
591                         # Split the text using the first $tag_match pattern found.
592                         # Text before  pattern will be first in the array, text after
593                         # pattern will be at the end, and between will be any catches made 
594                         # by the pattern.
595                         #
596                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
597                         
598                         if (count($parts) < 3) {
599                                 #
600                                 # End of $text reached with unbalenced tag(s).
601                                 # In that case, we return original text unchanged and pass the
602                                 # first character as filtered to prevent an infinite loop in the 
603                                 # parent function.
604                                 #
605                                 return array($original_text{0}, substr($original_text, 1));
606                         }
607                         
608                         $block_text .= $parts[0]; # Text before current tag.
609                         $tag         = $parts[1]; # Tag to handle.
610                         $text        = $parts[2]; # Remaining text after current tag.
611                         
612                         #
613                         # Check for: Auto-close tag (like <hr/>)
614                         #                        Comments and Processing Instructions.
615                         #
616                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
617                                 $tag{1} == '!' || $tag{1} == '?')
618                         {
619                                 # Just add the tag to the block as if it was text.
620                                 $block_text .= $tag;
621                         }
622                         else {
623                                 #
624                                 # Increase/decrease nested tag count. Only do so if
625                                 # the tag's name match base tag's.
626                                 #
627                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
628                                         if ($tag{1} == '/')                                             $depth--;
629                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
630                                 }
631                                 
632                                 #
633                                 # Check for `markdown="1"` attribute and handle it.
634                                 #
635                                 if ($md_attr && 
636                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
637                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
638                                 {
639                                         # Remove `markdown` attribute from opening tag.
640                                         $tag = preg_replace($markdown_attr_re, '', $tag);
641                                         
642                                         # Check if text inside this tag must be parsed in span mode.
643                                         $this->mode = $attr_m[2] . $attr_m[3];
644                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
645                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
646                                         
647                                         # Calculate indent before tag.
648                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
649                                                 $strlen = $this->utf8_strlen;
650                                                 $indent = $strlen($matches[1], 'UTF-8');
651                                         } else {
652                                                 $indent = 0;
653                                         }
654                                         
655                                         # End preceding block with this tag.
656                                         $block_text .= $tag;
657                                         $parsed .= $this->$hash_method($block_text);
658                                         
659                                         # Get enclosing tag name for the ParseMarkdown function.
660                                         # (This pattern makes $tag_name_re safe without quoting.)
661                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
662                                         $tag_name_re = $matches[1];
663                                         
664                                         # Parse the content using the HTML-in-Markdown parser.
665                                         list ($block_text, $text)
666                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
667                                                         $tag_name_re, $span_mode);
668                                         
669                                         # Outdent markdown text.
670                                         if ($indent > 0) {
671                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
672                                                                                                         $block_text);
673                                         }
674                                         
675                                         # Append tag content to parsed text.
676                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
677                                         else                            $parsed .= "$block_text";
678                                         
679                                         # Start over with a new block.
680                                         $block_text = "";
681                                 }
682                                 else $block_text .= $tag;
683                         }
684                         
685                 } while ($depth > 0);
686                 
687                 #
688                 # Hash last block text that wasn't processed inside the loop.
689                 #
690                 $parsed .= $this->$hash_method($block_text);
691                 
692                 return array($parsed, $text);
693         }
protected function hashClean($text) {
#
# Hash a "clean" tag. Functions that insert an already well-formed tag
# into the text route it through here; the tag is handed to hashPart()
# with 'C' as the second argument and the value hashPart() returns is
# passed back to the caller unchanged.
#
    $hashed = $this->hashPart($text, 'C');
    return $hashed;
}
protected function doAnchors($text) {
#
# Convert Markdown link syntax into XHTML <a> tags.
#
# Three passes are made over $text, in this order:
#   1. reference-style links:   [link text] [id]
#   2. inline-style links:      [link text](url "optional title")
#   3. reference shortcuts:     [link text]
# The transformed text is returned. When $this->in_anchor is already set
# the text is returned untouched.
#
    # Do nothing while another anchor conversion is in progress.
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    $reference_callback = array($this, '_doAnchors_reference_callback');
    $inline_callback    = array($this, '_doAnchors_inline_callback');

    #
    # Pass 1: reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = $3
          \]
        )
        }xs';
    $text = preg_replace_callback($reference_re, $reference_callback, $text);

    #
    # Pass 2: inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback($inline_re, $inline_callback, $text);

    #
    # Pass 3: reference-style shortcuts: [link text]
    # This pass has to run last so that [link text][1] and
    # [link text](/foo) have already been consumed by the passes above.
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';
    $text = preg_replace_callback($shortcut_re, $reference_callback, $text);

    $this->in_anchor = false;
    return $text;
}
protected function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links and reference shortcuts.
#   $matches[1]  the whole matched text
#   $matches[2]  the link text
#   $matches[3]  the link id (empty or absent for [this][] and [this])
# Returns the hashed <a> element when the id is a key of $this->urls,
# otherwise the matched text unchanged.
#
    $label  = $matches[2];
    $ref_id = isset($matches[3]) ? $matches[3] : '';

    # Shortcut forms carry no id of their own: the link text is the id.
    if ($ref_id == "") {
        $ref_id = $label;
    }

    # Lower-case the id and turn embedded newlines into spaces.
    $ref_id = preg_replace('{[ ]?\n}', ' ', strtolower($ref_id));

    # Unknown id: leave the source text as it was.
    if (!isset($this->urls[$ref_id])) {
        return $matches[1];
    }

    $href   = $this->encodeURLAttribute($this->urls[$ref_id]);
    $anchor = "<a href=\"$href\"";

    if (isset($this->titles[$ref_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$ref_id]);
        $anchor .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref_id])) {
        $anchor .= $this->ref_attr[$ref_id];
    }

    $anchor .= ">" . $this->runSpanGamut($label) . "</a>";

    return $this->hashPart($anchor);
}
protected function _doAnchors_inline_callback($matches) {
#
# Callback for inline-style links: [link text](url "title") {attrs}
#   $matches[2]  the link text
#   $matches[3]  the URL when written as <url>, otherwise empty
#   $matches[4]  the URL when written bare
#   $matches[7]  the optional title
#   $matches[8]  the optional id/class attribute block
# Returns the hashed <a> element.
#
    $link_text =  $matches[2];
    $url       =  $matches[3] === '' ? $matches[4] : $matches[3];
    # Binding by reference avoids an undefined-index notice when the
    # optional trailing groups did not take part in the match.
    $title     =& $matches[7];
    $attr      =  $this->doExtraAttributes("a", $dummy =& $matches[8]);

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $unhashed = $this->unhash($url);
    if ($unhashed !== $url) {
        $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
    }

    $url = $this->encodeURLAttribute($url);

    $result = "<a href=\"$url\"";
    if (isset($title)) {
        $title = $this->encodeAttribute($title);
        $result .= " title=\"$title\"";
    }
    $result .= $attr;

    # The link text goes through the span gamut exactly once; the
    # previous implementation processed it a second time on the already
    # converted text.
    $link_text = $this->runSpanGamut($link_text);
    $result .= ">$link_text</a>";

    return $this->hashPart($result);
}
protected function doImages($text) {
#
# Convert Markdown image syntax into <img> tags.
#
# Two passes are made over $text, in this order:
#   1. reference-style images:  ![alt text][id]
#   2. inline images:           ![alt text](url "optional title")
# The transformed text is returned.
#
    #
    # Pass 1: reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback(
        $reference_re,
        array($this, '_doImages_reference_callback'),
        $text
    );

    #
    # Pass 2: inline images: ![alt text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
          (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
        )
        }xs';
    $text = preg_replace_callback(
        $inline_re,
        array($this, '_doImages_inline_callback'),
        $text
    );

    return $text;
}
protected function _doImages_reference_callback($matches) {
#
# Callback for reference-style images: ![alt text][id]
#   $matches[1]  the whole matched text
#   $matches[2]  the alt text
#   $matches[3]  the image id (empty for the ![this][] shortcut)
# Returns the hashed <img> element when the id is a key of $this->urls,
# otherwise the matched text unchanged.
#
    $ref_id = strtolower($matches[3]);
    if ($ref_id == "") {
        # Shortcut form ![this][]: the alt text doubles as the id.
        $ref_id = strtolower($matches[2]);
    }

    $alt = $this->encodeAttribute($matches[2]);

    # If there's no such id, leave the source text intact.
    if (!isset($this->urls[$ref_id])) {
        return $matches[1];
    }

    $src = $this->encodeURLAttribute($this->urls[$ref_id]);
    $img = "<img src=\"$src\" alt=\"$alt\"";

    if (isset($this->titles[$ref_id])) {
        $title_attr = $this->encodeAttribute($this->titles[$ref_id]);
        $img .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref_id])) {
        $img .= $this->ref_attr[$ref_id];
    }
    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
}
protected function _doImages_inline_callback($matches) {
#
# Callback for inline images: ![alt text](url "title") {attrs}
#   $matches[2]  the alt text
#   $matches[3]  the URL when written as <url>, otherwise empty
#   $matches[4]  the URL when written bare
#   $matches[7]  the optional title
#   $matches[8]  the optional id/class attribute block
# Returns the hashed <img> element.
#
    if ($matches[3] == '') {
        $src = $matches[4];
    } else {
        $src = $matches[3];
    }

    # Binding by reference avoids an undefined-index notice when the
    # attribute group did not take part in the match.
    $extra = $this->doExtraAttributes("img", $dummy =& $matches[8]);

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeURLAttribute($src);

    $img = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($matches[7])) {
        $title_attr = $this->encodeAttribute($matches[7]);
        $img .= " title=\"$title_attr\"";
    }
    $img .= $extra . $this->empty_element_suffix;

    return $this->hashPart($img);
}
protected function doHeaders($text) {
#
# Convert setext-style and atx-style headers, with support for an
# optional trailing id/class attribute block. Returns the transformed
# text.
#
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2 .class1 .class2}
    #     --------
    #
    # NOTE(review): the callback reads the attribute block from capture 2
    # and the underline from capture 3, although the in-pattern comment
    # labels the attribute group "$3".
    $setext_re = '{
            (^.+?)                              # $1: Header text
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx';
    $text = preg_replace_callback(
        $setext_re,
        array($this, '_doHeaders_callback_setext'),
        $text
    );

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3.class1.class2}
    #   ...
    #   ###### Header 6   {.class2}
    #
    $atx_re = '{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
            [ ]*
            \n+
        }xm';
    $text = preg_replace_callback(
        $atx_re,
        array($this, '_doHeaders_callback_atx'),
        $text
    );

    return $text;
}
protected function _doHeaders_callback_setext($matches) {
#
# Callback for setext-style headers.
#   $matches[1]  the header text
#   $matches[2]  the optional id/class attribute block
#   $matches[3]  the underline, a run of `=` or `-` characters
# Returns the hashed <h1> (for `=`) or <h2> (for `-`) block wrapped in
# newlines, or the matched text unchanged in the single-dash case below.
#
    # A line starting with "- " underlined by one single dash is not
    # turned into a header (presumably so list items stay list items).
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
        return $matches[0];
    }

    # Square-bracket offset: the `$str{0}` form is deprecated since
    # PHP 7.4 and no longer parses in PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;

    # Ask the optional user-supplied generator for a default id.
    $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;

    $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
protected function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers.
#   $matches[1]  the leading run of `#` characters; its length is the level
#   $matches[2]  the header text
#   $matches[3]  the optional id/class attribute block
# Returns the hashed <hN> block wrapped in newlines.
#
    $depth = strlen($matches[1]);
    $tag   = "h$depth";

    # Ask the optional user-supplied generator for a default id.
    $defaultId = null;
    if (is_callable($this->header_id_func)) {
        $defaultId = call_user_func($this->header_id_func, $matches[2]);
    }

    # Binding by reference avoids an undefined-index notice when the
    # attribute group did not take part in the match.
    $attr = $this->doExtraAttributes($tag, $dummy =& $matches[3], $defaultId);

    $html = "<$tag$attr>" . $this->runSpanGamut($matches[2]) . "</$tag>";

    return "\n" . $this->hashBlock($html) . "\n\n";
}
protected function doTables($text) {
#
# Convert Markdown Extra tables into HTML tables. Tables written with a
# leading pipe on every line are handled first, then tables without
# one. Returns the transformed text.
#
    # A table may be indented by up to one space less than a tab stop.
    $max_indent = $this->tab_width - 1;

    #
    # Tables with a leading pipe:
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $leading_pipe_re = '
        {
            ^                           # Start of a line
            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?>
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm';
    $text = preg_replace_callback(
        $leading_pipe_re,
        array($this, '_doTable_leadingPipe_callback'),
        $text
    );

    #
    # Tables without a leading pipe:
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    $plain_re = '
        {
            ^                           # Start of a line
            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$max_indent.'}      # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?>
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm';
    $text = preg_replace_callback(
        $plain_re,
        array($this, '_DoTable_callback'),
        $text
    );

    return $text;
}
protected function _doTable_leadingPipe_callback($matches) {
#
# Callback for tables written with a leading pipe. The leading pipe
# (and any spaces before it) is stripped from every content row, then
# the work is delegated to _doTable_callback() with the same header and
# underline captures.
#
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    $normalized = array($matches[0], $matches[1], $matches[2], $rows);

    return $this->_doTable_callback($normalized);
}
protected function _doTable_makeAlignAttr($alignname)
{
    #
    # Build the alignment attribute for a table cell. $alignname is the
    # alignment keyword supplied by the caller. When
    # $this->table_align_class_tmpl is non-empty, every `%%` in it is
    # replaced by $alignname and the result is emitted as a class
    # attribute; otherwise an align attribute is emitted. The returned
    # string starts with a space.
    #
    if (!empty($this->table_align_class_tmpl)) {
        $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return " class=\"$classname\"";
    }

    return " align=\"$alignname\"";
}
protected function _doTable_callback($matches) {
#
# Build the HTML for one table.
#   $matches[1]  the header row
#   $matches[2]  the header underline (carries the column alignment)
#   $matches[3]  the content rows, one per line
# Returns the hashed <table> block followed by a newline.
#
    $head       = $matches[1];
    $underline  = $matches[2];
    $content    = $matches[3];

    # Remove any trailing pipes for each line.
    $head       = preg_replace('/[|] *$/m', '', $head);
    $underline  = preg_replace('/[|] *$/m', '', $underline);
    $content    = preg_replace('/[|] *$/m', '', $content);

    # Read the alignment of each column from the header underline:
    # `---:` is right, `:---:` is center, `:---` is left.
    # $attr is initialised explicitly rather than created implicitly by
    # the first assignment inside the loop.
    $attr       = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('right');
        } else if (preg_match('/^ *:-+: *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('center');
        } else if (preg_match('/^ *:-+ *$/', $s)) {
            $attr[$n] = $this->_doTable_makeAlignAttr('left');
        } else {
            $attr[$n] = '';
        }
    }

    # Parse span elements first, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those get ignored.
    $head       = $this->parseSpan($head);
    $headers    = preg_split('/ *[|] */', $head);
    $col_count  = count($headers);
    # Make sure there is one attribute entry per header column.
    $attr       = array_pad($attr, $col_count, '');

    # Write column headers.
    $text  = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header) {
        $text .= "  <th" . $attr[$n] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Same span pre-parsing as for the header row.
        $row = $this->parseSpan($row);

        # Split the row into at most $col_count cells and pad short rows.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell) {
            $text .= "  <td" . $attr[$n] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
        }
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
1153         
protected function doDefLists($text) {
#
# Convert Markdown Extra definition lists into HTML. Every complete list
# found at the start of the text or right after a blank line is handed
# to _doDefLists_callback(). Returns the transformed text.
#
    # A term or definition marker may be indented by up to one space
    # less than a tab stop.
    $max_indent = $this->tab_width - 1;

    # Re-usable pattern matching one entire definition list:
    $whole_list_re = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$max_indent.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$max_indent.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $anchored_list_re = '{
            (?>\A\n?|(?<=\n\n))
            '.$whole_list_re.'
        }mx';

    return preg_replace_callback(
        $anchored_list_re,
        array($this, '_doDefLists_callback'),
        $text
    );
}
1196         protected function _doDefLists_callback($matches) {
1197                 # Re-usable patterns to match list item bullets and number markers:
1198                 $list = $matches[1];
1199                 
1200                 # Turn double returns into triple returns, so that we can make a
1201                 # paragraph for the last item in a list, if necessary:
1202                 $result = trim($this->processDefListItems($list));
1203                 $result = "<dl>\n" . $result . "\n</dl>";
1204                 return $this->hashBlock($result) . "\n\n";
1205         }
1208         protected function processDefListItems($list_str) {
1209         #
1210         #       Process the contents of a single definition list, splitting it
1211         #       into individual term and definition list items.
1212         #
1213                 $less_than_tab = $this->tab_width - 1;
1214                 
1215                 # trim trailing blank lines:
1216                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1218                 # Process definition terms.
1219                 $list_str = preg_replace_callback('{
1220                         (?>\A\n?|\n\n+)                                 # leading line
1221                         (                                                               # definition terms = $1
1222                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
1223                                 (?!\:[ ]|[ ])                           # negative lookahead for a definition
1224                                                                                         #   mark (colon) or more whitespace.
1225                                 (?> \S.* \n)+?                          # actual term (not whitespace). 
1226                         )                       
1227                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed 
1228                                                                                         #   with a definition mark.
1229                         }xm',
1230                         array($this, '_processDefListItems_callback_dt'), $list_str);
1232                 # Process actual definitions.
1233                 $list_str = preg_replace_callback('{
1234                         \n(\n+)?                                                # leading line = $1
1235                         (                                                               # marker space = $2
1236                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
1237                                 \:[ ]+                                          # definition mark (colon)
1238                         )
1239                         ((?s:.+?))                                              # definition text = $3
1240                         (?= \n+                                                 # stop at next definition mark,
1241                                 (?:                                                     # next term or end of text
1242                                         [ ]{0,'.$less_than_tab.'} \:[ ] |
1243                                         <dt> | \z
1244                                 )                                               
1245                         )                                       
1246                         }xm',
1247                         array($this, '_processDefListItems_callback_dd'), $list_str);
1249                 return $list_str;
1250         }
1251         protected function _processDefListItems_callback_dt($matches) {
1252                 $terms = explode("\n", trim($matches[1]));
1253                 $text = '';
1254                 foreach ($terms as $term) {
1255                         $term = $this->runSpanGamut(trim($term));
1256                         $text .= "\n<dt>" . $term . "</dt>";
1257                 }
1258                 return $text . "\n";
1259         }
1260         protected function _processDefListItems_callback_dd($matches) {
1261                 $leading_line   = $matches[1];
1262                 $marker_space   = $matches[2];
1263                 $def                    = $matches[3];
1265                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
1266                         # Replace marker with the appropriate whitespace indentation
1267                         $def = str_repeat(' ', strlen($marker_space)) . $def;
1268                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1269                         $def = "\n". $def ."\n";
1270                 }
1271                 else {
1272                         $def = rtrim($def);
1273                         $def = $this->runSpanGamut($this->outdent($def));
1274                 }
1276                 return "\n<dd>" . $def . "</dd>\n";
1277         }
1280         protected function doFencedCodeBlocks($text) {
1281         #
1282         # Adding the fenced code block syntax to regular Markdown:
1283         #
1284         # ~~~
1285         # Code block
1286         # ~~~
1287         #
1288                 $less_than_tab = $this->tab_width;
1289                 
1290                 $text = preg_replace_callback('{
1291                                 (?:\n|\A)
1292                                 # 1: Opening marker
1293                                 (
1294                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1295                                 )
1296                                 [ ]*
1297                                 (?:
1298                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1299                                 )?
1300                                 [ ]*
1301                                 (?:
1302                                         '.$this->id_class_attr_catch_re.' # 3: Extra attributes
1303                                 )?
1304                                 [ ]* \n # Whitespace and newline following marker.
1305                                 
1306                                 # 4: Content
1307                                 (
1308                                         (?>
1309                                                 (?!\1 [ ]* \n)  # Not a closing marker.
1310                                                 .*\n+
1311                                         )+
1312                                 )
1313                                 
1314                                 # Closing marker.
1315                                 \1 [ ]* (?= \n )
1316                         }xm',
1317                         array($this, '_doFencedCodeBlocks_callback'), $text);
1319                 return $text;
1320         }
1321         protected function _doFencedCodeBlocks_callback($matches) {
1322                 $classname =& $matches[2];
1323                 $attrs     =& $matches[3];
1324                 $codeblock = $matches[4];
1326                 if ($this->code_block_content_func) {
1327                         $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
1328                 } else {
1329                         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1330                 }
1332                 $codeblock = preg_replace_callback('/^\n+/',
1333                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1335                 $classes = array();
1336                 if ($classname != "") {
1337                         if ($classname{0} == '.')
1338                                 $classname = substr($classname, 1);
1339                         $classes[] = $this->code_class_prefix.$classname;
1340                 }
1341                 $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
1342                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
1343                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1344                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1345                 
1346                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1347         }
1348         protected function _doFencedCodeBlocks_newlines($matches) {
1349                 return str_repeat("<br$this->empty_element_suffix", 
1350                         strlen($matches[0]));
1351         }
1354         #
1355         # Redefining emphasis markers so that emphasis by underscore does not
1356         # work in the middle of a word.
1357         #
1358         protected $em_relist = array(
1359                 ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
1360                 '*' => '(?<![\s*])\*(?!\*)',
1361                 '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
1362                 );
1363         protected $strong_relist = array(
1364                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
1365                 '**' => '(?<![\s*])\*\*(?!\*)',
1366                 '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
1367                 );
1368         protected $em_strong_relist = array(
1369                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
1370                 '***' => '(?<![\s*])\*\*\*(?!\*)',
1371                 '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
1372                 );
1375         protected function formParagraphs($text) {
1376         #
1377         #       Params:
1378         #               $text - string to process with html <p> tags
1379         #
1380                 # Strip leading and trailing lines:
1381                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1382                 
1383                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1385                 #
1386                 # Wrap <p> tags and unhashify HTML blocks
1387                 #
1388                 foreach ($grafs as $key => $value) {
1389                         $value = trim($this->runSpanGamut($value));
1390                         
1391                         # Check if this should be enclosed in a paragraph.
1392                         # Clean tag hashes & block tag hashes are left alone.
1393                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1394                         
1395                         if ($is_p) {
1396                                 $value = "<p>$value</p>";
1397                         }
1398                         $grafs[$key] = $value;
1399                 }
1400                 
1401                 # Join grafs in one text, then unhash HTML tags. 
1402                 $text = implode("\n\n", $grafs);
1403                 
1404                 # Finish by removing any tag hashes still present in $text.
1405                 $text = $this->unhash($text);
1406                 
1407                 return $text;
1408         }
1409         
1410         
1411         ### Footnotes
1412         
1413         protected function stripFootnotes($text) {
1414         #
1415         # Strips link definitions from text, stores the URLs and titles in
1416         # hash references.
1417         #
1418                 $less_than_tab = $this->tab_width - 1;
1420                 # Link defs are in the form: [^id]: url "optional title"
1421                 $text = preg_replace_callback('{
1422                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
1423                           [ ]*
1424                           \n?                                   # maybe *one* newline
1425                         (                                               # text = $2 (no blank lines allowed)
1426                                 (?:                                     
1427                                         .+                              # actual text
1428                                 |
1429                                         \n                              # newlines but 
1430                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1431                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
1432                                                                         # by non-indented content
1433                                 )*
1434                         )               
1435                         }xm',
1436                         array($this, '_stripFootnotes_callback'),
1437                         $text);
1438                 return $text;
1439         }
1440         protected function _stripFootnotes_callback($matches) {
1441                 $note_id = $this->fn_id_prefix . $matches[1];
1442                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
1443                 return ''; # String that will replace the block
1444         }
1447         protected function doFootnotes($text) {
1448         #
1449         # Replace footnote references in $text [^id] with a special text-token 
1450         # which will be replaced by the actual footnote marker in appendFootnotes.
1451         #
1452                 if (!$this->in_anchor) {
1453                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1454                 }
1455                 return $text;
1456         }
1458         
1459         protected function appendFootnotes($text) {
1460         #
1461         # Append footnote list to text.
1462         #
1463                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1464                         array($this, '_appendFootnotes_callback'), $text);
1465         
1466                 if (!empty($this->footnotes_ordered)) {
1467                         $text .= "\n\n";
1468                         $text .= "<div class=\"footnotes\">\n";
1469                         $text .= "<hr". $this->empty_element_suffix ."\n";
1470                         $text .= "<ol>\n\n";
1472                         $attr = "";
1473                         if ($this->fn_backlink_class != "") {
1474                                 $class = $this->fn_backlink_class;
1475                                 $class = $this->encodeAttribute($class);
1476                                 $attr .= " class=\"$class\"";
1477                         }
1478                         if ($this->fn_backlink_title != "") {
1479                                 $title = $this->fn_backlink_title;
1480                                 $title = $this->encodeAttribute($title);
1481                                 $attr .= " title=\"$title\"";
1482                         }
1483                         $backlink_text = $this->fn_backlink_html;
1484                         $num = 0;
1485                         
1486                         while (!empty($this->footnotes_ordered)) {
1487                                 $footnote = reset($this->footnotes_ordered);
1488                                 $note_id = key($this->footnotes_ordered);
1489                                 unset($this->footnotes_ordered[$note_id]);
1490                                 $ref_count = $this->footnotes_ref_count[$note_id];
1491                                 unset($this->footnotes_ref_count[$note_id]);
1492                                 unset($this->footnotes[$note_id]);
1493                                 
1494                                 $footnote .= "\n"; # Need to append newline before parsing.
1495                                 $footnote = $this->runBlockGamut("$footnote\n");                                
1496                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1497                                         array($this, '_appendFootnotes_callback'), $footnote);
1498                                 
1499                                 $attr = str_replace("%%", ++$num, $attr);
1500                                 $note_id = $this->encodeAttribute($note_id);
1502                                 # Prepare backlink, multiple backlinks if multiple references
1503                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
1504                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1505                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
1506                                 }
1507                                 # Add backlink to last paragraph; create new paragraph if needed.
1508                                 if (preg_match('{</p>$}', $footnote)) {
1509                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1510                                 } else {
1511                                         $footnote .= "\n\n<p>$backlink</p>";
1512                                 }
1513                                 
1514                                 $text .= "<li id=\"fn:$note_id\">\n";
1515                                 $text .= $footnote . "\n";
1516                                 $text .= "</li>\n\n";
1517                         }
1518                         
1519                         $text .= "</ol>\n";
1520                         $text .= "</div>";
1521                 }
1522                 return $text;
1523         }
1524         protected function _appendFootnotes_callback($matches) {
1525                 $node_id = $this->fn_id_prefix . $matches[1];
1526                 
1527                 # Create footnote marker only if it has a corresponding footnote *and*
1528                 # the footnote hasn't been used by another marker.
1529                 if (isset($this->footnotes[$node_id])) {
1530                         $num =& $this->footnotes_numbers[$node_id];
1531                         if (!isset($num)) {
1532                                 # Transfer footnote content to the ordered list and give it its
1533                                 # number
1534                                 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1535                                 $this->footnotes_ref_count[$node_id] = 1;
1536                                 $num = $this->footnote_counter++;
1537                                 $ref_count_mark = '';
1538                         } else {
1539                                 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1540                         }
1542                         $attr = "";
1543                         if ($this->fn_link_class != "") {
1544                                 $class = $this->fn_link_class;
1545                                 $class = $this->encodeAttribute($class);
1546                                 $attr .= " class=\"$class\"";
1547                         }
1548                         if ($this->fn_link_title != "") {
1549                                 $title = $this->fn_link_title;
1550                                 $title = $this->encodeAttribute($title);
1551                                 $attr .= " title=\"$title\"";
1552                         }
1553                         
1554                         $attr = str_replace("%%", $num, $attr);
1555                         $node_id = $this->encodeAttribute($node_id);
1556                         
1557                         return
1558                                 "<sup id=\"fnref$ref_count_mark:$node_id\">".
1559                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1560                                 "</sup>";
1561                 }
1562                 
1563                 return "[^".$matches[1]."]";
1564         }
1565                 
1566         
1567         ### Abbreviations ###
1568         
1569         protected function stripAbbreviations($text) {
1570         #
1571         # Strips abbreviations from text, stores titles in hash references.
1572         #
1573                 $less_than_tab = $this->tab_width - 1;
1575                 # Link defs are in the form: [id]*: url "optional title"
1576                 $text = preg_replace_callback('{
1577                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
1578                         (.*)                                    # text = $2 (no blank lines allowed)    
1579                         }xm',
1580                         array($this, '_stripAbbreviations_callback'),
1581                         $text);
1582                 return $text;
1583         }
1584         protected function _stripAbbreviations_callback($matches) {
1585                 $abbr_word = $matches[1];
1586                 $abbr_desc = $matches[2];
1587                 if ($this->abbr_word_re)
1588                         $this->abbr_word_re .= '|';
1589                 $this->abbr_word_re .= preg_quote($abbr_word);
1590                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1591                 return ''; # String that will replace the block
1592         }
1593         
1594         
1595         protected function doAbbreviations($text) {
1596         #
1597         # Find defined abbreviations in text and wrap them in <abbr> elements.
1598         #
1599                 if ($this->abbr_word_re) {
1600                         // cannot use the /x modifier because abbr_word_re may 
1601                         // contain significant spaces:
1602                         $text = preg_replace_callback('{'.
1603                                 '(?<![\w\x1A])'.
1604                                 '(?:'.$this->abbr_word_re.')'.
1605                                 '(?![\w\x1A])'.
1606                                 '}', 
1607                                 array($this, '_doAbbreviations_callback'), $text);
1608                 }
1609                 return $text;
1610         }
1611         protected function _doAbbreviations_callback($matches) {
1612                 $abbr = $matches[0];
1613                 if (isset($this->abbr_desciptions[$abbr])) {
1614                         $desc = $this->abbr_desciptions[$abbr];
1615                         if (empty($desc)) {
1616                                 return $this->hashPart("<abbr>$abbr</abbr>");
1617                         } else {
1618                                 $desc = $this->encodeAttribute($desc);
1619                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1620                         }
1621                 } else {
1622                         return $matches[0];
1623                 }
1624         }