3107a797c1869d18ac6d7c0608813b391aeac961
[moodle.git] / lib / markdown / MarkdownExtra.php
1 <?php
2 #
3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown Extra
6 # Copyright (c) 2004-2015 Michel Fortin  
7 # <https://michelf.ca/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
13 namespace Michelf;
16 #
17 # Markdown Extra Parser Class
18 #
20 class MarkdownExtra extends \Michelf\Markdown {
22         ### Configuration Variables ###
24         # Prefix for footnote ids.
25         public $fn_id_prefix = "";
26         
27         # Optional title attribute for footnote links and backlinks.
28         public $fn_link_title = "";
29         public $fn_backlink_title = "";
30         
31         # Optional class attribute for footnote links and backlinks.
32         public $fn_link_class = "footnote-ref";
33         public $fn_backlink_class = "footnote-backref";
35         # Class name for table cell alignment (%% replaced left/center/right)
36         # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
37         # If empty, the align attribute is used instead of a class name.
38         public $table_align_class_tmpl = '';
40         # Optional class prefix for fenced code block.
41         public $code_class_prefix = "";
42         # Class attribute for code blocks goes on the `code` tag;
43         # setting this to true will put attributes on the `pre` tag instead.
44         public $code_attr_on_pre = false;
45         
46         # Predefined abbreviations.
47         public $predef_abbr = array();
49         ### Parser Implementation ###
51         public function __construct() {
52         #
53         # Constructor function. Initialize the parser object.
54         #
55                 # Add extra escapable characters before parent constructor 
56                 # initialize the table.
57                 $this->escape_chars .= ':|';
58                 
59                 # Insert extra document, block, and span transformations. 
60                 # Parent constructor will do the sorting.
61                 $this->document_gamut += array(
62                         "doFencedCodeBlocks" => 5,
63                         "stripFootnotes"     => 15,
64                         "stripAbbreviations" => 25,
65                         "appendFootnotes"    => 50,
66                         );
67                 $this->block_gamut += array(
68                         "doFencedCodeBlocks" => 5,
69                         "doTables"           => 15,
70                         "doDefLists"         => 45,
71                         );
72                 $this->span_gamut += array(
73                         "doFootnotes"        => 5,
74                         "doAbbreviations"    => 70,
75                         );
76                 
77                 $this->enhanced_ordered_list = true;
78                 parent::__construct();
79         }
80         
81         
82         # Extra variables used during extra transformations.
83         protected $footnotes = array();
84         protected $footnotes_ordered = array();
85         protected $footnotes_ref_count = array();
86         protected $footnotes_numbers = array();
87         protected $abbr_desciptions = array();
88         protected $abbr_word_re = '';
89         
90         # Give the current footnote number.
91         protected $footnote_counter = 1;
92         
93         
94         protected function setup() {
95         #
96         # Setting up Extra-specific variables.
97         #
98                 parent::setup();
99                 
100                 $this->footnotes = array();
101                 $this->footnotes_ordered = array();
102                 $this->footnotes_ref_count = array();
103                 $this->footnotes_numbers = array();
104                 $this->abbr_desciptions = array();
105                 $this->abbr_word_re = '';
106                 $this->footnote_counter = 1;
107                 
108                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
109                         if ($this->abbr_word_re)
110                                 $this->abbr_word_re .= '|';
111                         $this->abbr_word_re .= preg_quote($abbr_word);
112                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
113                 }
114         }
115         
116         protected function teardown() {
117         #
118         # Clearing Extra-specific variables.
119         #
120                 $this->footnotes = array();
121                 $this->footnotes_ordered = array();
122                 $this->footnotes_ref_count = array();
123                 $this->footnotes_numbers = array();
124                 $this->abbr_desciptions = array();
125                 $this->abbr_word_re = '';
126                 
127                 parent::teardown();
128         }
129         
130         
131         ### Extra Attribute Parser ###
133         # Expression to use to catch attributes (includes the braces)
134         protected $id_class_attr_catch_re = '\{((?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
        # Expression to use when parsing in a context where no capture is desired
136         protected $id_class_attr_nocatch_re = '\{(?:[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
138         protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null) {
139         #
140         # Parse attributes caught by the $this->id_class_attr_catch_re expression
141         # and return the HTML-formatted list of attributes.
142         #
143         # Currently supported attributes are .class and #id.
144         #
145         # In addition, this method also supports supplying a default Id value,
146         # which will be used to populate the id attribute in case it was not
147         # overridden.
148                 if (empty($attr) && !$defaultIdValue) return "";
149                 
150                 # Split on components
151                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
152                 $elements = $matches[0];
154                 # handle classes and ids (only first id taken into account)
155                 $classes = array();
156                 $attributes = array();
157                 $id = false;
158                 foreach ($elements as $element) {
159                         if ($element{0} == '.') {
160                                 $classes[] = substr($element, 1);
161                         } else if ($element{0} == '#') {
162                                 if ($id === false) $id = substr($element, 1);
163                         } else if (strpos($element, '=') > 0) {
164                                 $parts = explode('=', $element, 2);
165                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
166                         }
167                 }
169                 if (!$id) $id = $defaultIdValue;
171                 # compose attributes as string
172                 $attr_str = "";
173                 if (!empty($id)) {
174                         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
175                 }
176                 if (!empty($classes)) {
177                         $attr_str .= ' class="'. implode(" ", $classes) . '"';
178                 }
179                 if (!$this->no_markup && !empty($attributes)) {
180                         $attr_str .= ' '.implode(" ", $attributes);
181                 }
182                 return $attr_str;
183         }
186         protected function stripLinkDefinitions($text) {
187         #
188         # Strips link definitions from text, stores the URLs and titles in
189         # hash references.
190         #
191                 $less_than_tab = $this->tab_width - 1;
193                 # Link defs are in the form: ^[id]: url "optional title"
194                 $text = preg_replace_callback('{
195                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
196                                                           [ ]*
197                                                           \n?                           # maybe *one* newline
198                                                           [ ]*
199                                                         (?:
200                                                           <(.+?)>                       # url = $2
201                                                         |
202                                                           (\S+?)                        # url = $3
203                                                         )
204                                                           [ ]*
205                                                           \n?                           # maybe one newline
206                                                           [ ]*
207                                                         (?:
208                                                                 (?<=\s)                 # lookbehind for whitespace
209                                                                 ["(]
210                                                                 (.*?)                   # title = $4
211                                                                 [")]
212                                                                 [ ]*
213                                                         )?      # title is optional
214                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
215                                                         (?:\n+|\Z)
216                         }xm',
217                         array($this, '_stripLinkDefinitions_callback'),
218                         $text);
219                 return $text;
220         }
221         protected function _stripLinkDefinitions_callback($matches) {
222                 $link_id = strtolower($matches[1]);
223                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
224                 $this->urls[$link_id] = $url;
225                 $this->titles[$link_id] =& $matches[4];
226                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
227                 return ''; # String that will replace the block
228         }
231         ### HTML Block Parser ###
232         
233         # Tags that are always treated as block tags:
234         protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
235                                                    
236         # Tags treated as block tags only if the opening tag is alone on its line:
237         protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
238         
239         # Tags where markdown="1" default to span mode:
240         protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
241         
242         # Tags which must not have their contents modified, no matter where 
243         # they appear:
244         protected $clean_tags_re = 'script|style|math|svg';
245         
246         # Tags that do not need to be closed.
247         protected $auto_close_tags_re = 'hr|img|param|source|track';
248         
250         protected function hashHTMLBlocks($text) {
251         #
252         # Hashify HTML Blocks and "clean tags".
253         #
254         # We only want to do this for block-level HTML tags, such as headers,
255         # lists, and tables. That's because we still want to wrap <p>s around
256         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
257         # phrase emphasis, and spans. The list of tags we're looking for is
258         # hard-coded.
259         #
260         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
261         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
262         # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
263         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
264         # These two functions are calling each other. It's recursive!
265         #
266                 if ($this->no_markup)  return $text;
268                 #
269                 # Call the HTML-in-Markdown hasher.
270                 #
271                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
272                 
273                 return $text;
274         }
275         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
276                                                                                 $enclosing_tag_re = '', $span = false)
277         {
278         #
279         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
280         #
281         # *   $indent is the number of space to be ignored when checking for code 
282         #     blocks. This is important because if we don't take the indent into 
283         #     account, something like this (which looks right) won't work as expected:
284         #
285         #     <div>
286         #         <div markdown="1">
287         #         Hello World.  <-- Is this a Markdown code block or text?
288         #         </div>  <-- Is this a Markdown code block or a real tag?
289         #     <div>
290         #
291         #     If you don't like this, just don't indent the tag on which
292         #     you apply the markdown="1" attribute.
293         #
294         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
295         #     tag with that name. Nested tags supported.
296         #
297         # *   If $span is true, text inside must treated as span. So any double 
298         #     newline will be replaced by a single newline so that it does not create 
299         #     paragraphs.
300         #
301         # Returns an array of that form: ( processed text , remaining text )
302         #
303                 if ($text === '') return array('', '');
305                 # Regex to check for the presense of newlines around a block tag.
306                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
307                 $newline_after_re = 
308                         '{
309                                 ^                                               # Start of text following the tag.
310                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
311                                 [ ]*\n                                  # Must be followed by newline.
312                         }xs';
313                 
314                 # Regex to match any tag.
315                 $block_tag_re =
316                         '{
317                                 (                                       # $2: Capture whole tag.
318                                         </?                                     # Any opening or closing tag.
319                                                 (?>                             # Tag name.
320                                                         '.$this->block_tags_re.'                        |
321                                                         '.$this->context_block_tags_re.'        |
322                                                         '.$this->clean_tags_re.'                |
323                                                         (?!\s)'.$enclosing_tag_re.'
324                                                 )
325                                                 (?:
326                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
327                                                         (?>
328                                                                 ".*?"           |       # Double quotes (can contain `>`)
329                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
330                                                                 .+?                             # Anything but quotes and `>`.
331                                                         )*?
332                                                 )?
333                                         >                                       # End of tag.
334                                 |
335                                         <!--    .*?     -->     # HTML Comment
336                                 |
337                                         <\?.*?\?> | <%.*?%>     # Processing instruction
338                                 |
339                                         <!\[CDATA\[.*?\]\]>     # CData Block
340                                 '. ( !$span ? ' # If not in span.
341                                 |
342                                         # Indented code block
343                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
344                                         [ ]{'.($indent+4).'}[^\n]* \n
345                                         (?>
346                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
347                                         )*
348                                 |
349                                         # Fenced code block marker
350                                         (?<= ^ | \n )
351                                         [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
352                                                                         [ ]*
353                                         (?:
354                                         \.?[-_:a-zA-Z0-9]+ # standalone class name
355                                         |
356                                                 '.$this->id_class_attr_nocatch_re.' # extra attributes
357                                         )?
358                                         [ ]*
359                                         (?= \n )
360                                 ' : '' ). ' # End (if not is span).
361                                 |
362                                         # Code span marker
363                                         # Note, this regex needs to go after backtick fenced
364                                         # code blocks but it should also be kept outside of the
365                                         # "if not in span" condition adding backticks to the parser
366                                         `+
367                                 )
368                         }xs';
370                 
371                 $depth = 0;             # Current depth inside the tag tree.
372                 $parsed = "";   # Parsed text that will be returned.
374                 #
375                 # Loop through every tag until we find the closing tag of the parent
376                 # or loop until reaching the end of text if no parent tag specified.
377                 #
378                 do {
379                         #
380                         # Split the text using the first $tag_match pattern found.
381                         # Text before  pattern will be first in the array, text after
382                         # pattern will be at the end, and between will be any catches made 
383                         # by the pattern.
384                         #
385                         $parts = preg_split($block_tag_re, $text, 2, 
386                                                                 PREG_SPLIT_DELIM_CAPTURE);
387                         
388                         # If in Markdown span mode, add a empty-string span-level hash 
389                         # after each newline to prevent triggering any block element.
390                         if ($span) {
391                                 $void = $this->hashPart("", ':');
392                                 $newline = "$void\n";
393                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
394                         }
395                         
396                         $parsed .= $parts[0]; # Text before current tag.
397                         
398                         # If end of $text has been reached. Stop loop.
399                         if (count($parts) < 3) {
400                                 $text = "";
401                                 break;
402                         }
403                         
404                         $tag  = $parts[1]; # Tag to handle.
405                         $text = $parts[2]; # Remaining text after current tag.
406                         $tag_re = preg_quote($tag); # For use in a regular expression.
407                         
408                         #
409                         # Check for: Fenced code block marker.
410                         # Note: need to recheck the whole tag to disambiguate backtick
411                         # fences from code spans
412                         #
413                         if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
414                                 # Fenced code block marker: find matching end marker.
415                                 $fence_indent = strlen($capture[1]); # use captured indent in re
416                                 $fence_re = $capture[2]; # use captured fence in re
417                                 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
418                                         $matches)) 
419                                 {
420                                         # End marker found: pass text unchanged until marker.
421                                         $parsed .= $tag . $matches[0];
422                                         $text = substr($text, strlen($matches[0]));
423                                 }
424                                 else {
425                                         # No end marker: just skip it.
426                                         $parsed .= $tag;
427                                 }
428                         }
429                         #
430                         # Check for: Indented code block.
431                         #
432                         else if ($tag{0} == "\n" || $tag{0} == " ") {
433                                 # Indented code block: pass it unchanged, will be handled 
434                                 # later.
435                                 $parsed .= $tag;
436                         }
437                         #
438                         # Check for: Code span marker
439                         # Note: need to check this after backtick fenced code blocks
440                         #
441                         else if ($tag{0} == "`") {
442                                 # Find corresponding end marker.
443                                 $tag_re = preg_quote($tag);
444                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
445                                         $text, $matches))
446                                 {
447                                         # End marker found: pass text unchanged until marker.
448                                         $parsed .= $tag . $matches[0];
449                                         $text = substr($text, strlen($matches[0]));
450                                 }
451                                 else {
452                                         # Unmatched marker: just skip it.
453                                         $parsed .= $tag;
454                                 }
455                         }
456                         #
457                         # Check for: Opening Block level tag or
458                         #            Opening Context Block tag (like ins and del) 
459                         #               used as a block tag (tag is alone on it's line).
460                         #
461                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
462                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
463                                         preg_match($newline_before_re, $parsed) &&
464                                         preg_match($newline_after_re, $text)    )
465                                 )
466                         {
467                                 # Need to parse tag and following text using the HTML parser.
468                                 list($block_text, $text) = 
469                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
470                                 
471                                 # Make sure it stays outside of any paragraph by adding newlines.
472                                 $parsed .= "\n\n$block_text\n\n";
473                         }
474                         #
475                         # Check for: Clean tag (like script, math)
476                         #            HTML Comments, processing instructions.
477                         #
478                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
479                                 $tag{1} == '!' || $tag{1} == '?')
480                         {
481                                 # Need to parse tag and following text using the HTML parser.
482                                 # (don't check for markdown attribute)
483                                 list($block_text, $text) = 
484                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
485                                 
486                                 $parsed .= $block_text;
487                         }
488                         #
489                         # Check for: Tag with same name as enclosing tag.
490                         #
491                         else if ($enclosing_tag_re !== '' &&
492                                 # Same name as enclosing tag.
493                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
494                         {
495                                 #
496                                 # Increase/decrease nested tag count.
497                                 #
498                                 if ($tag{1} == '/')                                             $depth--;
499                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
501                                 if ($depth < 0) {
502                                         #
503                                         # Going out of parent element. Clean up and break so we
504                                         # return to the calling function.
505                                         #
506                                         $text = $tag . $text;
507                                         break;
508                                 }
509                                 
510                                 $parsed .= $tag;
511                         }
512                         else {
513                                 $parsed .= $tag;
514                         }
515                 } while ($depth >= 0);
516                 
517                 return array($parsed, $text);
518         }
519         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
520         #
521         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
522         #
523         # *   Calls $hash_method to convert any blocks.
524         # *   Stops when the first opening tag closes.
525         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
526         #     (it is not inside clean tags)
527         #
528         # Returns an array of that form: ( processed text , remaining text )
529         #
530                 if ($text === '') return array('', '');
531                 
532                 # Regex to match `markdown` attribute inside of a tag.
533                 $markdown_attr_re = '
534                         {
535                                 \s*                     # Eat whitespace before the `markdown` attribute
536                                 markdown
537                                 \s*=\s*
538                                 (?>
539                                         (["\'])         # $1: quote delimiter           
540                                         (.*?)           # $2: attribute value
541                                         \1                      # matching delimiter    
542                                 |
543                                         ([^\s>]*)       # $3: unquoted attribute value
544                                 )
545                                 ()                              # $4: make $3 always defined (avoid warnings)
546                         }xs';
547                 
548                 # Regex to match any tag.
549                 $tag_re = '{
550                                 (                                       # $2: Capture whole tag.
551                                         </?                                     # Any opening or closing tag.
552                                                 [\w:$]+                 # Tag name.
553                                                 (?:
554                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
555                                                         (?>
556                                                                 ".*?"           |       # Double quotes (can contain `>`)
557                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
558                                                                 .+?                             # Anything but quotes and `>`.
559                                                         )*?
560                                                 )?
561                                         >                                       # End of tag.
562                                 |
563                                         <!--    .*?     -->     # HTML Comment
564                                 |
565                                         <\?.*?\?> | <%.*?%>     # Processing instruction
566                                 |
567                                         <!\[CDATA\[.*?\]\]>     # CData Block
568                                 )
569                         }xs';
570                 
571                 $original_text = $text;         # Save original text in case of faliure.
572                 
573                 $depth          = 0;    # Current depth inside the tag tree.
574                 $block_text     = "";   # Temporary text holder for current text.
575                 $parsed         = "";   # Parsed text that will be returned.
577                 #
578                 # Get the name of the starting tag.
579                 # (This pattern makes $base_tag_name_re safe without quoting.)
580                 #
581                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
582                         $base_tag_name_re = $matches[1];
584                 #
585                 # Loop through every tag until we find the corresponding closing tag.
586                 #
587                 do {
588                         #
589                         # Split the text using the first $tag_match pattern found.
590                         # Text before  pattern will be first in the array, text after
591                         # pattern will be at the end, and between will be any catches made 
592                         # by the pattern.
593                         #
594                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
595                         
596                         if (count($parts) < 3) {
597                                 #
598                                 # End of $text reached with unbalenced tag(s).
599                                 # In that case, we return original text unchanged and pass the
600                                 # first character as filtered to prevent an infinite loop in the 
601                                 # parent function.
602                                 #
603                                 return array($original_text{0}, substr($original_text, 1));
604                         }
605                         
606                         $block_text .= $parts[0]; # Text before current tag.
607                         $tag         = $parts[1]; # Tag to handle.
608                         $text        = $parts[2]; # Remaining text after current tag.
609                         
610                         #
611                         # Check for: Auto-close tag (like <hr/>)
612                         #                        Comments and Processing Instructions.
613                         #
614                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
615                                 $tag{1} == '!' || $tag{1} == '?')
616                         {
617                                 # Just add the tag to the block as if it was text.
618                                 $block_text .= $tag;
619                         }
620                         else {
621                                 #
622                                 # Increase/decrease nested tag count. Only do so if
623                                 # the tag's name match base tag's.
624                                 #
625                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
626                                         if ($tag{1} == '/')                                             $depth--;
627                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
628                                 }
629                                 
630                                 #
631                                 # Check for `markdown="1"` attribute and handle it.
632                                 #
633                                 if ($md_attr && 
634                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
635                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
636                                 {
637                                         # Remove `markdown` attribute from opening tag.
638                                         $tag = preg_replace($markdown_attr_re, '', $tag);
639                                         
640                                         # Check if text inside this tag must be parsed in span mode.
641                                         $this->mode = $attr_m[2] . $attr_m[3];
642                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
643                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
644                                         
645                                         # Calculate indent before tag.
646                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
647                                                 $strlen = $this->utf8_strlen;
648                                                 $indent = $strlen($matches[1], 'UTF-8');
649                                         } else {
650                                                 $indent = 0;
651                                         }
652                                         
653                                         # End preceding block with this tag.
654                                         $block_text .= $tag;
655                                         $parsed .= $this->$hash_method($block_text);
656                                         
657                                         # Get enclosing tag name for the ParseMarkdown function.
658                                         # (This pattern makes $tag_name_re safe without quoting.)
659                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
660                                         $tag_name_re = $matches[1];
661                                         
662                                         # Parse the content using the HTML-in-Markdown parser.
663                                         list ($block_text, $text)
664                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
665                                                         $tag_name_re, $span_mode);
666                                         
667                                         # Outdent markdown text.
668                                         if ($indent > 0) {
669                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
670                                                                                                         $block_text);
671                                         }
672                                         
673                                         # Append tag content to parsed text.
674                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
675                                         else                            $parsed .= "$block_text";
676                                         
677                                         # Start over with a new block.
678                                         $block_text = "";
679                                 }
680                                 else $block_text .= $tag;
681                         }
682                         
683                 } while ($depth > 0);
684                 
685                 #
686                 # Hash last block text that wasn't processed inside the loop.
687                 #
688                 $parsed .= $this->$hash_method($block_text);
689                 
690                 return array($parsed, $text);
691         }
694         protected function hashClean($text) {
695         #
696         # Called whenever a tag must be hashed when a function inserts a "clean" tag
697         # in $text, it passes through this function and is automaticaly escaped, 
698         # blocking invalid nested overlap.
699         #
700                 return $this->hashPart($text, 'C');
701         }
704         protected function doAnchors($text) {
705         #
706         # Turn Markdown link shortcuts into XHTML <a> tags.
707         #
708                 if ($this->in_anchor) return $text;
709                 $this->in_anchor = true;
710                 
711                 #
712                 # First, handle reference-style links: [link text] [id]
713                 #
714                 $text = preg_replace_callback('{
715                         (                                       # wrap whole match in $1
716                           \[
717                                 ('.$this->nested_brackets_re.') # link text = $2
718                           \]
720                           [ ]?                          # one optional space
721                           (?:\n[ ]*)?           # one optional newline followed by spaces
723                           \[
724                                 (.*?)           # id = $3
725                           \]
726                         )
727                         }xs',
728                         array($this, '_doAnchors_reference_callback'), $text);
730                 #
731                 # Next, inline-style links: [link text](url "optional title")
732                 #
733                 $text = preg_replace_callback('{
734                         (                               # wrap whole match in $1
735                           \[
736                                 ('.$this->nested_brackets_re.') # link text = $2
737                           \]
738                           \(                    # literal paren
739                                 [ \n]*
740                                 (?:
741                                         <(.+?)> # href = $3
742                                 |
743                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
744                                 )
745                                 [ \n]*
746                                 (                       # $5
747                                   ([\'"])       # quote char = $6
748                                   (.*?)         # Title = $7
749                                   \6            # matching quote
750                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
751                                 )?                      # title is optional
752                           \)
753                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
754                         )
755                         }xs',
756                         array($this, '_doAnchors_inline_callback'), $text);
758                 #
759                 # Last, handle reference-style shortcuts: [link text]
760                 # These must come last in case you've also got [link text][1]
761                 # or [link text](/foo)
762                 #
763                 $text = preg_replace_callback('{
764                         (                                       # wrap whole match in $1
765                           \[
766                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
767                           \]
768                         )
769                         }xs',
770                         array($this, '_doAnchors_reference_callback'), $text);
772                 $this->in_anchor = false;
773                 return $text;
774         }
775         protected function _doAnchors_reference_callback($matches) {
776                 $whole_match =  $matches[1];
777                 $link_text   =  $matches[2];
778                 $link_id     =& $matches[3];
780                 if ($link_id == "") {
781                         # for shortcut links like [this][] or [this].
782                         $link_id = $link_text;
783                 }
784                 
785                 # lower-case and turn embedded newlines into spaces
786                 $link_id = strtolower($link_id);
787                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
789                 if (isset($this->urls[$link_id])) {
790                         $url = $this->urls[$link_id];
791                         $url = $this->encodeURLAttribute($url);
792                         
793                         $result = "<a href=\"$url\"";
794                         if ( isset( $this->titles[$link_id] ) ) {
795                                 $title = $this->titles[$link_id];
796                                 $title = $this->encodeAttribute($title);
797                                 $result .=  " title=\"$title\"";
798                         }
799                         if (isset($this->ref_attr[$link_id]))
800                                 $result .= $this->ref_attr[$link_id];
801                 
802                         $link_text = $this->runSpanGamut($link_text);
803                         $result .= ">$link_text</a>";
804                         $result = $this->hashPart($result);
805                 }
806                 else {
807                         $result = $whole_match;
808                 }
809                 return $result;
810         }
811         protected function _doAnchors_inline_callback($matches) {
812                 $whole_match    =  $matches[1];
813                 $link_text              =  $this->runSpanGamut($matches[2]);
814                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
815                 $title                  =& $matches[7];
816                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
818                 // if the URL was of the form <s p a c e s> it got caught by the HTML
819                 // tag parser and hashed. Need to reverse the process before using the URL.
820                 $unhashed = $this->unhash($url);
821                 if ($unhashed != $url)
822                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
824                 $url = $this->encodeURLAttribute($url);
826                 $result = "<a href=\"$url\"";
827                 if (isset($title)) {
828                         $title = $this->encodeAttribute($title);
829                         $result .=  " title=\"$title\"";
830                 }
831                 $result .= $attr;
832                 
833                 $link_text = $this->runSpanGamut($link_text);
834                 $result .= ">$link_text</a>";
836                 return $this->hashPart($result);
837         }
840         protected function doImages($text) {
841         #
842         # Turn Markdown image shortcuts into <img> tags.
843         #
844                 #
845                 # First, handle reference-style labeled images: ![alt text][id]
846                 #
847                 $text = preg_replace_callback('{
848                         (                               # wrap whole match in $1
849                           !\[
850                                 ('.$this->nested_brackets_re.')         # alt text = $2
851                           \]
853                           [ ]?                          # one optional space
854                           (?:\n[ ]*)?           # one optional newline followed by spaces
856                           \[
857                                 (.*?)           # id = $3
858                           \]
860                         )
861                         }xs', 
862                         array($this, '_doImages_reference_callback'), $text);
864                 #
865                 # Next, handle inline images:  ![alt text](url "optional title")
866                 # Don't forget: encode * and _
867                 #
868                 $text = preg_replace_callback('{
869                         (                               # wrap whole match in $1
870                           !\[
871                                 ('.$this->nested_brackets_re.')         # alt text = $2
872                           \]
873                           \s?                   # One optional whitespace character
874                           \(                    # literal paren
875                                 [ \n]*
876                                 (?:
877                                         <(\S*)> # src url = $3
878                                 |
879                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
880                                 )
881                                 [ \n]*
882                                 (                       # $5
883                                   ([\'"])       # quote char = $6
884                                   (.*?)         # title = $7
885                                   \6            # matching quote
886                                   [ \n]*
887                                 )?                      # title is optional
888                           \)
889                           (?:[ ]? '.$this->id_class_attr_catch_re.' )?   # $8 = id/class attributes
890                         )
891                         }xs',
892                         array($this, '_doImages_inline_callback'), $text);
894                 return $text;
895         }
896         protected function _doImages_reference_callback($matches) {
897                 $whole_match = $matches[1];
898                 $alt_text    = $matches[2];
899                 $link_id     = strtolower($matches[3]);
901                 if ($link_id == "") {
902                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
903                 }
905                 $alt_text = $this->encodeAttribute($alt_text);
906                 if (isset($this->urls[$link_id])) {
907                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
908                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
909                         if (isset($this->titles[$link_id])) {
910                                 $title = $this->titles[$link_id];
911                                 $title = $this->encodeAttribute($title);
912                                 $result .=  " title=\"$title\"";
913                         }
914                         if (isset($this->ref_attr[$link_id]))
915                                 $result .= $this->ref_attr[$link_id];
916                         $result .= $this->empty_element_suffix;
917                         $result = $this->hashPart($result);
918                 }
919                 else {
920                         # If there's no such link ID, leave intact:
921                         $result = $whole_match;
922                 }
924                 return $result;
925         }
926         protected function _doImages_inline_callback($matches) {
927                 $whole_match    = $matches[1];
928                 $alt_text               = $matches[2];
929                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
930                 $title                  =& $matches[7];
931                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
933                 $alt_text = $this->encodeAttribute($alt_text);
934                 $url = $this->encodeURLAttribute($url);
935                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
936                 if (isset($title)) {
937                         $title = $this->encodeAttribute($title);
938                         $result .=  " title=\"$title\""; # $title already quoted
939                 }
940                 $result .= $attr;
941                 $result .= $this->empty_element_suffix;
943                 return $this->hashPart($result);
944         }
947         protected function doHeaders($text) {
948         #
949         # Redefined to add id and class attribute support.
950         #
951                 # Setext-style headers:
952                 #         Header 1  {#header1}
953                 #         ========
954                 #  
955                 #         Header 2  {#header2 .class1 .class2}
956                 #         --------
957                 #
958                 $text = preg_replace_callback(
959                         '{
960                                 (^.+?)                                                          # $1: Header text
961                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
962                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
963                         }mx',
964                         array($this, '_doHeaders_callback_setext'), $text);
966                 # atx-style headers:
967                 #       # Header 1        {#header1}
968                 #       ## Header 2       {#header2}
969                 #       ## Header 2 with closing hashes ##  {#header3.class1.class2}
970                 #       ...
971                 #       ###### Header 6   {.class2}
972                 #
973                 $text = preg_replace_callback('{
974                                 ^(\#{1,6})      # $1 = string of #\'s
975                                 [ ]*
976                                 (.+?)           # $2 = Header text
977                                 [ ]*
978                                 \#*                     # optional closing #\'s (not counted)
979                                 (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
980                                 [ ]*
981                                 \n+
982                         }xm',
983                         array($this, '_doHeaders_callback_atx'), $text);
985                 return $text;
986         }
987         protected function _doHeaders_callback_setext($matches) {
988                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
989                         return $matches[0];
991                 $level = $matches[3]{0} == '=' ? 1 : 2;
993                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
995                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
996                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
997                 return "\n" . $this->hashBlock($block) . "\n\n";
998         }
999         protected function _doHeaders_callback_atx($matches) {
1000                 $level = strlen($matches[1]);
1002                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1003                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1004                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
1005                 return "\n" . $this->hashBlock($block) . "\n\n";
1006         }
1009         protected function doTables($text) {
1010         #
1011         # Form HTML tables.
1012         #
1013                 $less_than_tab = $this->tab_width - 1;
1014                 #
1015                 # Find tables with leading pipe.
1016                 #
1017                 #       | Header 1 | Header 2
1018                 #       | -------- | --------
1019                 #       | Cell 1   | Cell 2
1020                 #       | Cell 3   | Cell 4
1021                 #
1022                 $text = preg_replace_callback('
1023                         {
1024                                 ^                                                       # Start of a line
1025                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1026                                 [|]                                                     # Optional leading pipe (present)
1027                                 (.+) \n                                         # $1: Header row (at least one pipe)
1028                                 
1029                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1030                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
1031                                 
1032                                 (                                                       # $3: Cells
1033                                         (?>
1034                                                 [ ]*                            # Allowed whitespace.
1035                                                 [|] .* \n                       # Row content.
1036                                         )*
1037                                 )
1038                                 (?=\n|\Z)                                       # Stop at final double newline.
1039                         }xm',
1040                         array($this, '_doTable_leadingPipe_callback'), $text);
1041                 
1042                 #
1043                 # Find tables without leading pipe.
1044                 #
1045                 #       Header 1 | Header 2
1046                 #       -------- | --------
1047                 #       Cell 1   | Cell 2
1048                 #       Cell 3   | Cell 4
1049                 #
1050                 $text = preg_replace_callback('
1051                         {
1052                                 ^                                                       # Start of a line
1053                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1054                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
1055                                 
1056                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
1057                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
1058                                 
1059                                 (                                                       # $3: Cells
1060                                         (?>
1061                                                 .* [|] .* \n            # Row content
1062                                         )*
1063                                 )
1064                                 (?=\n|\Z)                                       # Stop at final double newline.
1065                         }xm',
1066                         array($this, '_DoTable_callback'), $text);
1068                 return $text;
1069         }
1070         protected function _doTable_leadingPipe_callback($matches) {
1071                 $head           = $matches[1];
1072                 $underline      = $matches[2];
1073                 $content        = $matches[3];
1074                 
1075                 # Remove leading pipe for each row.
1076                 $content        = preg_replace('/^ *[|]/m', '', $content);
1077                 
1078                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1079         }
1080         protected function _doTable_makeAlignAttr($alignname)
1081         {
1082                 if (empty($this->table_align_class_tmpl))
1083                         return " align=\"$alignname\"";
1085                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1086                 return " class=\"$classname\"";
1087         }
1088         protected function _doTable_callback($matches) {
1089                 $head           = $matches[1];
1090                 $underline      = $matches[2];
1091                 $content        = $matches[3];
1093                 # Remove any tailing pipes for each line.
1094                 $head           = preg_replace('/[|] *$/m', '', $head);
1095                 $underline      = preg_replace('/[|] *$/m', '', $underline);
1096                 $content        = preg_replace('/[|] *$/m', '', $content);
1097                 
1098                 # Reading alignement from header underline.
1099                 $separators     = preg_split('/ *[|] */', $underline);
1100                 foreach ($separators as $n => $s) {
1101                         if (preg_match('/^ *-+: *$/', $s))
1102                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1103                         else if (preg_match('/^ *:-+: *$/', $s))
1104                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1105                         else if (preg_match('/^ *:-+ *$/', $s))
1106                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1107                         else
1108                                 $attr[$n] = '';
1109                 }
1110                 
1111                 # Parsing span elements, including code spans, character escapes, 
1112                 # and inline HTML tags, so that pipes inside those gets ignored.
1113                 $head           = $this->parseSpan($head);
1114                 $headers        = preg_split('/ *[|] */', $head);
1115                 $col_count      = count($headers);
1116                 $attr       = array_pad($attr, $col_count, '');
1117                 
1118                 # Write column headers.
1119                 $text = "<table>\n";
1120                 $text .= "<thead>\n";
1121                 $text .= "<tr>\n";
1122                 foreach ($headers as $n => $header)
1123                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
1124                 $text .= "</tr>\n";
1125                 $text .= "</thead>\n";
1126                 
1127                 # Split content by row.
1128                 $rows = explode("\n", trim($content, "\n"));
1129                 
1130                 $text .= "<tbody>\n";
1131                 foreach ($rows as $row) {
1132                         # Parsing span elements, including code spans, character escapes, 
1133                         # and inline HTML tags, so that pipes inside those gets ignored.
1134                         $row = $this->parseSpan($row);
1135                         
1136                         # Split row by cell.
1137                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
1138                         $row_cells = array_pad($row_cells, $col_count, '');
1139                         
1140                         $text .= "<tr>\n";
1141                         foreach ($row_cells as $n => $cell)
1142                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
1143                         $text .= "</tr>\n";
1144                 }
1145                 $text .= "</tbody>\n";
1146                 $text .= "</table>";
1147                 
1148                 return $this->hashBlock($text) . "\n";
1149         }
1151         
1152         protected function doDefLists($text) {
1153         #
1154         # Form HTML definition lists.
1155         #
1156                 $less_than_tab = $this->tab_width - 1;
1158                 # Re-usable pattern to match any entire dl list:
1159                 $whole_list_re = '(?>
1160                         (                                                               # $1 = whole list
1161                           (                                                             # $2
1162                                 [ ]{0,'.$less_than_tab.'}
1163                                 ((?>.*\S.*\n)+)                         # $3 = defined term
1164                                 \n?
1165                                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1166                           )
1167                           (?s:.+?)
1168                           (                                                             # $4
1169                                   \z
1170                                 |
1171                                   \n{2,}
1172                                   (?=\S)
1173                                   (?!                                           # Negative lookahead for another term
1174                                         [ ]{0,'.$less_than_tab.'}
1175                                         (?: \S.*\n )+?                  # defined term
1176                                         \n?
1177                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1178                                   )
1179                                   (?!                                           # Negative lookahead for another definition
1180                                         [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1181                                   )
1182                           )
1183                         )
1184                 )'; // mx
1186                 $text = preg_replace_callback('{
1187                                 (?>\A\n?|(?<=\n\n))
1188                                 '.$whole_list_re.'
1189                         }mx',
1190                         array($this, '_doDefLists_callback'), $text);
1192                 return $text;
1193         }
1194         protected function _doDefLists_callback($matches) {
1195                 # Re-usable patterns to match list item bullets and number markers:
1196                 $list = $matches[1];
1197                 
1198                 # Turn double returns into triple returns, so that we can make a
1199                 # paragraph for the last item in a list, if necessary:
1200                 $result = trim($this->processDefListItems($list));
1201                 $result = "<dl>\n" . $result . "\n</dl>";
1202                 return $this->hashBlock($result) . "\n\n";
1203         }
1206         protected function processDefListItems($list_str) {
1207         #
1208         #       Process the contents of a single definition list, splitting it
1209         #       into individual term and definition list items.
1210         #
1211                 $less_than_tab = $this->tab_width - 1;
1212                 
1213                 # trim trailing blank lines:
1214                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1216                 # Process definition terms.
1217                 $list_str = preg_replace_callback('{
1218                         (?>\A\n?|\n\n+)                                 # leading line
1219                         (                                                               # definition terms = $1
1220                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
1221                                 (?!\:[ ]|[ ])                           # negative lookahead for a definition
1222                                                                                         #   mark (colon) or more whitespace.
1223                                 (?> \S.* \n)+?                          # actual term (not whitespace). 
1224                         )                       
1225                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed 
1226                                                                                         #   with a definition mark.
1227                         }xm',
1228                         array($this, '_processDefListItems_callback_dt'), $list_str);
1230                 # Process actual definitions.
1231                 $list_str = preg_replace_callback('{
1232                         \n(\n+)?                                                # leading line = $1
1233                         (                                                               # marker space = $2
1234                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
1235                                 \:[ ]+                                          # definition mark (colon)
1236                         )
1237                         ((?s:.+?))                                              # definition text = $3
1238                         (?= \n+                                                 # stop at next definition mark,
1239                                 (?:                                                     # next term or end of text
1240                                         [ ]{0,'.$less_than_tab.'} \:[ ] |
1241                                         <dt> | \z
1242                                 )                                               
1243                         )                                       
1244                         }xm',
1245                         array($this, '_processDefListItems_callback_dd'), $list_str);
1247                 return $list_str;
1248         }
1249         protected function _processDefListItems_callback_dt($matches) {
1250                 $terms = explode("\n", trim($matches[1]));
1251                 $text = '';
1252                 foreach ($terms as $term) {
1253                         $term = $this->runSpanGamut(trim($term));
1254                         $text .= "\n<dt>" . $term . "</dt>";
1255                 }
1256                 return $text . "\n";
1257         }
1258         protected function _processDefListItems_callback_dd($matches) {
1259                 $leading_line   = $matches[1];
1260                 $marker_space   = $matches[2];
1261                 $def                    = $matches[3];
1263                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
1264                         # Replace marker with the appropriate whitespace indentation
1265                         $def = str_repeat(' ', strlen($marker_space)) . $def;
1266                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1267                         $def = "\n". $def ."\n";
1268                 }
1269                 else {
1270                         $def = rtrim($def);
1271                         $def = $this->runSpanGamut($this->outdent($def));
1272                 }
1274                 return "\n<dd>" . $def . "</dd>\n";
1275         }
1278         protected function doFencedCodeBlocks($text) {
1279         #
1280         # Adding the fenced code block syntax to regular Markdown:
1281         #
1282         # ~~~
1283         # Code block
1284         # ~~~
1285         #
1286                 $less_than_tab = $this->tab_width;
1287                 
1288                 $text = preg_replace_callback('{
1289                                 (?:\n|\A)
1290                                 # 1: Opening marker
1291                                 (
1292                                         (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1293                                 )
1294                                 [ ]*
1295                                 (?:
1296                                         \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1297                                 |
1298                                         '.$this->id_class_attr_catch_re.' # 3: Extra attributes
1299                                 )?
1300                                 [ ]* \n # Whitespace and newline following marker.
1301                                 
1302                                 # 4: Content
1303                                 (
1304                                         (?>
1305                                                 (?!\1 [ ]* \n)  # Not a closing marker.
1306                                                 .*\n+
1307                                         )+
1308                                 )
1309                                 
1310                                 # Closing marker.
1311                                 \1 [ ]* (?= \n )
1312                         }xm',
1313                         array($this, '_doFencedCodeBlocks_callback'), $text);
1315                 return $text;
1316         }
1317         protected function _doFencedCodeBlocks_callback($matches) {
1318                 $classname =& $matches[2];
1319                 $attrs     =& $matches[3];
1320                 $codeblock = $matches[4];
1321                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1322                 $codeblock = preg_replace_callback('/^\n+/',
1323                         array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1325                 if ($classname != "") {
1326                         if ($classname{0} == '.')
1327                                 $classname = substr($classname, 1);
1328                         $attr_str = ' class="'.$this->code_class_prefix.$classname.'"';
1329                 } else {
1330                         $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
1331                 }
1332                 $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
1333                 $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1334                 $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1335                 
1336                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1337         }
1338         protected function _doFencedCodeBlocks_newlines($matches) {
1339                 return str_repeat("<br$this->empty_element_suffix", 
1340                         strlen($matches[0]));
1341         }
        #
        # Redefining emphasis markers so that emphasis by underscore does not
        # work in the middle of a word.
        #
        # Each table maps a key to a regular expression fragment. Presumably
        # the key is the emphasis token currently open ('' meaning none) and
        # the value matches the next usable token -- confirm against the
        # parent Markdown class, which consumes these tables.
        #
        # Under the '' key an underscore token is rejected when preceded by
        # a letter, digit or underscore, and any token is rejected when
        # followed by whitespace (optionally after one of . , : ;).
        # Under the '_' keys the token is rejected when preceded by whitespace
        # or an underscore, or followed by a letter, digit or underscore.
        # Under the '*' keys the token is rejected when preceded by whitespace
        # or an asterisk, or followed by an asterisk.
        #
        # Single marker: * or _
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
                );
        # Double marker: ** or __
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
                );
        # Triple marker: *** or ___
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
                );
1365         protected function formParagraphs($text) {
1366         #
1367         #       Params:
1368         #               $text - string to process with html <p> tags
1369         #
1370                 # Strip leading and trailing lines:
1371                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1372                 
1373                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1375                 #
1376                 # Wrap <p> tags and unhashify HTML blocks
1377                 #
1378                 foreach ($grafs as $key => $value) {
1379                         $value = trim($this->runSpanGamut($value));
1380                         
1381                         # Check if this should be enclosed in a paragraph.
1382                         # Clean tag hashes & block tag hashes are left alone.
1383                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1384                         
1385                         if ($is_p) {
1386                                 $value = "<p>$value</p>";
1387                         }
1388                         $grafs[$key] = $value;
1389                 }
1390                 
1391                 # Join grafs in one text, then unhash HTML tags. 
1392                 $text = implode("\n\n", $grafs);
1393                 
1394                 # Finish by removing any tag hashes still present in $text.
1395                 $text = $this->unhash($text);
1396                 
1397                 return $text;
1398         }
1399         
1400         
1401         ### Footnotes
1402         
1403         protected function stripFootnotes($text) {
1404         #
1405         # Strips link definitions from text, stores the URLs and titles in
1406         # hash references.
1407         #
1408                 $less_than_tab = $this->tab_width - 1;
1410                 # Link defs are in the form: [^id]: url "optional title"
1411                 $text = preg_replace_callback('{
1412                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
1413                           [ ]*
1414                           \n?                                   # maybe *one* newline
1415                         (                                               # text = $2 (no blank lines allowed)
1416                                 (?:                                     
1417                                         .+                              # actual text
1418                                 |
1419                                         \n                              # newlines but 
1420                                         (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1421                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
1422                                                                         # by non-indented content
1423                                 )*
1424                         )               
1425                         }xm',
1426                         array($this, '_stripFootnotes_callback'),
1427                         $text);
1428                 return $text;
1429         }
1430         protected function _stripFootnotes_callback($matches) {
1431                 $note_id = $this->fn_id_prefix . $matches[1];
1432                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
1433                 return ''; # String that will replace the block
1434         }
1437         protected function doFootnotes($text) {
1438         #
1439         # Replace footnote references in $text [^id] with a special text-token 
1440         # which will be replaced by the actual footnote marker in appendFootnotes.
1441         #
1442                 if (!$this->in_anchor) {
1443                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1444                 }
1445                 return $text;
1446         }
1448         
1449         protected function appendFootnotes($text) {
1450         #
1451         # Append footnote list to text.
1452         #
1453                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1454                         array($this, '_appendFootnotes_callback'), $text);
1455         
1456                 if (!empty($this->footnotes_ordered)) {
1457                         $text .= "\n\n";
1458                         $text .= "<div class=\"footnotes\">\n";
1459                         $text .= "<hr". $this->empty_element_suffix ."\n";
1460                         $text .= "<ol>\n\n";
1462                         $attr = "";
1463                         if ($this->fn_backlink_class != "") {
1464                                 $class = $this->fn_backlink_class;
1465                                 $class = $this->encodeAttribute($class);
1466                                 $attr .= " class=\"$class\"";
1467                         }
1468                         if ($this->fn_backlink_title != "") {
1469                                 $title = $this->fn_backlink_title;
1470                                 $title = $this->encodeAttribute($title);
1471                                 $attr .= " title=\"$title\"";
1472                         }
1473                         $num = 0;
1474                         
1475                         while (!empty($this->footnotes_ordered)) {
1476                                 $footnote = reset($this->footnotes_ordered);
1477                                 $note_id = key($this->footnotes_ordered);
1478                                 unset($this->footnotes_ordered[$note_id]);
1479                                 $ref_count = $this->footnotes_ref_count[$note_id];
1480                                 unset($this->footnotes_ref_count[$note_id]);
1481                                 unset($this->footnotes[$note_id]);
1482                                 
1483                                 $footnote .= "\n"; # Need to append newline before parsing.
1484                                 $footnote = $this->runBlockGamut("$footnote\n");                                
1485                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1486                                         array($this, '_appendFootnotes_callback'), $footnote);
1487                                 
1488                                 $attr = str_replace("%%", ++$num, $attr);
1489                                 $note_id = $this->encodeAttribute($note_id);
1491                                 # Prepare backlink, multiple backlinks if multiple references
1492                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
1493                                 for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1494                                         $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>&#8617;</a>";
1495                                 }
1496                                 # Add backlink to last paragraph; create new paragraph if needed.
1497                                 if (preg_match('{</p>$}', $footnote)) {
1498                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1499                                 } else {
1500                                         $footnote .= "\n\n<p>$backlink</p>";
1501                                 }
1502                                 
1503                                 $text .= "<li id=\"fn:$note_id\">\n";
1504                                 $text .= $footnote . "\n";
1505                                 $text .= "</li>\n\n";
1506                         }
1507                         
1508                         $text .= "</ol>\n";
1509                         $text .= "</div>";
1510                 }
1511                 return $text;
1512         }
1513         protected function _appendFootnotes_callback($matches) {
1514                 $node_id = $this->fn_id_prefix . $matches[1];
1515                 
1516                 # Create footnote marker only if it has a corresponding footnote *and*
1517                 # the footnote hasn't been used by another marker.
1518                 if (isset($this->footnotes[$node_id])) {
1519                         $num =& $this->footnotes_numbers[$node_id];
1520                         if (!isset($num)) {
1521                                 # Transfer footnote content to the ordered list and give it its
1522                                 # number
1523                                 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1524                                 $this->footnotes_ref_count[$node_id] = 1;
1525                                 $num = $this->footnote_counter++;
1526                                 $ref_count_mark = '';
1527                         } else {
1528                                 $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1529                         }
1531                         $attr = "";
1532                         if ($this->fn_link_class != "") {
1533                                 $class = $this->fn_link_class;
1534                                 $class = $this->encodeAttribute($class);
1535                                 $attr .= " class=\"$class\"";
1536                         }
1537                         if ($this->fn_link_title != "") {
1538                                 $title = $this->fn_link_title;
1539                                 $title = $this->encodeAttribute($title);
1540                                 $attr .= " title=\"$title\"";
1541                         }
1542                         
1543                         $attr = str_replace("%%", $num, $attr);
1544                         $node_id = $this->encodeAttribute($node_id);
1545                         
1546                         return
1547                                 "<sup id=\"fnref$ref_count_mark:$node_id\">".
1548                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1549                                 "</sup>";
1550                 }
1551                 
1552                 return "[^".$matches[1]."]";
1553         }
1554                 
1555         
1556         ### Abbreviations ###
1557         
1558         protected function stripAbbreviations($text) {
1559         #
1560         # Strips abbreviations from text, stores titles in hash references.
1561         #
1562                 $less_than_tab = $this->tab_width - 1;
1564                 # Link defs are in the form: [id]*: url "optional title"
1565                 $text = preg_replace_callback('{
1566                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
1567                         (.*)                                    # text = $2 (no blank lines allowed)    
1568                         }xm',
1569                         array($this, '_stripAbbreviations_callback'),
1570                         $text);
1571                 return $text;
1572         }
1573         protected function _stripAbbreviations_callback($matches) {
1574                 $abbr_word = $matches[1];
1575                 $abbr_desc = $matches[2];
1576                 if ($this->abbr_word_re)
1577                         $this->abbr_word_re .= '|';
1578                 $this->abbr_word_re .= preg_quote($abbr_word);
1579                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1580                 return ''; # String that will replace the block
1581         }
1582         
1583         
1584         protected function doAbbreviations($text) {
1585         #
1586         # Find defined abbreviations in text and wrap them in <abbr> elements.
1587         #
1588                 if ($this->abbr_word_re) {
1589                         // cannot use the /x modifier because abbr_word_re may 
1590                         // contain significant spaces:
1591                         $text = preg_replace_callback('{'.
1592                                 '(?<![\w\x1A])'.
1593                                 '(?:'.$this->abbr_word_re.')'.
1594                                 '(?![\w\x1A])'.
1595                                 '}', 
1596                                 array($this, '_doAbbreviations_callback'), $text);
1597                 }
1598                 return $text;
1599         }
1600         protected function _doAbbreviations_callback($matches) {
1601                 $abbr = $matches[0];
1602                 if (isset($this->abbr_desciptions[$abbr])) {
1603                         $desc = $this->abbr_desciptions[$abbr];
1604                         if (empty($desc)) {
1605                                 return $this->hashPart("<abbr>$abbr</abbr>");
1606                         } else {
1607                                 $desc = $this->encodeAttribute($desc);
1608                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1609                         }
1610                 } else {
1611                         return $matches[0];
1612                 }
1613         }