MDL-24552 importing latest markdown extra 1.2.4
[moodle.git] / lib / markdown.php
1 <?php
2 #
3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2009 Michel Fortin  
7 # <http://michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
define( 'MARKDOWNEXTRA_VERSION',  "1.2.4" ); # Sat 10 Oct 2009


#
# Global default settings:
#
# Each setting below is defined only when it has not been defined already,
# so an including application can override any of them beforehand. An
# explicit defined() guard is used instead of "@define(...)": the guard never
# raises an error in the first place, whereas "@" raises one and merely hides
# it (custom error handlers are still invoked for suppressed errors).
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') or define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH') or define( 'MARKDOWN_TAB_WIDTH',     4 );

# Optional title attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_TITLE') or define( 'MARKDOWN_FN_LINK_TITLE',         "" );
defined('MARKDOWN_FN_BACKLINK_TITLE') or define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );

# Optional class attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_CLASS') or define( 'MARKDOWN_FN_LINK_CLASS',         "" );
defined('MARKDOWN_FN_BACKLINK_CLASS') or define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS') or define( 'MARKDOWN_WP_POSTS',      true );
defined('MARKDOWN_WP_COMMENTS') or define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

# Name of the class instantiated by Markdown() to do the conversion.
defined('MARKDOWN_PARSER_CLASS') or define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
function Markdown($text) {
#
# Convert $text with a parser object that is shared by every call, and
# return whatever the parser's transform method returns.
#
    # The parser is created on first use from the class named by
    # MARKDOWN_PARSER_CLASS and then kept in a static variable.
    static $parser;
    if (isset($parser)) {
        return $parser->transform($text);
    }

    $class_name = MARKDOWN_PARSER_CLASS;
    $parser = new $class_name;
    return $parser->transform($text);
}
68 ### WordPress Plugin Interface ###
70 /*
71 Plugin Name: Markdown Extra
72 Plugin URI: http://michelf.com/projects/php-markdown/
73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
74 Version: 1.2.4
75 Author: Michel Fortin
76 Author URI: http://michelf.com/
77 */
79 if (isset($wp_version)) {
80         # More details about how it works here:
81         # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
82         
83         # Post content and excerpts
84         # - Remove WordPress paragraph generator.
85         # - Run Markdown on excerpt, then remove all tags.
86         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
if (MARKDOWN_WP_POSTS) {
    # Take the 'wpautop' filter off content, content feed and excerpt.
    remove_filter('the_content', 'wpautop');
    remove_filter('the_content_rss', 'wpautop');
    remove_filter('the_excerpt', 'wpautop');

    # Run Markdown (priority 6) on content, content feed and excerpt text.
    add_filter('the_content', 'mdwp_MarkdownPost', 6);
    add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
    add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);

    # Excerpts: trim after Markdown, add paragraph tags for display and
    # strip them again for the excerpt feed.
    add_filter('get_the_excerpt', 'trim', 7);
    add_filter('the_excerpt', 'mdwp_add_p');
    add_filter('the_excerpt_rss', 'mdwp_strip_p');

    # Tag balancing is removed from the save hooks and attached to the
    # display hooks instead.
    remove_filter('content_save_pre', 'balanceTags', 50);
    remove_filter('excerpt_save_pre', 'balanceTags', 50);
    add_filter('the_content', 'balanceTags', 50);
    add_filter('get_the_excerpt', 'balanceTags', 9);
}
103         
104         # Add a footnote id prefix to posts when inside a loop.
function mdwp_MarkdownPost($text) {
#
# Transform post text with a parser kept between calls, setting the
# parser's footnote id prefix before each transformation.
#
    static $parser;
    if (!$parser) {
        $class_name = MARKDOWN_PARSER_CLASS;
        $parser = new $class_name;
    }

    # No prefix when is_single(), is_page() or is_feed() reports true;
    # otherwise footnote ids are prefixed with the current post ID and a dot.
    $standalone = is_single() || is_page() || is_feed();
    $parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

    return $parser->transform($text);
}
118         
119         # Comments
120         # - Remove WordPress paragraph generator.
121         # - Remove WordPress auto-link generator.
122         # - Scramble important tags before passing them to the kses filter.
123         # - Run Markdown on excerpt then remove paragraph tags.
if (MARKDOWN_WP_COMMENTS) {
    # Take the paragraph and auto-link filters off comment text.
    remove_filter('comment_text', 'wpautop', 30);
    remove_filter('comment_text', 'make_clickable');

    # On pre_comment_content: Markdown at priority 6, then tags are hidden
    # at priority 8 and shown again at priority 12.
    add_filter('pre_comment_content', 'Markdown', 6);
    add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
    add_filter('pre_comment_content', 'mdwp_show_tags', 12);

    # Markdown on comment text and excerpts; excerpts lose their <p> tags.
    add_filter('get_comment_text', 'Markdown', 6);
    add_filter('get_comment_excerpt', 'Markdown', 6);
    add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

    # Parallel lists used by mdwp_hide_tags() and mdwp_show_tags(): the tags
    # to scramble and, position for position, their rot13-decoded
    # placeholder strings.
    global $mdwp_hidden_tags, $mdwp_placeholders;
    $mdwp_hidden_tags = explode(' ',
        '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
    $mdwp_placeholders = explode(' ', str_rot13(
        'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
        'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
}
141         
function mdwp_add_p($text) {
#
# Wrap $text in paragraph tags, splitting paragraphs at runs of two or more
# newlines. Text that is empty, or that already starts with one of the
# listed block tags, is returned unchanged.
#
    if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
        return $text;
    }

    $wrapped = '<p>'.$text.'</p>';
    return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
149         
function mdwp_strip_p($t) {
    # Remove every <p> and </p> tag, in any letter case.
    $without_p = preg_replace('{</?p>}i', '', $t);
    return $without_p;
}
function mdwp_hide_tags($text) {
    # Swap each tag listed in $mdwp_hidden_tags for the placeholder at the
    # same position in $mdwp_placeholders.
    global $mdwp_hidden_tags, $mdwp_placeholders;
    $hidden = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    return $hidden;
}
function mdwp_show_tags($text) {
    # Reverse of mdwp_hide_tags(): swap each placeholder back to the tag at
    # the same position in $mdwp_hidden_tags.
    global $mdwp_hidden_tags, $mdwp_placeholders;
    $shown = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    return $shown;
}
163 ### bBlog Plugin Info ###
function identify_modifier_markdown() {
#
# Return the descriptive record for this modifier: name, type, display
# name, description, authors, licence, version and help text.
#
    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'PHP Markdown Extra';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = MARKDOWNEXTRA_VERSION;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>';
    return $info;
}
179 ### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
    # Modifier entry point: hands $text to Markdown() and returns the result.
    $html = Markdown($text);
    return $html;
}
186 ### Textile Compatibility Mode ###
188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
190 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
191         # Try to include PHP SmartyPants. Should be in the same directory.
192         @include_once 'smartypants.php';
193         # Fake Textile class. It calls Markdown instead.
class Textile {
    # Convert $text with Markdown() when neither $lite nor $encode is given,
    # then pass it through SmartyPants() if that function is available.
    function TextileThis($text, $lite='', $encode='') {
        $full_conversion = ($lite == '' && $encode == '');
        if ($full_conversion) {
            $text = Markdown($text);
        }
        if (function_exists('SmartyPants')) {
            $text = SmartyPants($text);
        }
        return $text;
    }

    # Fake restricted version: restrictions are not supported for now, so
    # $noimage is ignored and the call is forwarded to TextileThis().
    function TextileRestricted($text, $lite='', $noimage='') {
        return $this->TextileThis($text, $lite);
    }

    # Workaround to ensure compatibility with TextPattern 4.0.3: returns
    # $text unchanged.
    function blockLite($text) {
        return $text;
    }
}
212 # Markdown Parser Class
215 class Markdown_Parser {
217         # Regex to match balanced [brackets].
218         # Needed to insert a maximum bracked depth while converting to PHP.
219         var $nested_brackets_depth = 6;
220         var $nested_brackets_re;
221         
222         var $nested_url_parenthesis_depth = 4;
223         var $nested_url_parenthesis_re;
225         # Table of hash values for escaped characters:
226         var $escape_chars = '\`*_{}[]()>#+-.!';
227         var $escape_chars_re;
229         # Change to ">" for HTML output.
230         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
231         var $tab_width = MARKDOWN_TAB_WIDTH;
232         
233         # Change to `true` to disallow markup or entities.
234         var $no_markup = false;
235         var $no_entities = false;
236         
237         # Predefined urls and titles for reference links and images.
238         var $predef_urls = array();
239         var $predef_titles = array();
function __construct() {
#
# Constructor function. Initialize appropriate member variables.
#
# PHP 8 no longer treats a method named after its class as the constructor,
# so the initialization lives in __construct(); the old name is kept below
# as a forwarding method.
#
    $this->_initDetab();
    $this->prepareItalicsAndBold();

    # Balanced [brackets], nested up to $nested_brackets_depth levels.
    $this->nested_brackets_re = 
        str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
        str_repeat('\])*', $this->nested_brackets_depth);

    # Balanced (parentheses) in URLs, nested up to
    # $nested_url_parenthesis_depth levels.
    $this->nested_url_parenthesis_re = 
        str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
        str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

    # Character class matching any one of the escapable characters.
    $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

    # Sort document, block, and span gamut in ascending priority order
    # (asort orders by value and keeps the method-name keys).
    asort($this->document_gamut);
    asort($this->block_gamut);
    asort($this->span_gamut);
}

function Markdown_Parser() {
#
# PHP 4 style constructor name, kept for code that still invokes the
# constructor by this name. It only forwards to __construct().
#
    $this->__construct();
}
266         # Internal hashes used during transformation.
267         var $urls = array();
268         var $titles = array();
269         var $html_hashes = array();
270         
271         # Status flag to avoid invalid nesting.
272         var $in_anchor = false;
273         
274         
function setup() {
#
# Called before the transformation process starts to setup parser 
# states.
#
    # Clear global hashes, starting from the predefined urls and titles.
    $this->urls = $this->predef_urls;
    $this->titles = $this->predef_titles;
    $this->html_hashes = array();

    # Reset the nesting flag on the object. The previous code assigned a
    # local variable ($in_anchor), which left $this->in_anchor untouched;
    # doAnchors() returns its input unprocessed while that flag is true.
    $this->in_anchor = false;
}
287         
function teardown() {
#
# Called after the transformation process. Empties the url, title and
# html hash tables so they do not keep taking up memory unnecessarily.
#
    $this->urls = $this->titles = $this->html_hashes = array();
}
function transform($text) {
#
# Main entry point: preprocess $text, run it through every method of the
# document gamut, and return the result followed by a newline.
#
    $this->setup();

    # Drop a leading UTF-8 BOM and any \x1A character; hashPart() uses
    # \x1A inside the tokens it generates.
    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

    # Turn DOS (\r\n) and Mac (\r) line endings into Unix ones, and make
    # sure the text ends with a couple of newlines.
    $text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

    # Tabs become spaces, then block-level HTML becomes hash entries.
    $text = $this->detab($text);
    $text = $this->hashHTMLBlocks($text);

    # Empty out lines made only of spaces, so that later patterns can match
    # consecutive blank lines with /\n+/ instead of /[ ]*\n+/ .
    $text = preg_replace('/^[ ]+$/m', '', $text);

    # Apply the document gamut methods in their stored order.
    foreach (array_keys($this->document_gamut) as $method) {
        $text = $this->$method($text);
    }

    $this->teardown();

    return $text . "\n";
}
337         
338         var $document_gamut = array(
339                 # Strip link definitions, store in hashes.
340                 "stripLinkDefinitions" => 20,
341                 
342                 "runBasicBlockGamut"   => 30,
343                 );
function stripLinkDefinitions($text) {
#
# Remove link definitions from $text. Each definition found is handed to
# _stripLinkDefinitions_callback, which records its URL and title.
#
    # A definition may be indented by at most (tab width - 1) spaces.
    $max_indent = $this->tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    $link_def_re = '{
                                                        ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
                                                          [ ]*
                                                          \n?                           # maybe *one* newline
                                                          [ ]*
                                                        (?:
                                                          <(.+?)>                       # url = $2
                                                        |
                                                          (\S+?)                        # url = $3
                                                        )
                                                          [ ]*
                                                          \n?                           # maybe one newline
                                                          [ ]*
                                                        (?:
                                                                (?<=\s)                 # lookbehind for whitespace
                                                                ["(]
                                                                (.*?)                   # title = $4
                                                                [")]
                                                                [ ]*
                                                        )?      # title is optional
                                                        (?:\n+|\Z)
                        }xm';

    return preg_replace_callback(
        $link_def_re,
        array(&$this, '_stripLinkDefinitions_callback'),
        $text);
}
function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition under its lower-cased id and return the empty
# string that replaces the definition in the text.
#
    $id = strtolower($matches[1]);

    # The URL is in $2 when it was written <in angle brackets>, else in $3.
    if ($matches[2] == '') {
        $this->urls[$id] = $matches[3];
    } else {
        $this->urls[$id] = $matches[2];
    }

    # Bound by reference: when the optional title group ($4) is absent from
    # $matches, the binding creates it as null instead of raising a notice.
    $this->titles[$id] =& $matches[4];

    return ''; # String that will replace the block
}
function hashHTMLBlocks($text) {
#
# Replace block-level HTML found in $text with hash tokens (through
# _hashHTMLBlocks_callback) and return the resulting text. Returns $text
# untouched when markup is disallowed ($this->no_markup).
#
    if ($this->no_markup)  return $text;

    $less_than_tab = $this->tab_width - 1;

    # Hashify HTML blocks:
    # We only want to do this for block-level HTML tags, such as headers,
    # lists, and tables. That's because we still want to wrap <p>s around
    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    # phrase emphasis, and spans. The list of tags we're looking for is
    # hard-coded:
    #
    # *  List "a" is made of tags which can be both inline or block-level.
    #    These will be treated block-level when the start tag is alone on 
    #    its line, otherwise they're not matched here and will be taken as 
    #    inline later.
    # *  List "b" is made of tags which are always block-level;
    #
    $block_tags_a_re = 'ins|del';
    $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                       'script|noscript|form|fieldset|iframe|math';

    # Regular expression for the content of a block tag.
    $nested_tags_level = 4;
    $attr = '
                        (?>                             # optional tag attributes
                          \s                    # starts with whitespace
                          (?>
                                [^>"/]+         # text outside quotes
                          |
                                /+(?!>)         # slash not followed by ">"
                          |
                                "[^"]*"         # text inside double quotes (tolerate ">")
                          |
                                \'[^\']*\'      # text inside single quotes (tolerate ">")
                          )*
                        )?      
                        ';
    $content =
        str_repeat('
                                (?>
                                  [^<]+                 # content without tag
                                |
                                  <\2                   # nested opening tag
                                        '.$attr.'       # attributes
                                        (?>
                                          />
                                        |
                                          >', $nested_tags_level).      # end of opening tag
                                          '.*?'.                                        # last level nested tag content
        str_repeat('
                                          </\2\s*>      # closing nested tag
                                        )
                                  |                             
                                        <(?!/\2\s*>     # other tags with a different name
                                  )
                                )*',
                                $nested_tags_level);
    # Same content pattern for the group "a" branch, whose tag name is $3.
    $content2 = str_replace('\2', '\3', $content);

    # First, look for nested blocks, e.g.:
    #       <div>
    #               <div>
    #               tags for inner block must be indented.
    #               </div>
    #       </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    #
    # All alternatives share one pattern, so capture groups are numbered
    # across them: $1 whole block, $2 group "b" tag, $3 group "a" tag,
    # $4 "hr", $5 the "?" or "%" of a processor instruction. The processor
    # instruction branch used to close on \2, a group that is never set in
    # that branch (a back reference to an unset group always fails), so the
    # branch could not match; it now closes on \5.
    $text = preg_replace_callback('{(?>
                        (?>
                                (?<=\n\n)               # Starting after a blank line
                                |                               # or
                                \A\n?                   # the beginning of the doc
                        )
                        (                                               # save in $1
                          # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
                          # in between.
                                        
                                                [ ]{0,'.$less_than_tab.'}
                                                <('.$block_tags_b_re.')# start tag = $2
                                                '.$attr.'>                      # attributes followed by > and \n
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ ]*                            # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                        | # Special version for tags of group a.
                                                [ ]{0,'.$less_than_tab.'}
                                                <('.$block_tags_a_re.')# start tag = $3
                                                '.$attr.'>[ ]*\n        # attributes followed by >
                                                '.$content2.'           # content, support nesting
                                                </\3>                           # the matching end tag
                                                [ ]*                            # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        
                        | # Special case just for <hr />. It was easier to make a special 
                          # case than to make the other regex more complicated.
                        
                                                [ ]{0,'.$less_than_tab.'}
                                                <(hr)                           # start tag = $4
                                                '.$attr.'                       # attributes
                                                /?>                                     # the matching end tag
                                                [ ]*
                                                (?=\n{2,}|\Z)           # followed by a blank line or end of document
                        
                        | # Special case for standalone HTML comments:
                        
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <!-- .*? -->
                                        )
                                        [ ]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                        
                        | # PHP and ASP-style processor instructions (<? and <%)
                        
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <([?%])                 # $5
                                                .*?
                                                \5>
                                        )
                                        [ ]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                        
                        )
                        )}Sxmi',
        array(&$this, '_hashHTMLBlocks_callback'),
        $text);

    return $text;
}
function _hashHTMLBlocks_callback($matches) {
    # $matches[1] holds the matched HTML block; it is replaced by its hash
    # token with a blank line on each side.
    $token = $this->hashBlock($matches[1]);
    return "\n\n" . $token . "\n\n";
}
531         
532         
function hashPart($text, $boundary = 'X') {
#
# Called whenever a function inserts an atomic element into the text
# stream. Stores $text in $this->html_hashes and returns the unique token
# that stands for it until unhash is called.
#
# $boundary is the character placed at both ends of the token. By
# convention, "B" is used for block elements that need not be wrapped into
# paragraph tags at the end, ":" for elements that are word separators, and
# "X" in the general case.
#
    # Expand any token already present in $text first, so that a single
    # `unhash` pass is enough at the end.
    $expanded = $this->unhash($text);

    # The counter is a static variable of this method; it is incremented
    # before being used in the token.
    static $i = 0;
    $i++;
    $token = $boundary . "\x1A" . $i . $boundary;

    $this->html_hashes[$token] = $expanded;
    return $token; # String that will replace the tag.
}
function hashBlock($text) {
#
# Shortcut for hashPart() using the block-level boundary character "B".
#
    $token = $this->hashPart($text, 'B');
    return $token;
}
564         var $block_gamut = array(
565         #
566         # These are all the transformations that form block-level
567         # tags like paragraphs, headers, and list items.
568         #
569                 "doHeaders"         => 10,
570                 "doHorizontalRules" => 20,
571                 
572                 "doLists"           => 40,
573                 "doCodeBlocks"      => 50,
574                 "doBlockQuotes"     => 60,
575                 );
function runBlockGamut($text) {
#
# Run block gamut transformations, hashing raw HTML blocks first.
#
    # Raw HTML has to be hashed again for each block, not only once at the
    # start: hashed blocks can be part of list items and could have been
    # indented, in which case an earlier pass of hashHTMLBlocks would have
    # seen them as code blocks.
    $hashed = $this->hashHTMLBlocks($text);

    return $this->runBasicBlockGamut($hashed);
}
590         
function runBasicBlockGamut($text) {
#
# Run block gamut transformations without hashing HTML blocks. Useful when
# HTML blocks are known to be hashed already, as in the first
# whole-document pass.
#
    foreach (array_keys($this->block_gamut) as $method) {
        $text = $this->$method($text);
    }

    # Finally form paragraphs and restore hashed blocks.
    return $this->formParagraphs($text);
}
606         
607         
function doHorizontalRules($text) {
#
# Replace every horizontal rule line in $text with the hash token of an
# <hr> element, placed on a line of its own.
#
    # The <hr> element is hashed once; the same token is used for all rules.
    $hr_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

    $rule_re = '{
                                ^[ ]{0,3}       # Leading space
                                ([-*_])         # $1: First marker
                                (?>                     # Repeated marker group
                                        [ ]{0,2}        # Zero, one, or two spaces.
                                        \1                      # Marker character
                                ){2,}           # Group repeated at least twice
                                [ ]*            # Tailing spaces
                                $                       # End of line.
                        }mx';

    return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
}
626         var $span_gamut = array(
627         #
628         # These are all the transformations that occur *within* block-level
629         # tags like paragraphs, headers, and list items.
630         #
631                 # Process character escapes, code spans, and inline HTML
632                 # in one shot.
633                 "parseSpan"           => -30,
635                 # Process anchor and image tags. Images must come first,
636                 # because ![foo][f] looks like an anchor.
637                 "doImages"            =>  10,
638                 "doAnchors"           =>  20,
639                 
640                 # Make links out of things like `<http://example.com/>`
641                 # Must come after doAnchors, because you can use < and >
642                 # delimiters in inline links like [this](<url>).
643                 "doAutoLinks"         =>  30,
644                 "encodeAmpsAndAngles" =>  40,
646                 "doItalicsAndBold"    =>  50,
647                 "doHardBreaks"        =>  60,
648                 );
function runSpanGamut($text) {
#
# Run span gamut transformations: each method named in $this->span_gamut
# is applied to $text in the stored order.
#
    foreach (array_keys($this->span_gamut) as $method) {
        $text = $this->$method($text);
    }

    return $text;
}
660         
661         
function doHardBreaks($text) {
    # Hard breaks: two or more spaces right before a newline are replaced
    # through _doHardBreaks_callback.
    $callback = array(&$this, '_doHardBreaks_callback');
    return preg_replace_callback('/ {2,}\n/', $callback, $text);
}
function _doHardBreaks_callback($matches) {
    # The match itself is not used: every hard break becomes the hash token
    # of a <br> element followed by a newline.
    $br = '<br' . $this->empty_element_suffix . "\n";
    return $this->hashPart($br);
}
function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Three notations are processed, in this order:
#   1. reference-style links:      [link text] [id]
#   2. inline-style links:         [link text](url "optional title")
#   3. reference-style shortcuts:  [link text]
#
    # The callbacks run the span gamut on the link text. While a pass is
    # in progress this flag makes any nested call return the text
    # untouched.
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    $reference_handler = array($this, '_doAnchors_reference_callback');
    $inline_handler    = array($this, '_doAnchors_inline_callback');

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = $3
          \]
        )
        }xs';

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (               # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(            # literal paren
            [ \n]*
            (?:
                <(.+?)> # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # Title = $7
              \6        # matching quote
              [ \n]*    # ignore any spaces/tabs between closing quote and )
            )?          # title is optional
          \)
        )
        }xs';

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link text][1]
    # or [link text](/foo)
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';

    $text = preg_replace_callback($reference_re, $reference_handler, $text);
    $text = preg_replace_callback($inline_re,    $inline_handler,    $text);
    $text = preg_replace_callback($shortcut_re,  $reference_handler, $text);

    $this->in_anchor = false;
    return $text;
}
function _doAnchors_reference_callback($matches) {
#
# Callback for reference-style links and shortcuts found by doAnchors().
#   $matches[1] = whole match, $matches[2] = link text,
#   $matches[3] = link id (absent for the [text] shortcut form).
# Returns the hashed <a> element when the id is a key of $this->urls,
# otherwise the matched text unchanged.
#
    $whole_match = $matches[1];
    $link_text   = $matches[2];
    $link_id     = isset($matches[3]) ? $matches[3] : '';

    if ($link_id == "") {
        # for shortcut links like [this][] or [this].
        $link_id = $link_text;
    }

    # lower-case and turn embedded newlines into spaces
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    if (!isset($this->urls[$link_id])) {
        # Unknown id: leave the source text as it was.
        return $whole_match;
    }

    $anchor = '<a href="' . $this->encodeAttribute($this->urls[$link_id]) . '"';
    if (isset($this->titles[$link_id])) {
        $anchor .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
    }
    $anchor .= '>' . $this->runSpanGamut($link_text) . '</a>';

    return $this->hashPart($anchor);
}
function _doAnchors_inline_callback($matches) {
#
# Callback for inline-style links found by doAnchors().
#   $matches[2] = link text,
#   $matches[3] = URL written as <url>, $matches[4] = bare URL,
#   $matches[7] = title (only present when a title was written).
# Returns the hashed <a> element.
#
    $link_text = $this->runSpanGamut($matches[2]);

    # Use the <bracketed> form when it matched, the bare form otherwise.
    $url = ($matches[3] == '') ? $matches[4] : $matches[3];

    $anchor = '<a href="' . $this->encodeAttribute($url) . '"';
    if (isset($matches[7])) {
        $anchor .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
    }

    # NOTE(review): the link text goes through the span gamut a second
    # time here; kept as-is to preserve existing output.
    $link_text = $this->runSpanGamut($link_text);
    $anchor .= '>' . $link_text . '</a>';

    return $this->hashPart($anchor);
}
function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Reference-style images are replaced first, inline images second.
#
    #
    # Reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';

    #
    # Inline images:  ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_re = '{
        (               # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?           # One optional whitespace character
          \(            # literal paren
            [ \n]*
            (?:
                <(\S*)> # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (           # $5
              ([\'"])   # quote char = $6
              (.*?)     # title = $7
              \6        # matching quote
              [ \n]*
            )?          # title is optional
          \)
        )
        }xs';

    $text = preg_replace_callback(
        $reference_re, array($this, '_doImages_reference_callback'), $text);
    $text = preg_replace_callback(
        $inline_re, array($this, '_doImages_inline_callback'), $text);

    return $text;
}
function _doImages_reference_callback($matches) {
#
# Callback for reference-style images found by doImages().
#   $matches[1] = whole match, $matches[2] = alt text, $matches[3] = id.
# Returns the hashed <img> element when the lower-cased id is a key of
# $this->urls, otherwise the matched text unchanged.
#
    $alt_text = $matches[2];
    $link_id  = strtolower($matches[3]);

    if ($link_id == "") {
        # for shortcut links like ![this][].
        $link_id = strtolower($alt_text);
    }

    $alt_text = $this->encodeAttribute($alt_text);

    if (!isset($this->urls[$link_id])) {
        # If there's no such link ID, leave intact:
        return $matches[1];
    }

    $img = '<img src="' . $this->encodeAttribute($this->urls[$link_id]) . '"'
         . ' alt="' . $alt_text . '"';
    if (isset($this->titles[$link_id])) {
        $img .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
    }
    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
}
function _doImages_inline_callback($matches) {
#
# Callback for inline images found by doImages().
#   $matches[2] = alt text,
#   $matches[3] = URL written as <url>, $matches[4] = bare URL,
#   $matches[7] = title (only present when a title was written).
# Returns the hashed <img> element.
#
    $alt_attr = $this->encodeAttribute($matches[2]);

    # Use the <bracketed> form when it matched, the bare form otherwise.
    $src      = ($matches[3] == '') ? $matches[4] : $matches[3];
    $src_attr = $this->encodeAttribute($src);

    $img = '<img src="' . $src_attr . '" alt="' . $alt_attr . '"';
    if (isset($matches[7])) {
        $img .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
    }
    $img .= $this->empty_element_suffix;

    return $this->hashPart($img);
}
function doHeaders($text) {
#
# Replace Markdown headers with the output of the header callbacks.
#
    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $atx_re = '{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            \n+
        }xm';

    # Setext headers are converted before atx headers.
    $text = preg_replace_callback(
        $setext_re, array($this, '_doHeaders_callback_setext'), $text);
    $text = preg_replace_callback(
        $atx_re, array($this, '_doHeaders_callback_atx'), $text);

    return $text;
}
function _doHeaders_callback_setext($matches) {
#
# Callback for setext-style (underlined) headers found by doHeaders().
#   $matches[0] = whole match,
#   $matches[1] = header text,
#   $matches[2] = underline: a run of "=" or a run of "-".
# Returns an <h1> ("=" underline) or <h2> ("-" underline) whose content
# has been run through the span gamut, hashed as a block and surrounded
# by newlines. When the match looks like an empty list item followed by
# a single dash, the matched text is returned unchanged.
#
    # Terrible hack to check we haven't found an empty list item.
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
        return $matches[0];

    # First character of the underline decides the level. Square-bracket
    # offsets are used because the curly-brace form is a parse error on
    # PHP 8.
    $level = ($matches[2][0] == '=') ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers found by doHeaders().
#   $matches[1] = the leading run of "#" (its length is the level),
#   $matches[2] = header text.
# Returns the hashed <hN> block surrounded by newlines.
#
    $tag   = 'h' . strlen($matches[1]);
    $inner = $this->runSpanGamut($matches[2]);

    return "\n" . $this->hashBlock("<$tag>" . $inner . "</$tag>") . "\n\n";
}
function doLists($text) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Two passes are made over the text: unordered lists first, then
# ordered lists. Each whole list found is handed to _doLists_callback().
#
    $less_than_tab = $this->tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[.]';

    # Each pass is a pair: (marker of this list kind, marker of the other).
    $passes = array(
        array($marker_ul_re, $marker_ol_re),
        array($marker_ol_re, $marker_ul_re),
        );

    foreach ($passes as $pass) {
        $marker_re       = $pass[0];
        $other_marker_re = $pass[1];

        # Re-usable pattern to match any entire ul or ol list:
        $whole_list_re = '
            (                               # $1 = whole list
              (                             # $2
                ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                ('.$marker_re.')            # $4 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $5
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    '.$marker_re.'[ ]+
                  )
                |
                  (?=                       # Lookahead for another kind of list
                    \n
                    \3                      # Must have the same indentation
                    '.$other_marker_re.'[ ]+
                  )
              )
            )
        '; // mx

        # We use a different prefix before nested lists than top-level lists.
        # See extended comment in _ProcessListItems().
        if ($this->list_level) {
            $prefix_re = '^';
        }
        else {
            $prefix_re = '(?:(?<=\n)\n|\A\n?) # Must eat the newline';
        }

        $text = preg_replace_callback('{
                '.$prefix_re.'
                '.$whole_list_re.'
            }mx',
            array($this, '_doLists_callback'), $text);
    }

    return $text;
}
function _doLists_callback($matches) {
#
# Callback for a whole list found by doLists().
#   $matches[1] = the whole list, $matches[4] = its first item marker.
# Picks <ul> or <ol> from the first marker, converts the items with
# processListItems() and returns the hashed list block surrounded by
# newlines.
#
    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[.]';

    if (preg_match("/$marker_ul_re/", $matches[4])) {
        $list_type = "ul";
        $marker_re = $marker_ul_re;
    }
    else {
        $list_type = "ol";
        $marker_re = $marker_ol_re;
    }

    # The list is passed on with one newline appended.
    $items = $this->processListItems($matches[1] . "\n", $marker_re);

    $block = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");
    return "\n" . $block . "\n\n";
}
# List nesting counter: processListItems() increments it on entry and
# decrements it before returning; doLists() uses a different pattern
# prefix when it is non-zero.
var $list_level = 0;
function processListItems($list_str, $marker_any_re) {
#
#   Process the contents of a single ordered or unordered list, splitting it
#   into individual list items.
#
#   $list_str is the text of the list; $marker_any_re is the regex
#   fragment matching this list's item markers. Each item found is
#   replaced by the output of _processListItems_callback().
#
    # $this->list_level tracks whether we are inside a list: it is raised
    # on entry here and lowered again before returning, so zero means
    # "not in a list".
    #
    # This matters because outside a list, text such as
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # should stay a single paragraph even though its second line starts
    # with a digit-period-space sequence, whereas inside a list (or
    # sub-list) that same line starts a sub-list. It is a kludge: this
    # part of Markdown's syntax is hard to parse perfectly without
    # mind-reading. Perhaps the syntax rules should require sub-lists to
    # start with a starting cardinal number; e.g. "1." or "a.".

    $this->list_level += 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_re = '{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ('.$marker_any_re.'             # list marker and space = $3
            (?:[ ]+|(?=\n)) # space only required if item is not empty
        )
        ((?s:.*?))                      # list item text   = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
        }xm';

    $list_str = preg_replace_callback(
        $item_re, array($this, '_processListItems_callback'), $list_str);

    $this->list_level -= 1;
    return $list_str;
}
function _processListItems_callback($matches) {
#
# Callback for a single list item found by processListItems().
#   $matches[1] = leading blank line (if any),
#   $matches[2] = leading whitespace,
#   $matches[3] = list marker and following space,
#   $matches[4] = item text,
#   $matches[5] = trailing blank line (if any).
# Returns the item wrapped in <li>...</li> followed by a newline.
#
    $item = $matches[4];

    $has_leading_line  = !empty($matches[1]);
    $has_trailing_line = !empty($matches[5]);

    if ($has_leading_line || $has_trailing_line ||
        preg_match('/\n{2,}/', $item))
    {
        # Blank lines around or inside the item: treat it as block content.
        # Replace marker with the appropriate whitespace indentation
        $padding = str_repeat(' ', strlen($matches[3]));
        $item    = $matches[2] . $padding . $item;
        $item    = $this->runBlockGamut($this->outdent($item)."\n");
    }
    else {
        # Recursion for sub-lists:
        $item = $this->doLists($this->outdent($item));
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return "<li>" . $item . "</li>\n";
}
function doCodeBlocks($text) {
#
#   Process Markdown `<pre><code>` blocks.
#
#   Each indented block found is replaced by the output of
#   _doCodeBlocks_callback().
#
    $indent = $this->tab_width;

    $code_block_re = '{
            (?:\n\n|\A\n?)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?>
                [ ]{'.$indent.'}  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$indent.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback(
        $code_block_re, array($this, '_doCodeBlocks_callback'), $text);
}
function _doCodeBlocks_callback($matches) {
#
# Callback for an indented code block found by doCodeBlocks().
#   $matches[1] = the indented block.
# Outdents the block, escapes HTML special characters (quotes are left
# alone), and returns it inside a hashed <pre><code> block surrounded by
# blank lines.
#
    $code = $this->outdent($matches[1]);
    $code = htmlspecialchars($code, ENT_NOQUOTES);

    # trim leading newlines and trailing newlines
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = "<pre><code>$code\n</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function makeCodeSpan($code) {
#
# Create a code span markup for $code. Called from handleSpanToken.
#
# The code is trimmed and HTML-escaped (quotes are left alone), wrapped
# in <code> tags and passed through hashPart().
#
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    return $this->hashPart('<code>' . $escaped . '</code>');
}
# Regex fragments matching emphasis tokens. Each table is keyed by the
# token that is currently open ('' when none is); prepareItalicsAndBold()
# combines them into the expressions used by doItalicsAndBold().
#
# In every table the '' entry matches a marker that is followed by a
# non-space character (or end of line) and is not followed by one of
# ".,:;" plus whitespace; the other entries match the same marker when
# it is preceded by a non-space character (or start of line).

# Single-character tokens: * or _
var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
        '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S|^)(?<!_)_(?!_)',
        );
# Two-character tokens: ** or __
var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
        '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S|^)(?<!_)__(?!_)',
        );
# Three-character tokens: *** or ___
var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
        '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S|^)(?<!_)___(?!_)',
        );
# Complete token expressions keyed by "$em$strong"; filled in by
# prepareItalicsAndBold() and read by doItalicsAndBold().
var $em_strong_prepared_relist;
1159         
function prepareItalicsAndBold() {
#
# Prepare regular expressions for searching emphasis tokens in any
# context.
#
# For every combination of an $em_relist key and a $strong_relist key,
# stores one expression in $this->em_strong_prepared_relist under the
# concatenation of the two keys.
#
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $context = "$em$strong";

            # Allowed token expressions: em and strong always, preceded
            # by the three-char expression when one exists for this
            # context.
            $alternatives = array($em_re, $strong_re);
            if (isset($this->em_strong_relist[$context])) {
                array_unshift($alternatives, $this->em_strong_relist[$context]);
            }

            # Master expression: one capturing group of alternatives.
            $this->em_strong_prepared_relist[$context] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
1181         
function doItalicsAndBold($text) {
#
# Convert emphasis markers (*, _, **, __, ***, ___) in $text into hashed
# <em> and <strong> spans and return the resulting text.
#
# The text is consumed token by token. $token_stack holds the markers
# that are currently open and $text_stack the text collected under each
# of them (index 0 is the innermost); $em and $strong hold the open em
# and strong markers ('' when none). Markers still open when the text
# runs out are put back as literal text.
#
# Relies on $this->em_strong_prepared_relist, which is filled in by
# prepareItalicsAndBold().
#
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    $three_char_em = false;

    while (1) {
        #
        # Get prepared regular expression for searching emphasis tokens
        # in current context.
        #
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        #
        # Each loop iteration searches for the next emphasis token:
        # $parts[0] is the text before it, $parts[1] the token itself
        # and $parts[2] the remaining text.
        #
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        $token =& $parts[1];
        $text =& $parts[2];

        if (empty($token)) {
            # Reached end of text span: empty stack without emitting
            # any more emphasis. Each open marker and its text are
            # appended to the buffer beneath it.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($three_char_em) {
            # Reached closing marker while inside a three-char emphasis.
            if ($token_len == 3) {
                # Three-char closing marker, close em and strong.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong><em>$span</em></strong>";
                $text_stack[0] .= $this->hashPart($span);
                $em = '';
                $strong = '';
            } else {
                # Other closing marker: close one em or strong and
                # change current token state to match the other.
                # ([] string offset: the {} form is a parse error on PHP 8.)
                $token_stack[0] = str_repeat($token[0], 3-$token_len);
                $tag = $token_len == 2 ? "strong" : "em";
                $span = $text_stack[0];
                $span = $this->runSpanGamut($span);
                $span = "<$tag>$span</$tag>";
                $text_stack[0] = $this->hashPart($span);
                $$tag = ''; # $$tag stands for $em or $strong
            }
            $three_char_em = false;
        } else if ($token_len == 3) {
            if ($em) {
                # Reached closing marker for both em and strong.
                # Close the two innermost spans, one after the other.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] .= $this->hashPart($span);
                    $$tag = ''; # $$tag stands for $em or $strong
                }
            } else {
                # Reached opening three-char emphasis marker. Push on token
                # stack; will be handled by the special condition above.
                # ([] string offset: the {} form is a parse error on PHP 8.)
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $three_char_em = true;
            }
        } else if ($token_len == 2) {
            if ($strong) {
                # Unwind any dangling emphasis marker:
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                # Closing strong marker:
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong>$span</strong>";
                $text_stack[0] .= $this->hashPart($span);
                $strong = '';
            } else {
                # Opening strong marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            # Here $token_len == 1
            if ($em) {
                if (strlen($token_stack[0]) == 1) {
                    # Closing emphasis marker:
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<em>$span</em>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                } else {
                    # The innermost open marker is not an em marker:
                    # keep this token as literal text.
                    $text_stack[0] .= $token;
                }
            } else {
                # Opening emphasis marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $em = $token;
            }
        }
    }
    return $text_stack[0];
}
1305         function doBlockQuotes($text) {
1306                 $text = preg_replace_callback('/
1307                           (                                                             # Wrap whole match in $1
1308                                 (?>
1309                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1310                                         .+\n                                    # rest of the first line
1311                                   (.+\n)*                                       # subsequent consecutive lines
1312                                   \n*                                           # blanks
1313                                 )+
1314                           )
1315                         /xm',
1316                         array(&$this, '_doBlockQuotes_callback'), $text);
1318                 return $text;
1319         }
1320         function _doBlockQuotes_callback($matches) {
1321                 $bq = $matches[1];
1322                 # trim one level of quoting - trim whitespace-only lines
1323                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1324                 $bq = $this->runBlockGamut($bq);                # recurse
1326                 $bq = preg_replace('/^/m', "  ", $bq);
1327                 # These leading spaces cause problem with <pre> content, 
1328                 # so we need to fix that:
1329                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1330                         array(&$this, '_doBlockQuotes_callback2'), $bq);
1332                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1333         }
1334         function _doBlockQuotes_callback2($matches) {
1335                 $pre = $matches[1];
1336                 $pre = preg_replace('/^  /m', '', $pre);
1337                 return $pre;
1338         }
1341         function formParagraphs($text) {
1342         #
1343         #       Params:
1344         #               $text - string to process with html <p> tags
1345         #
1346                 # Strip leading and trailing lines:
1347                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1349                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1351                 #
1352                 # Wrap <p> tags and unhashify HTML blocks
1353                 #
1354                 foreach ($grafs as $key => $value) {
1355                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1356                                 # Is a paragraph.
1357                                 $value = $this->runSpanGamut($value);
1358                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1359                                 $value .= "</p>";
1360                                 $grafs[$key] = $this->unhash($value);
1361                         }
1362                         else {
1363                                 # Is a block.
1364                                 # Modify elements of @grafs in-place...
1365                                 $graf = $value;
1366                                 $block = $this->html_hashes[$graf];
1367                                 $graf = $block;
1368 //                              if (preg_match('{
1369 //                                      \A
1370 //                                      (                                                       # $1 = <div> tag
1371 //                                        <div  \s+
1372 //                                        [^>]*
1373 //                                        \b
1374 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1375 //                                        1
1376 //                                        \2
1377 //                                        [^>]*
1378 //                                        >
1379 //                                      )
1380 //                                      (                                                       # $3 = contents
1381 //                                      .*
1382 //                                      )
1383 //                                      (</div>)                                        # $4 = closing tag
1384 //                                      \z
1385 //                                      }xs', $block, $matches))
1386 //                              {
1387 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1388 //
1389 //                                      # We can't call Markdown(), because that resets the hash;
1390 //                                      # that initialization code should be pulled into its own sub, though.
1391 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1392 //                                      
1393 //                                      # Run document gamut methods on the content.
1394 //                                      foreach ($this->document_gamut as $method => $priority) {
1395 //                                              $div_content = $this->$method($div_content);
1396 //                                      }
1397 //
1398 //                                      $div_open = preg_replace(
1399 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1400 //
1401 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1402 //                              }
1403                                 $grafs[$key] = $graf;
1404                         }
1405                 }
1407                 return implode("\n\n", $grafs);
1408         }
1411         function encodeAttribute($text) {
1412         #
1413         # Encode text for a double-quoted HTML attribute. This function
1414         # is *not* suitable for attributes enclosed in single quotes.
1415         #
1416                 $text = $this->encodeAmpsAndAngles($text);
1417                 $text = str_replace('"', '&quot;', $text);
1418                 return $text;
1419         }
1420         
1421         
1422         function encodeAmpsAndAngles($text) {
1423         #
1424         # Smart processing for ampersands and angle brackets that need to 
1425         # be encoded. Valid character entities are left alone unless the
1426         # no-entities mode is set.
1427         #
1428                 if ($this->no_entities) {
1429                         $text = str_replace('&', '&amp;', $text);
1430                 } else {
1431                         # Ampersand-encoding based entirely on Nat Irons's Amputator
1432                         # MT plugin: <http://bumppo.net/projects/amputator/>
1433                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1434                                                                 '&amp;', $text);;
1435                 }
1436                 # Encode remaining <'s
1437                 $text = str_replace('<', '&lt;', $text);
1439                 return $text;
1440         }
1443         function doAutoLinks($text) {
1444                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
1445                         array(&$this, '_doAutoLinks_url_callback'), $text);
1447                 # Email addresses: <address@domain.foo>
1448                 $text = preg_replace_callback('{
1449                         <
1450                         (?:mailto:)?
1451                         (
1452                                 (?:
1453                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1454                                 |
1455                                         ".*?"
1456                                 )
1457                                 \@
1458                                 (?:
1459                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1460                                 |
1461                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1462                                 )
1463                         )
1464                         >
1465                         }xi',
1466                         array(&$this, '_doAutoLinks_email_callback'), $text);
1468                 return $text;
1469         }
1470         function _doAutoLinks_url_callback($matches) {
1471                 $url = $this->encodeAttribute($matches[1]);
1472                 $link = "<a href=\"$url\">$url</a>";
1473                 return $this->hashPart($link);
1474         }
1475         function _doAutoLinks_email_callback($matches) {
1476                 $address = $matches[1];
1477                 $link = $this->encodeEmailAddress($address);
1478                 return $this->hashPart($link);
1479         }
1482         function encodeEmailAddress($addr) {
1483         #
1484         #       Input: an email address, e.g. "foo@example.com"
1485         #
1486         #       Output: the email address as a mailto link, with each character
1487         #               of the address encoded as either a decimal or hex entity, in
1488         #               the hopes of foiling most address harvesting spam bots. E.g.:
1489         #
1490         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1491         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1492         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1493         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1494         #
1495         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1496         #   With some optimizations by Milian Wolff.
1497         #
1498                 $addr = "mailto:" . $addr;
1499                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
1500                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1501                 
1502                 foreach ($chars as $key => $char) {
1503                         $ord = ord($char);
1504                         # Ignore non-ascii chars.
1505                         if ($ord < 128) {
1506                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1507                                 # roughly 10% raw, 45% hex, 45% dec
1508                                 # '@' *must* be encoded. I insist.
1509                                 if ($r > 90 && $char != '@') /* do nothing */;
1510                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1511                                 else              $chars[$key] = '&#'.$ord.';';
1512                         }
1513                 }
1514                 
1515                 $addr = implode('', $chars);
1516                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1517                 $addr = "<a href=\"$addr\">$text</a>";
1519                 return $addr;
1520         }
1523         function parseSpan($str) {
1524         #
1525         # Take the string $str and parse it into tokens, hashing embeded HTML,
1526         # escaped characters and handling code spans.
1527         #
1528                 $output = '';
1529                 
1530                 $span_re = '{
1531                                 (
1532                                         \\\\'.$this->escape_chars_re.'
1533                                 |
1534                                         (?<![`\\\\])
1535                                         `+                                              # code span marker
1536                         '.( $this->no_markup ? '' : '
1537                                 |
1538                                         <!--    .*?     -->             # comment
1539                                 |
1540                                         <\?.*?\?> | <%.*?%>             # processing instruction
1541                                 |
1542                                         <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
1543                                         (?>
1544                                                 \s
1545                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1546                                         )?
1547                                         >
1548                         ').'
1549                                 )
1550                                 }xs';
1552                 while (1) {
1553                         #
1554                         # Each loop iteration seach for either the next tag, the next 
1555                         # openning code span marker, or the next escaped character. 
1556                         # Each token is then passed to handleSpanToken.
1557                         #
1558                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1559                         
1560                         # Create token from text preceding tag.
1561                         if ($parts[0] != "") {
1562                                 $output .= $parts[0];
1563                         }
1564                         
1565                         # Check if we reach the end.
1566                         if (isset($parts[1])) {
1567                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1568                                 $str = $parts[2];
1569                         }
1570                         else {
1571                                 break;
1572                         }
1573                 }
1574                 
1575                 return $output;
1576         }
1577         
1578         
1579         function handleSpanToken($token, &$str) {
1580         #
1581         # Handle $token provided by parseSpan by determining its nature and 
1582         # returning the corresponding value that should replace it.
1583         #
1584                 switch ($token{0}) {
1585                         case "\\":
1586                                 return $this->hashPart("&#". ord($token{1}). ";");
1587                         case "`":
1588                                 # Search for end marker in remaining text.
1589                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1590                                         $str, $matches))
1591                                 {
1592                                         $str = $matches[2];
1593                                         $codespan = $this->makeCodeSpan($matches[1]);
1594                                         return $this->hashPart($codespan);
1595                                 }
1596                                 return $token; // return as text since no ending marker found.
1597                         default:
1598                                 return $this->hashPart($token);
1599                 }
1600         }
1603         function outdent($text) {
1604         #
1605         # Remove one level of line-leading tabs or spaces
1606         #
1607                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1608         }
        # String length function for detab. `_initDetab` will create a function to 
        # handle UTF-8 if the default function does not exist.
1613         var $utf8_strlen = 'mb_strlen';
1614         
1615         function detab($text) {
1616         #
1617         # Replace tabs with the appropriate amount of space.
1618         #
1619                 # For each line we separate the line in blocks delemited by
1620                 # tab characters. Then we reconstruct every line by adding the 
1621                 # appropriate number of space between each blocks.
1622                 
1623                 $text = preg_replace_callback('/^.*\t.*$/m',
1624                         array(&$this, '_detab_callback'), $text);
1626                 return $text;
1627         }
1628         function _detab_callback($matches) {
1629                 $line = $matches[0];
1630                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1631                 
1632                 # Split in blocks.
1633                 $blocks = explode("\t", $line);
1634                 # Add each blocks to the line.
1635                 $line = $blocks[0];
1636                 unset($blocks[0]); # Do not add first block twice.
1637                 foreach ($blocks as $block) {
1638                         # Calculate amount of space, insert spaces, insert block.
1639                         $amount = $this->tab_width - 
1640                                 $strlen($line, 'UTF-8') % $this->tab_width;
1641                         $line .= str_repeat(" ", $amount) . $block;
1642                 }
1643                 return $line;
1644         }
1645         function _initDetab() {
1646         #
1647         # Check for the availability of the function in the `utf8_strlen` property
1648         # (initially `mb_strlen`). If the function is not available, create a 
1649         # function that will loosely count the number of UTF-8 characters with a
1650         # regular expression.
1651         #
1652                 if (function_exists($this->utf8_strlen)) return;
1653                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1654                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1655                         $text, $m);');
1656         }
1659         function unhash($text) {
1660         #
1661         # Swap back in all the tags hashed by _HashHTMLBlocks.
1662         #
1663                 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1664                         array(&$this, '_unhash_callback'), $text);
1665         }
1666         function _unhash_callback($matches) {
1667                 return $this->html_hashes[$matches[0]];
1668         }
1674 # Markdown Extra Parser Class
1677 class MarkdownExtra_Parser extends Markdown_Parser {
        # Prefix for footnote ids.
        var $fn_id_prefix = "";
        
        # Optional title attribute for footnote links and backlinks.
        # Defaults come from the MARKDOWN_FN_*_TITLE constants.
        var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
        var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
        
        # Optional class attribute for footnote links and backlinks.
        # Defaults come from the MARKDOWN_FN_*_CLASS constants.
        var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
        var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
        
        # Predefined abbreviations, as abbreviation word => description.
        # setup() loads them into the abbreviation tables.
        var $predef_abbr = array();
1694         function MarkdownExtra_Parser() {
1695         #
1696         # Constructor function. Initialize the parser object.
1697         #
1698                 # Add extra escapable characters before parent constructor 
1699                 # initialize the table.
1700                 $this->escape_chars .= ':|';
1701                 
1702                 # Insert extra document, block, and span transformations. 
1703                 # Parent constructor will do the sorting.
1704                 $this->document_gamut += array(
1705                         "doFencedCodeBlocks" => 5,
1706                         "stripFootnotes"     => 15,
1707                         "stripAbbreviations" => 25,
1708                         "appendFootnotes"    => 50,
1709                         );
1710                 $this->block_gamut += array(
1711                         "doFencedCodeBlocks" => 5,
1712                         "doTables"           => 15,
1713                         "doDefLists"         => 45,
1714                         );
1715                 $this->span_gamut += array(
1716                         "doFootnotes"        => 5,
1717                         "doAbbreviations"    => 70,
1718                         );
1719                 
1720                 parent::Markdown_Parser();
1721         }
1722         
1723         
        # Extra variables used during extra transformations.
        # All of them are reset by setup().
        var $footnotes = array();
        var $footnotes_ordered = array();
        # Abbreviation word => description. The property name is misspelled
        # ("desciptions") but is used with this spelling by setup() and teardown().
        var $abbr_desciptions = array();
        # "|"-separated alternation of the preg_quote()d abbreviation words.
        var $abbr_word_re = '';
        
        # Give the current footnote number.
        var $footnote_counter = 1;
1732         
1733         
1734         function setup() {
1735         #
1736         # Setting up Extra-specific variables.
1737         #
1738                 parent::setup();
1739                 
1740                 $this->footnotes = array();
1741                 $this->footnotes_ordered = array();
1742                 $this->abbr_desciptions = array();
1743                 $this->abbr_word_re = '';
1744                 $this->footnote_counter = 1;
1745                 
1746                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1747                         if ($this->abbr_word_re)
1748                                 $this->abbr_word_re .= '|';
1749                         $this->abbr_word_re .= preg_quote($abbr_word);
1750                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1751                 }
1752         }
1753         
1754         function teardown() {
1755         #
1756         # Clearing Extra-specific variables.
1757         #
1758                 $this->footnotes = array();
1759                 $this->footnotes_ordered = array();
1760                 $this->abbr_desciptions = array();
1761                 $this->abbr_word_re = '';
1762                 
1763                 parent::teardown();
1764         }
1765         
1766         
        ### HTML Block Parser ###
        
        # Each list below is a fragment of a regular expression: tag names 
        # separated by "|".
        
        # Tags that are always treated as block tags:
        var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
        
        # Tags treated as block tags only if the opening tag is alone on its line:
        var $context_block_tags_re = 'script|noscript|math|ins|del';
        
        # Tags where markdown="1" default to span mode:
        var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
        
        # Tags which must not have their contents modified, no matter where 
        # they appear:
        var $clean_tags_re = 'script|math';
        
        # Tags that do not need to be closed.
        var $auto_close_tags_re = 'hr|img';
        
1784         
1786         function hashHTMLBlocks($text) {
1787         #
1788         # Hashify HTML Blocks and "clean tags".
1789         #
1790         # We only want to do this for block-level HTML tags, such as headers,
1791         # lists, and tables. That's because we still want to wrap <p>s around
1792         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1793         # phrase emphasis, and spans. The list of tags we're looking for is
1794         # hard-coded.
1795         #
1796         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1797         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
1798         # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1799         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1800         # These two functions are calling each other. It's recursive!
1801         #
1802                 #
1803                 # Call the HTML-in-Markdown hasher.
1804                 #
1805                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1806                 
1807                 return $text;
1808         }
1809         function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 
1810                                                                                 $enclosing_tag_re = '', $span = false)
1811         {
1812         #
1813         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1814         #
1815         # *   $indent is the number of space to be ignored when checking for code 
1816         #     blocks. This is important because if we don't take the indent into 
1817         #     account, something like this (which looks right) won't work as expected:
1818         #
1819         #     <div>
1820         #         <div markdown="1">
1821         #         Hello World.  <-- Is this a Markdown code block or text?
1822         #         </div>  <-- Is this a Markdown code block or a real tag?
1823         #     <div>
1824         #
1825         #     If you don't like this, just don't indent the tag on which
1826         #     you apply the markdown="1" attribute.
1827         #
1828         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
1829         #     tag with that name. Nested tags supported.
1830         #
1831         # *   If $span is true, text inside must treated as span. So any double 
1832         #     newline will be replaced by a single newline so that it does not create 
1833         #     paragraphs.
1834         #
1835         # Returns an array of that form: ( processed text , remaining text )
1836         #
1837                 if ($text === '') return array('', '');
1839                 # Regex to check for the presense of newlines around a block tag.
1840                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
1841                 $newline_after_re = 
1842                         '{
1843                                 ^                                               # Start of text following the tag.
1844                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
1845                                 [ ]*\n                                  # Must be followed by newline.
1846                         }xs';
1847                 
1848                 # Regex to match any tag.
1849                 $block_tag_re =
1850                         '{
1851                                 (                                       # $2: Capture hole tag.
1852                                         </?                                     # Any opening or closing tag.
1853                                                 (?>                             # Tag name.
1854                                                         '.$this->block_tags_re.'                        |
1855                                                         '.$this->context_block_tags_re.'        |
1856                                                         '.$this->clean_tags_re.'                |
1857                                                         (?!\s)'.$enclosing_tag_re.'
1858                                                 )
1859                                                 (?:
1860                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1861                                                         (?>
1862                                                                 ".*?"           |       # Double quotes (can contain `>`)
1863                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
1864                                                                 .+?                             # Anything but quotes and `>`.
1865                                                         )*?
1866                                                 )?
1867                                         >                                       # End of tag.
1868                                 |
1869                                         <!--    .*?     -->     # HTML Comment
1870                                 |
1871                                         <\?.*?\?> | <%.*?%>     # Processing instruction
1872                                 |
1873                                         <!\[CDATA\[.*?\]\]>     # CData Block
1874                                 |
1875                                         # Code span marker
1876                                         `+
1877                                 '. ( !$span ? ' # If not in span.
1878                                 |
1879                                         # Indented code block
1880                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
1881                                         [ ]{'.($indent+4).'}[^\n]* \n
1882                                         (?>
1883                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1884                                         )*
1885                                 |
1886                                         # Fenced code block marker
1887                                         (?> ^ | \n )
1888                                         [ ]{'.($indent).'}~~~+[ ]*\n
1889                                 ' : '' ). ' # End (if not is span).
1890                                 )
1891                         }xs';
1893                 
1894                 $depth = 0;             # Current depth inside the tag tree.
1895                 $parsed = "";   # Parsed text that will be returned.
1897                 #
1898                 # Loop through every tag until we find the closing tag of the parent
1899                 # or loop until reaching the end of text if no parent tag specified.
1900                 #
1901                 do {
1902                         #
1903                         # Split the text using the first $tag_match pattern found.
1904                         # Text before  pattern will be first in the array, text after
1905                         # pattern will be at the end, and between will be any catches made 
1906                         # by the pattern.
1907                         #
1908                         $parts = preg_split($block_tag_re, $text, 2, 
1909                                                                 PREG_SPLIT_DELIM_CAPTURE);
1910                         
1911                         # If in Markdown span mode, add a empty-string span-level hash 
1912                         # after each newline to prevent triggering any block element.
1913                         if ($span) {
1914                                 $void = $this->hashPart("", ':');
1915                                 $newline = "$void\n";
1916                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1917                         }
1918                         
1919                         $parsed .= $parts[0]; # Text before current tag.
1920                         
1921                         # If end of $text has been reached. Stop loop.
1922                         if (count($parts) < 3) {
1923                                 $text = "";
1924                                 break;
1925                         }
1926                         
1927                         $tag  = $parts[1]; # Tag to handle.
1928                         $text = $parts[2]; # Remaining text after current tag.
1929                         $tag_re = preg_quote($tag); # For use in a regular expression.
1930                         
1931                         #
1932                         # Check for: Code span marker
1933                         #
1934                         if ($tag{0} == "`") {
1935                                 # Find corresponding end marker.
1936                                 $tag_re = preg_quote($tag);
1937                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1938                                         $text, $matches))
1939                                 {
1940                                         # End marker found: pass text unchanged until marker.
1941                                         $parsed .= $tag . $matches[0];
1942                                         $text = substr($text, strlen($matches[0]));
1943                                 }
1944                                 else {
1945                                         # Unmatched marker: just skip it.
1946                                         $parsed .= $tag;
1947                                 }
1948                         }
1949                         #
1950                         # Check for: Indented code block.
1951                         #
1952                         else if ($tag{0} == "\n" || $tag{0} == " ") {
1953                                 # Indented code block: pass it unchanged, will be handled 
1954                                 # later.
1955                                 $parsed .= $tag;
1956                         }
1957                         #
1958                         # Check for: Fenced code block marker.
1959                         #
1960                         else if ($tag{0} == "~") {
1961                                 # Fenced code block marker: find matching end marker.
1962                                 $tag_re = preg_quote(trim($tag));
1963                                 if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, 
1964                                         $matches)) 
1965                                 {
1966                                         # End marker found: pass text unchanged until marker.
1967                                         $parsed .= $tag . $matches[0];
1968                                         $text = substr($text, strlen($matches[0]));
1969                                 }
1970                                 else {
1971                                         # No end marker: just skip it.
1972                                         $parsed .= $tag;
1973                                 }
1974                         }
1975                         #
1976                         # Check for: Opening Block level tag or
1977                         #            Opening Context Block tag (like ins and del) 
1978                         #               used as a block tag (tag is alone on it's line).
1979                         #
1980                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1981                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1982                                         preg_match($newline_before_re, $parsed) &&
1983                                         preg_match($newline_after_re, $text)    )
1984                                 )
1985                         {
1986                                 # Need to parse tag and following text using the HTML parser.
1987                                 list($block_text, $text) = 
1988                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1989                                 
1990                                 # Make sure it stays outside of any paragraph by adding newlines.
1991                                 $parsed .= "\n\n$block_text\n\n";
1992                         }
1993                         #
1994                         # Check for: Clean tag (like script, math)
1995                         #            HTML Comments, processing instructions.
1996                         #
1997                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1998                                 $tag{1} == '!' || $tag{1} == '?')
1999                         {
2000                                 # Need to parse tag and following text using the HTML parser.
2001                                 # (don't check for markdown attribute)
2002                                 list($block_text, $text) = 
2003                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
2004                                 
2005                                 $parsed .= $block_text;
2006                         }
2007                         #
2008                         # Check for: Tag with same name as enclosing tag.
2009                         #
2010                         else if ($enclosing_tag_re !== '' &&
2011                                 # Same name as enclosing tag.
2012                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
2013                         {
2014                                 #
2015                                 # Increase/decrease nested tag count.
2016                                 #
2017                                 if ($tag{1} == '/')                                             $depth--;
2018                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
2020                                 if ($depth < 0) {
2021                                         #
2022                                         # Going out of parent element. Clean up and break so we
2023                                         # return to the calling function.
2024                                         #
2025                                         $text = $tag . $text;
2026                                         break;
2027                                 }
2028                                 
2029                                 $parsed .= $tag;
2030                         }
2031                         else {
2032                                 $parsed .= $tag;
2033                         }
2034                 } while ($depth >= 0);
2035                 
2036                 return array($parsed, $text);
2037         }
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for the content of any
# tag carrying a `markdown` attribute.
#
# *   $text is expected to begin with the tag to process.
# *   Calls $hash_method (name of a method of $this, e.g. "hashBlock" or
#     "hashClean") to convert any blocks.
# *   Stops when the first opening tag closes.
# *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
#     (it is not inside clean tags)
#
# Returns an array of that form: ( processed text , remaining text )
# If the end of the text is reached while tags are still open, the first
# character of $text is returned as processed text and the rest as
# remaining text.
#
	if ($text === '') return array('', '');
	
	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_re = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?>
				(["\'])		# $1: quote delimiter		
				(.*?)		# $2: attribute value
				\1			# matching delimiter	
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';
	
	# Regex to match any tag.
	$tag_re = '{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'		|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';
	
	$original_text = $text;		# Save original text in case of failure.
	
	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag.
	# (This pattern makes $base_tag_name_re safe without quoting.)
	# The name stays empty when $text does not start with a named tag
	# (comments, processing instructions), so the variable is always
	# defined when it is interpolated in the loop below.
	#
	$base_tag_name_re = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name_re = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_re pattern found.
		# Text before  pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made 
		# by the pattern.
		#
		$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
		
		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the 
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}
		
		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.
		
		#
		# Check for: Auto-close tag (like <hr/>)
		#			 Comments and Processing Instructions.
		#
		if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name match base tag's.
			#
			if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
				# A closing tag goes up one level; an opening tag goes down
				# one level unless it is self-closed (ends with "/>").
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;
			}
			
			#
			# Check for `markdown="1"` attribute and handle it.
			# The alternation is grouped so that the anchors apply to each
			# of the three accepted values: 1, block, span.
			#
			if ($md_attr && 
				preg_match($markdown_attr_re, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_re, '', $tag);
				
				# Check if text inside this tag must be parsed in span mode.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' || $this->mode != 'block' &&
					preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
				
				# Calculate indent before tag.
				if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
					$strlen = $this->utf8_strlen;
					$indent = $strlen($matches[1], 'UTF-8');
				} else {
					$indent = 0;
				}
				
				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);
				
				# Get enclosing tag name for the ParseMarkdown function.
				# (This pattern makes $tag_name_re safe without quoting.)
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name_re = $matches[1];
				
				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
						$tag_name_re, $span_mode);
				
				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
												$block_text);
				}
				
				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";
				
				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}
		
	} while ($depth > 0);
	
	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);
	
	return array($parsed, $text);
}
function hashClean($text) {
#
# Hash a "clean" tag. Whenever a function inserts a clean tag in the text,
# the tag goes through here: it is handed to hashPart with 'C' as second
# argument, and the value hashPart returns is passed back to the caller.
#
	$hashed = $this->hashPart($text, 'C');
	return $hashed;
}
function doHeaders($text) {
#
# Header conversion redefined to support an optional id attribute, written
# `{#some-id}` at the end of the header line.
#
# Setext-style headers are handled by _doHeaders_callback_setext:
#
#	  Header 1  {#header1}
#	  ========
#
#	  Header 2  {#header2}
#	  --------
#
# atx-style headers are handled by _doHeaders_callback_atx:
#
#	# Header 1        {#header1}
#	## Header 2       {#header2}
#	## Header 2 with closing hashes ##  {#header3}
#	...
#	###### Header 6   {#header2}
#
# Returns $text with both replacements applied, setext first.
#
	$setext_re = '{
			(^.+?)								# $1: Header text
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?	# $2: Id attribute
			[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
		}mx';
	$atx_re = '{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
			[ ]*
			\n+
		}xm';

	$with_setext = preg_replace_callback($setext_re,
		array($this, '_doHeaders_callback_setext'), $text);

	return preg_replace_callback($atx_re,
		array($this, '_doHeaders_callback_atx'), $with_setext);
}
function _doHeaders_attr($attr) {
#
# Build the id attribute of a header tag.
#
# $attr is the id captured from a `{#id}` suffix, or null / "" when the
# header has none.
#
# Returns ` id="..."` (with a leading space) or "" when there is no id.
#
	# Test against null and the empty string instead of empty(): empty()
	# is also true for the string "0", which would silently drop `{#0}`.
	if ($attr === null || $attr === '')  return "";
	return " id=\"$attr\"";
}
function _doHeaders_callback_setext($matches) {
#
# preg_replace_callback handler for setext-style headers.
#
# $matches[1] is the header text, $matches[2] the optional id and
# $matches[3] the underline (a run of `=` or `-`).
#
# Returns the hashed <h1>/<h2> block surrounded by newlines, or the
# matched text unchanged when it is not treated as a header.
#
	# A single-dash underline below a line starting with "- " is left
	# untouched (presumably a list item rather than a header).
	if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
		return $matches[0];

	# `=` underline gives level 1, `-` underline gives level 2.
	$level = $matches[3][0] == '=' ? 1 : 2;
	$id    = isset($matches[2]) ? $matches[2] : '';
	$attr  = $this->_doHeaders_attr($id);
	$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# preg_replace_callback handler for atx-style headers.
#
# $matches[1] is the run of leading `#` (its length is the header level),
# $matches[2] the header text and $matches[3] the optional id.
#
# Returns the hashed <hN> block surrounded by newlines.
#
	$depth = strlen($matches[1]);

	# The id is the last capture group, so it may be absent from $matches
	# when no `{#id}` was written; null is passed on in that case.
	$id_attr = $this->_doHeaders_attr(isset($matches[3]) ? $matches[3] : null);

	$html  = '<h' . $depth . $id_attr . '>';
	$html .= $this->runSpanGamut($matches[2]);
	$html .= '</h' . $depth . '>';

	return "\n" . $this->hashBlock($html) . "\n\n";
}
function doTables($text) {
#
# Form HTML tables.
#
# Two passes are made over $text: first tables whose rows start with a
# pipe (handled by _doTable_leadingPipe_callback), then tables without a
# leading pipe (handled by _doTable_callback).
#
# Returns $text with both replacements applied.
#
	$less_than_tab = $this->tab_width - 1;
	#
	# Find tables with leading pipe.
	#
	#	| Header 1 | Header 2
	#	| -------- | --------
	#	| Cell 1   | Cell 2
	#	| Cell 3   | Cell 4
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|]							# Optional leading pipe (present)
			(.+) \n						# $1: Header row (at least one pipe)
			
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline
			
			(							# $3: Cells
				(?>
					[ ]*				# Allowed whitespace.
					[|] .* \n			# Row content.
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array($this, '_doTable_leadingPipe_callback'), $text);
	
	#
	# Find tables without leading pipe.
	#
	#	Header 1 | Header 2
	#	-------- | --------
	#	Cell 1   | Cell 2
	#	Cell 3   | Cell 4
	#
	# The callback name is spelled exactly like the method declaration
	# instead of relying on case-insensitive method lookup.
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			(\S.*[|].*) \n				# $1: Header row (at least one pipe)
			
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline
			
			(							# $3: Cells
				(?>
					.* [|] .* \n		# Row content
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array($this, '_doTable_callback'), $text);

	return $text;
}
function _doTable_leadingPipe_callback($matches) {
#
# preg_replace_callback handler for tables written with a leading pipe.
#
# $matches[1] is the header row, $matches[2] the header underline and
# $matches[3] the body rows. The leading pipe of every body row is
# stripped, then the table is handed to _doTable_callback, whose result
# is returned.
#
	$body_rows = preg_replace('/^ *[|]/m', '', $matches[3]);

	$table = array($matches[0], $matches[1], $matches[2], $body_rows);
	return $this->_doTable_callback($table);
}
function _doTable_callback($matches) {
#
# Build the HTML of one table.
#
# $matches[1] is the header row, $matches[2] the header underline and
# $matches[3] the body rows (one per line).
#
# Returns the hashed <table> block followed by a newline.
#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any tailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);
	
	# Reading alignement from header underline: a colon on the right, on
	# both sides or on the left of the dashes selects the alignment.
	$separators	= preg_split('/ *[|] */', $underline);
	$attr		= array();
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))		$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))	$attr[$n] = ' align="left"';
		else									$attr[$n] = '';
	}
	
	# Parsing span elements, including code spans, character escapes, 
	# and inline HTML tags, so that pipes inside those gets ignored.
	$head		= $this->parseSpan($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);
	
	# The header row may have more cells than the underline; give the
	# extra columns an empty alignment so every column index is defined.
	$attr		= array_pad($attr, $col_count, '');
	
	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";
	
	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));
	
	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Parsing span elements, including code spans, character escapes, 
		# and inline HTML tags, so that pipes inside those gets ignored.
		$row = $this->parseSpan($row);
		
		# Split row by cell: never more cells than there are columns, and
		# missing cells are filled with empty strings.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');
		
		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";
	
	return $this->hashBlock($text) . "\n";
}
2412         
function doDefLists($text) {
#
# Form HTML definition lists: every definition list found in $text is
# replaced by the result of _doDefLists_callback. Returns the new text.
#
	$max_indent = $this->tab_width - 1;

	# Pattern matching one entire dl list. It is embedded in the pattern
	# below, which is compiled with the `m` and `x` modifiers.
	$dl_re = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$max_indent.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$max_indent.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	# A list starts at the beginning of the text or after a blank line.
	$pattern = '{
			(?>\A\n?|(?<=\n\n))
			'.$dl_re.'
		}mx';

	return preg_replace_callback($pattern,
		array($this, '_doDefLists_callback'), $text);
}
function _doDefLists_callback($matches) {
#
# preg_replace_callback handler for doDefLists.
#
# $matches[1] is the whole definition list. Its items are converted by
# processDefListItems, the trimmed result is wrapped in <dl>...</dl>, and
# the hashed block is returned followed by two newlines.
#
	$items = $this->processDefListItems($matches[1]);
	$html  = "<dl>\n" . trim($items) . "\n</dl>";

	return $this->hashBlock($html) . "\n\n";
}
2467         function processDefListItems($list_str) {
2468         #
2469         #       Process the contents of a single definition list, splitting it
2470         #       into individual term and definition list items.
2471         #
2472                 $less_than_tab = $this->tab_width - 1;
2473                 
2474                 # trim trailing blank lines:
2475                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2477                 # Process definition terms.
2478                 $list_str = preg_replace_callback('{
2479                         (?>\A\n?|\n\n+)                                 # leading line
2480                         (                                                               # definition terms = $1
2481                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
2482                                 (?![:][ ]|[ ])                          # negative lookahead for a definition 
2483                                                                                         #   mark (colon) or more whitespace.
2484                                 (?> \S.* \n)+?                          # actual term (not whitespace). 
2485                         )                       
2486                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed 
2487                                                                                         #   with a definition mark.
2488                         }xm',
2489                         array(&$this, '_processDefListItems_callback_dt'), $list_str);
2491                 # Process actual definitions.
2492                 $list_str = preg_replace_callback('{
2493                         \n(\n+)?                                                # leading line = $1
2494                         (                                                               # marker space = $2
2495                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
2496                                 [:][ ]+                                         # definition mark (colon)
2497                         )
2498                         ((?s:.+?))                                              # definition text = $3
2499                         (?= \n+                                                 # stop at next definition mark,
2500                                 (?:                                                     # next term or end of text
2501                                         [ ]{0,'.$less_than_tab.'} [:][ ]        |
2502                                         <dt> | \z
2503                                 )                                               
2504                         )                                       
2505                         }xm',
2506                         array(&$this, '_processDefListItems_callback_dd'), $list_str);
2508                 return $list_str;
2509         }
2510         function _processDefListItems_callback_dt($matches) {
2511                 $terms = explode("\n", trim($matches[1]));
2512                 $text = '';
2513                 foreach ($terms as $term) {
2514                         $term = $this->runSpanGamut(trim($term));
2515                         $text .= "\n<dt>" . $term . "</dt>";
2516                 }
2517                 return $text . "\n";
2518         }
2519         function _processDefListItems_callback_dd($matches) {
2520                 $leading_line   = $matches[1];
2521                 $marker_space   = $matches[2];
2522                 $def                    = $matches[3];
2524                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
2525                         # Replace marker with the appropriate whitespace indentation
2526                         $def = str_repeat(' ', strlen($marker_space)) . $def;
2527                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2528                         $def = "\n". $def ."\n";
2529                 }
2530                 else {
2531                         $def = rtrim($def);
2532                         $def = $this->runSpanGamut($this->outdent($def));
2533                 }
2535                 return "\n<dd>" . $def . "</dd>\n";
2536         }
2539         function doFencedCodeBlocks($text) {
2540         #
2541         # Adding the fenced code block syntax to regular Markdown:
2542         #
2543         # ~~~
2544         # Code block
2545         # ~~~
2546         #
2547                 $less_than_tab = $this->tab_width;
2548                 
2549                 $text = preg_replace_callback('{
2550                                 (?:\n|\A)
2551                                 # 1: Opening marker
2552                                 (
2553                                         ~{3,} # Marker: three tilde or more.
2554                                 )
2555                                 [ ]* \n # Whitespace and newline following marker.
2556                                 
2557                                 # 2: Content
2558                                 (
2559                                         (?>
2560                                                 (?!\1 [ ]* \n)  # Not a closing marker.
2561                                                 .*\n+
2562                                         )+
2563                                 )
2564                                 
2565                                 # Closing marker.
2566                                 \1 [ ]* \n
2567                         }xm',
2568                         array(&$this, '_doFencedCodeBlocks_callback'), $text);
2570                 return $text;
2571         }
2572         function _doFencedCodeBlocks_callback($matches) {
2573                 $codeblock = $matches[2];
2574                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2575                 $codeblock = preg_replace_callback('/^\n+/',
2576                         array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
2577                 $codeblock = "<pre><code>$codeblock</code></pre>";
2578                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
2579         }
2580         function _doFencedCodeBlocks_newlines($matches) {
2581                 return str_repeat("<br$this->empty_element_suffix", 
2582                         strlen($matches[0]));
2583         }
        #
        # Redefining emphasis markers so that emphasis by underscore does not
        # work in the middle of a word.
        #
        # Layout shared by the three lists below:
        #   - the '' entry matches a marker that must be followed by
        #     non-whitespace (or end of line) and not by punctuation plus
        #     whitespace; this looks like the opening-marker pattern;
        #   - the entries keyed by a marker string match that same marker
        #     when it is preceded by non-whitespace (or start of line); this
        #     looks like the matching closing-marker pattern.
        # In every underscore variant the marker must not touch a character
        # from [a-zA-Z0-9_] on its outer side (before it in the '' entry,
        # after it in the keyed entry).
        #

        # Single markers: * and _
        var $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',
                '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
                '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
                );
        # Double markers: ** and __
        var $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',
                '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
                '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
                );
        # Triple markers: *** and ___
        var $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',
                '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
                '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
                );
2607         function formParagraphs($text) {
2608         #
2609         #       Params:
2610         #               $text - string to process with html <p> tags
2611         #
2612                 # Strip leading and trailing lines:
2613                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2614                 
2615                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2617                 #
2618                 # Wrap <p> tags and unhashify HTML blocks
2619                 #
2620                 foreach ($grafs as $key => $value) {
2621                         $value = trim($this->runSpanGamut($value));
2622                         
2623                         # Check if this should be enclosed in a paragraph.
2624                         # Clean tag hashes & block tag hashes are left alone.
2625                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2626                         
2627                         if ($is_p) {
2628                                 $value = "<p>$value</p>";
2629                         }
2630                         $grafs[$key] = $value;
2631                 }
2632                 
2633                 # Join grafs in one text, then unhash HTML tags. 
2634                 $text = implode("\n\n", $grafs);
2635                 
2636                 # Finish by removing any tag hashes still present in $text.
2637                 $text = $this->unhash($text);
2638                 
2639                 return $text;
2640         }
2641         
2642         
2643         ### Footnotes
2644         
2645         function stripFootnotes($text) {
2646         #
2647         # Strips link definitions from text, stores the URLs and titles in
2648         # hash references.
2649         #
2650                 $less_than_tab = $this->tab_width - 1;
2652                 # Link defs are in the form: [^id]: url "optional title"
2653                 $text = preg_replace_callback('{
2654                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
2655                           [ ]*
2656                           \n?                                   # maybe *one* newline
2657                         (                                               # text = $2 (no blank lines allowed)
2658                                 (?:                                     
2659                                         .+                              # actual text
2660                                 |
2661                                         \n                              # newlines but 
2662                                         (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2663                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
2664                                                                         # by non-indented content
2665                                 )*
2666                         )               
2667                         }xm',
2668                         array(&$this, '_stripFootnotes_callback'),
2669                         $text);
2670                 return $text;
2671         }
2672         function _stripFootnotes_callback($matches) {
2673                 $note_id = $this->fn_id_prefix . $matches[1];
2674                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
2675                 return ''; # String that will replace the block
2676         }
2679         function doFootnotes($text) {
2680         #
2681         # Replace footnote references in $text [^id] with a special text-token 
2682         # which will be replaced by the actual footnote marker in appendFootnotes.
2683         #
2684                 if (!$this->in_anchor) {
2685                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2686                 }
2687                 return $text;
2688         }
2690         
2691         function appendFootnotes($text) {
2692         #
2693         # Append footnote list to text.
2694         #
2695                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2696                         array(&$this, '_appendFootnotes_callback'), $text);
2697         
2698                 if (!empty($this->footnotes_ordered)) {
2699                         $text .= "\n\n";
2700                         $text .= "<div class=\"footnotes\">\n";
2701                         $text .= "<hr". $this->empty_element_suffix ."\n";
2702                         $text .= "<ol>\n\n";
2703                         
2704                         $attr = " rev=\"footnote\"";
2705                         if ($this->fn_backlink_class != "") {
2706                                 $class = $this->fn_backlink_class;
2707                                 $class = $this->encodeAttribute($class);
2708                                 $attr .= " class=\"$class\"";
2709                         }
2710                         if ($this->fn_backlink_title != "") {
2711                                 $title = $this->fn_backlink_title;
2712                                 $title = $this->encodeAttribute($title);
2713                                 $attr .= " title=\"$title\"";
2714                         }
2715                         $num = 0;
2716                         
2717                         while (!empty($this->footnotes_ordered)) {
2718                                 $footnote = reset($this->footnotes_ordered);
2719                                 $note_id = key($this->footnotes_ordered);
2720                                 unset($this->footnotes_ordered[$note_id]);
2721                                 
2722                                 $footnote .= "\n"; # Need to append newline before parsing.
2723                                 $footnote = $this->runBlockGamut("$footnote\n");                                
2724                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
2725                                         array(&$this, '_appendFootnotes_callback'), $footnote);
2726                                 
2727                                 $attr = str_replace("%%", ++$num, $attr);
2728                                 $note_id = $this->encodeAttribute($note_id);
2729                                 
2730                                 # Add backlink to last paragraph; create new paragraph if needed.
2731                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
2732                                 if (preg_match('{</p>$}', $footnote)) {
2733                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2734                                 } else {
2735                                         $footnote .= "\n\n<p>$backlink</p>";
2736                                 }
2737                                 
2738                                 $text .= "<li id=\"fn:$note_id\">\n";
2739                                 $text .= $footnote . "\n";
2740                                 $text .= "</li>\n\n";
2741                         }
2742                         
2743                         $text .= "</ol>\n";
2744                         $text .= "</div>";
2745                 }
2746                 return $text;
2747         }
2748         function _appendFootnotes_callback($matches) {
2749                 $node_id = $this->fn_id_prefix . $matches[1];
2750                 
2751                 # Create footnote marker only if it has a corresponding footnote *and*
2752                 # the footnote hasn't been used by another marker.
2753                 if (isset($this->footnotes[$node_id])) {
2754                         # Transfert footnote content to the ordered list.
2755                         $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];